org.fcrepo.modeshape.binary.S3BinaryStore.java Source code

Java tutorial

Introduction

Here is the source code for org.fcrepo.modeshape.binary.S3BinaryStore.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.modeshape.binary;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import javax.jcr.RepositoryException;

import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.iterable.S3Objects;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import org.modeshape.common.logging.Logger;
import org.modeshape.jcr.JcrI18n;
import org.modeshape.jcr.value.BinaryKey;
import org.modeshape.jcr.value.BinaryValue;
import org.modeshape.jcr.value.binary.AbstractBinaryStore;
import org.modeshape.jcr.value.binary.BinaryStoreException;
import org.modeshape.jcr.value.binary.FileSystemBinaryStore;
import org.modeshape.jcr.value.binary.StoredBinaryValue;
import org.modeshape.jcr.value.binary.TransientBinaryStore;

/**
 * Custom binary storage option for ModeShape which manages the storage
 * of files to Amazon S3
 *
 * @author bbranan
 */
public class S3BinaryStore extends AbstractBinaryStore {
    /*
     * AWS client which provides access to Amazon S3
     */
    private AmazonS3Client s3Client = null;

    /*
     * Temporary local file cache to allow for checksum computation
     */
    private FileSystemBinaryStore fileSystemCache;

    /*
     * S3 bucket used to store and retrieve content
     */
    private String bucketName;

    /*
     * System property key from which the bucket name will be retrieved
     */
    private static final String BUCKET_PROPERTY_KEY = "aws.bucket";

    /*
     * Key for storing and retrieving extracted text from S3 object user metadata
     */
    protected static final String EXTRACTED_TEXT_KEY = "extracted-text";

    /*
     * Key for storing boolean which describes if object is unused
     */
    protected static final String UNUSED_KEY = "unused";

    /**
     * Creates a binary store with a connection to Amazon S3
     *
     * In order to connect to AWS, an access key ID and secret key must be made available
     * as environment variables, java system properties, or via a credential file.
     * See: http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html
     *
     * Bucket name must be set via the java system property: aws.bucket
     */
    public S3BinaryStore() {
        this.bucketName = System.getProperty(BUCKET_PROPERTY_KEY);
        this.s3Client = new AmazonS3Client();
        this.fileSystemCache = TransientBinaryStore.get();
        this.fileSystemCache.setMinimumBinarySizeInBytes(1L);

        if (null == bucketName) {
            throw new RuntimeException(
                    "Bucket name for binary store must be set " + "using system property: " + BUCKET_PROPERTY_KEY);
        } else {
            // Ensure bucket exists
            if (!s3Client.doesBucketExist(bucketName)) {
                s3Client.createBucket(bucketName);
            }
        }
    }

    /**
     * Creates a binary store with a connection to Amazon S3. This constructor is
     * intended for testing only.
     *
     * @param bucketName The name of the S3 bucket where files will be stored
     * @param s3Client Client for communicating with Amazon S3
     */
    protected S3BinaryStore(String bucketName, AmazonS3Client s3Client) {
        this.bucketName = bucketName;
        this.s3Client = s3Client;
        this.fileSystemCache = TransientBinaryStore.get();
        this.fileSystemCache.setMinimumBinarySizeInBytes(1L);
    }

    @Override
    protected String getStoredMimeType(BinaryValue binaryValue) throws BinaryStoreException {
        try {
            String key = binaryValue.getKey().toString();
            ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key);
            return metadata.getContentType();
        } catch (AmazonClientException e) {
            throw new BinaryStoreException(e);
        }
    }

    @Override
    protected void storeMimeType(BinaryValue binaryValue, String mimeType) throws BinaryStoreException {
        try {
            String key = binaryValue.getKey().toString();
            ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key);
            metadata.setContentType(mimeType);

            // Update the object in place
            CopyObjectRequest copyRequest = new CopyObjectRequest(bucketName, key, bucketName, key);
            copyRequest.setNewObjectMetadata(metadata);
            s3Client.copyObject(copyRequest);
        } catch (AmazonClientException e) {
            throw new BinaryStoreException(e);
        }
    }

    @Override
    public void storeExtractedText(BinaryValue binaryValue, String extractedText) throws BinaryStoreException {
        // User defined metadata for S3 objects cannot exceed 2KB
        // This checks for the absolute top of that range
        if (extractedText.length() > 2000) {
            throw new BinaryStoreException("S3 objects cannot store associated data " + "that is larger than 2KB");
        }

        setS3ObjectUserProperty(binaryValue.getKey(), EXTRACTED_TEXT_KEY, extractedText);
    }

    private void setS3ObjectUserProperty(BinaryKey binaryKey, String metadataKey, String metadataValue)
            throws BinaryStoreException {
        try {
            String key = binaryKey.toString();
            ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key);
            Map<String, String> userMetadata = metadata.getUserMetadata();

            if (null != metadataValue && metadataValue.equals(userMetadata.get(metadataKey))) {
                return; // The key/value pair already exists in user metadata, skip update
            }

            userMetadata.put(metadataKey, metadataValue);
            metadata.setUserMetadata(userMetadata);

            // Update the object in place
            CopyObjectRequest copyRequest = new CopyObjectRequest(bucketName, key, bucketName, key);
            copyRequest.setNewObjectMetadata(metadata);
            s3Client.copyObject(copyRequest);
        } catch (AmazonClientException e) {
            throw new BinaryStoreException(e);
        }
    }

    @Override
    public String getExtractedText(BinaryValue binaryValue) throws BinaryStoreException {
        try {
            String key = binaryValue.getKey().toString();
            ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key);
            return metadata.getUserMetadata().get(EXTRACTED_TEXT_KEY);
        } catch (AmazonClientException e) {
            throw new BinaryStoreException(e);
        }
    }

    @Override
    public BinaryValue storeValue(InputStream stream, boolean markAsUnused) throws BinaryStoreException {
        // Cache file on the file system in order to have SHA-1 hash calculated
        BinaryValue cachedFile = fileSystemCache.storeValue(stream, markAsUnused);
        try {
            // Retrieve SHA-1 hash
            BinaryKey key = new BinaryKey(cachedFile.getKey().toString());

            // If file is NOT already in S3 storage, store it
            if (!s3Client.doesObjectExist(bucketName, key.toString())) {
                ObjectMetadata metadata = new ObjectMetadata();
                // Set Mimetype
                metadata.setContentType(fileSystemCache.getMimeType(cachedFile, key.toString()));
                // Set Unused value
                Map<String, String> userMetadata = metadata.getUserMetadata();
                userMetadata.put(UNUSED_KEY, String.valueOf(markAsUnused));
                metadata.setUserMetadata(userMetadata);
                // Store content in S3
                s3Client.putObject(bucketName, key.toString(), fileSystemCache.getInputStream(key), metadata);
            } else {
                // Set the unused value, if necessary
                if (markAsUnused) {
                    markAsUnused(Collections.singleton(key));
                } else {
                    markAsUsed(Collections.singleton(key));
                }
            }
            return new StoredBinaryValue(this, key, cachedFile.getSize());
        } catch (AmazonClientException | RepositoryException | IOException e) {
            throw new BinaryStoreException(e);
        } finally {
            // Remove cached file
            fileSystemCache.markAsUnused(Collections.singleton(cachedFile.getKey()));
            fileSystemCache.removeValuesUnusedLongerThan(1, TimeUnit.MICROSECONDS);
        }
    }

    @Override
    public InputStream getInputStream(BinaryKey key) throws BinaryStoreException {
        try {
            S3Object s3Object = s3Client.getObject(bucketName, key.toString());
            return s3Object.getObjectContent();
        } catch (AmazonClientException e) {
            throw new BinaryStoreException(e);
        }
    }

    @Override
    public void markAsUsed(Iterable<BinaryKey> keys) throws BinaryStoreException {
        for (BinaryKey key : keys) {
            setS3ObjectUserProperty(key, UNUSED_KEY, String.valueOf(false));
        }
    }

    @Override
    public void markAsUnused(Iterable<BinaryKey> keys) throws BinaryStoreException {
        for (BinaryKey key : keys) {
            setS3ObjectUserProperty(key, UNUSED_KEY, String.valueOf(true));
        }
    }

    @Override
    public void removeValuesUnusedLongerThan(long minimumAge, TimeUnit timeUnit) throws BinaryStoreException {
        Date deadline = new Date(System.currentTimeMillis() - timeUnit.toMillis(minimumAge));

        // There is no capacity in S3 to query on object properties. This must be done
        // by straight iteration, so may take a very long time for large data sets.
        try {
            for (BinaryKey key : getAllBinaryKeys()) {
                ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key.toString());
                String unused = metadata.getUserMetadata().get(UNUSED_KEY);
                if (null != unused && unused.equals(String.valueOf(true))) {
                    Date lastMod = metadata.getLastModified();
                    if (lastMod.before(deadline)) {
                        try {
                            s3Client.deleteObject(bucketName, key.toString());
                        } catch (AmazonClientException e) {
                            Logger log = Logger.getLogger(getClass());
                            log.warn(e, JcrI18n.unableToDeleteTemporaryFile, e.getMessage());
                        }
                    }
                } // Assumes that if no value is set, content is used
            }
        } catch (AmazonClientException e) {
            throw new BinaryStoreException(e);
        }
    }

    @Override
    public Iterable<BinaryKey> getAllBinaryKeys() throws BinaryStoreException {
        try {
            final Iterator<S3ObjectSummary> objectsIterator = S3Objects.inBucket(s3Client, bucketName).iterator();
            // Lambda to hand back BinaryKeys rather than S3ObjectSummaries
            return () -> {
                return new Iterator<BinaryKey>() {
                    @Override
                    public boolean hasNext() {
                        return objectsIterator.hasNext();
                    }

                    @Override
                    public BinaryKey next() {
                        S3ObjectSummary object = objectsIterator.next();
                        return new BinaryKey(object.getKey());
                    }
                };
            };
        } catch (AmazonClientException e) {
            throw new BinaryStoreException(e);
        }
    }

}