com.github.rholder.esthree.command.GetMultipart.java Source code

Introduction

Here is the source code for com.github.rholder.esthree.command.GetMultipart.java, which downloads an S3 object in retryable chunks and verifies the result against the object's MD5-based ETag when possible.

Source

/*
 * Copyright 2014 Ray Holder
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.github.rholder.esthree.command;

import com.amazonaws.AmazonClientException;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressEventType;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.transfer.TransferProgress;
import com.amazonaws.util.BinaryUtils;
import com.github.rholder.esthree.progress.MutableProgressListener;
import com.github.rholder.esthree.progress.Progress;
import com.github.rholder.esthree.progress.TransferProgressWrapper;
import com.github.rholder.esthree.util.RetryUtils;
import com.github.rholder.moar.concurrent.partition.Part;
import com.github.rholder.moar.concurrent.partition.Parts;
import com.github.rholder.retry.RetryException;
import org.apache.commons.io.IOUtils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.security.MessageDigest;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;

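/**
 * Downloads an S3 object in fixed-size chunks, writing each chunk into a
 * RandomAccessFile at its byte offset and retrying failed chunks
 * individually. A running MD5 digest is carried across chunks so that the
 * completed download can be verified against the object's ETag whenever the
 * ETag is a plain MD5 (i.e. the object was not uploaded via multipart
 * upload).
 */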
public class GetMultipart implements Callable<Integer> {

    // TODO parameterize internal buffer size?
    public static final int DEFAULT_BUF_SIZE = 4096 * 4; // 16 KB
    public static final int DEFAULT_CHUNK_SIZE = 1024 * 1024 * 5; // 5 MB

    public AmazonS3Client amazonS3Client;
    public String bucket;
    public String key;
    public File outputFile;
    public boolean verbose;
    public RandomAccessFile output;

    private Integer chunkSize;
    private MutableProgressListener progressListener;

    private MessageDigest currentDigest;
    private List<Part> fileParts;
    private long contentLength;

    public GetMultipart(AmazonS3Client amazonS3Client, String bucket, String key, File outputFile, boolean verbose)
            throws FileNotFoundException {
        this.amazonS3Client = amazonS3Client;
        this.bucket = bucket;
        this.key = key;
        this.outputFile = outputFile;
        this.verbose = verbose;
    }

    public GetMultipart withProgressListener(MutableProgressListener progressListener) {
        this.progressListener = progressListener;
        return this;
    }

    public GetMultipart withChunkSize(Integer chunkSize) {
        this.chunkSize = chunkSize;
        return this;
    }

    @Override
    public Integer call() throws Exception {
        ObjectMetadata om = amazonS3Client.getObjectMetadata(bucket, key);
        contentLength = om.getContentLength();

        // the running digest: initialized here, then replaced after each
        // successful chunk with a digest covering all verified bytes so far
        currentDigest = MessageDigest.getInstance("MD5");
        chunkSize = chunkSize == null ? DEFAULT_CHUNK_SIZE : chunkSize;
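        // Parts.among splits the contentLength bytes into parts of at most
        // chunkSize bytes; each Part carries an inclusive [start, end] byte range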
        fileParts = Parts.among(contentLength, chunkSize);
        for (Part fp : fileParts) {

            /*
             * The digest must cover the full incoming stream, so it is updated
             * as each chunk arrives. If a chunk fails and is retried, its bytes
             * are re-read and fed through a clone of the digest that was valid
             * up to the previous chunk, so earlier bytes never need re-hashing.
             */
            currentDigest = retryingGetWithRange(fp.start, fp.end);
        }

        // TODO fix total content length progress bar
        if (progressListener != null) {
            progressListener.progressChanged(new ProgressEvent(ProgressEventType.TRANSFER_COMPLETED_EVENT));
        }

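        // An ETag containing a "-" comes from a multipart upload and is not a
        // plain MD5 of the object's bytes, so it cannot be checked against the
        // locally computed digest.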
        String fullETag = om.getETag();
        if (!fullETag.contains("-")) {
            byte[] expected = BinaryUtils.fromHex(fullETag);
            byte[] current = currentDigest.digest();
            if (!Arrays.equals(expected, current)) {
                throw new AmazonClientException("Unable to verify integrity of data download.  "
                        + "Client calculated content hash didn't match hash calculated by Amazon S3.  "
                        + "The data may be corrupt.");
            }
        } else {
            // TODO log warning that we can't validate the MD5
            if (verbose) {
                System.err.println("\nMD5 does not exist on AWS for this file, calculated value: "
                        + BinaryUtils.toHex(currentDigest.digest()));
            }
        }
        // TODO add ability to resume from previously downloaded chunks
        // TODO add rate limiter

        return 0;
    }

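    /**
     * Downloads the inclusive byte range [start, end] of the object, retrying
     * on transient AWS failures, and writes it into the output file at offset
     * start. Returns a digest covering all previously verified chunks plus
     * this one.
     */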
    public MessageDigest retryingGetWithRange(final long start, final long end)
            throws ExecutionException, RetryException {

        return (MessageDigest) RetryUtils.AWS_RETRYER.call(new Callable<Object>() {
            @Override
            public MessageDigest call() throws Exception {

                long totalBytes = end - start + 1;
                Progress progress = new TransferProgressWrapper(new TransferProgress());
                progress.setTotalBytesToTransfer(totalBytes);

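                // offset the listener by the fraction already downloaded and scale
                // this chunk's updates so all chunks together sum to 100%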
                if (progressListener != null) {
                    progressListener.withTransferProgress(progress)
                            .withCompleted((100.0 * start) / contentLength)
                            .withMultiplier((1.0 * totalBytes / Math.min(contentLength, chunkSize)) / fileParts.size());
                }

                GetObjectRequest req = new GetObjectRequest(bucket, key).withRange(start, end);

                S3Object s3Object = amazonS3Client.getObject(req);
                InputStream input = null;
                try {
                    // lazily create the output file, now that we know the S3 object actually exists
                    if (output == null) {
                        output = new RandomAccessFile(outputFile, "rw");
                    }

                    // seek to the start of the chunk in the file, just in case we're retrying
                    output.seek(start);
                    input = s3Object.getObjectContent();

                    return copyAndHash(input, totalBytes, progress);
                } finally {
                    IOUtils.closeQuietly(input);
                }
            }
        });
    }

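    /**
     * Streams the given chunk to the output file while updating a clone of
     * the current digest. The clone is promoted to currentDigest by the
     * caller only after the whole chunk has been read successfully, so a
     * failed read never corrupts the running digest.
     */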
    public MessageDigest copyAndHash(InputStream input, long totalBytes, Progress progress)
            throws IOException, CloneNotSupportedException {

        // clone the current digest, such that it remains unchanged in this method
        MessageDigest computedDigest = (MessageDigest) currentDigest.clone();
        byte[] buffer = new byte[DEFAULT_BUF_SIZE];

        long count = 0;
        int n;
        while (-1 != (n = input.read(buffer))) {
            output.write(buffer, 0, n);
            if (progressListener != null) {
                progress.updateProgress(n);
                progressListener
                        .progressChanged(new ProgressEvent(ProgressEventType.REQUEST_BYTE_TRANSFER_EVENT, n));
            }
            computedDigest.update(buffer, 0, n);
            count += n;
        }

        // verify that exactly the expected number of bytes were read
        if (totalBytes != count) {
            throw new IOException(
                    String.format("%d bytes downloaded instead of expected %d bytes", count, totalBytes));
        }
        return computedDigest;
    }
}
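
Usage

For reference, a minimal sketch of how this class might be driven, assuming the esthree classes above are on the classpath and that default AWS credentials are available; the no-argument AmazonS3Client constructor, the bucket name, key, and chunk size below are illustrative, not part of the original source:

import com.amazonaws.services.s3.AmazonS3Client;
import com.github.rholder.esthree.command.GetMultipart;

import java.io.File;

public class GetMultipartExample {

    public static void main(String[] args) throws Exception {
        // SDK v1 client using the default credential provider chain
        AmazonS3Client s3 = new AmazonS3Client();

        // download with a 10 MB chunk size instead of the 5 MB default
        Integer exitCode = new GetMultipart(s3, "example-bucket", "path/to/object.bin",
                new File("object.bin"), true)
                .withChunkSize(10 * 1024 * 1024)
                .call();

        System.out.println("exit code: " + exitCode);
    }
}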