com.quixey.hadoop.fs.oss.MultiPartUploader.java Source code

Introduction

Here is the source code for com.quixey.hadoop.fs.oss.MultiPartUploader.java, a helper class that splits large local files into parts and uploads them to Aliyun OSS in parallel.

Source

/* Copyright 2014, Quixey Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.quixey.hadoop.fs.oss;

import com.aliyun.openservices.oss.OSSClient;
import com.aliyun.openservices.oss.model.CompleteMultipartUploadRequest;
import com.aliyun.openservices.oss.model.InitiateMultipartUploadRequest;
import com.aliyun.openservices.oss.model.InitiateMultipartUploadResult;
import com.aliyun.openservices.oss.model.ObjectMetadata;
import com.aliyun.openservices.oss.model.PartETag;
import com.aliyun.openservices.oss.model.UploadPartRequest;
import com.aliyun.openservices.oss.model.UploadPartResult;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import com.google.common.io.ByteStreams;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.quixey.hadoop.fs.oss.OSSFileSystemConfigKeys.OSS_MULTIPART_UPLOADS_BLOCK_SIZE_PROPERTY;
import static com.quixey.hadoop.fs.oss.OSSFileSystemConfigKeys.OSS_MULTIPART_UPLOADS_MAX_THREADS_PROPERTY;

/**
 * Handles multipart upload for OSS.
 *
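 * A file is split into fixed-size parts, each uploaded on its own thread, and the
 * collected part ETags are submitted in a single CompleteMultipartUpload request.
 *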
 * @author Jim Lim - jim@quixey.com
 */
@VisibleForTesting
class MultiPartUploader {

    public static final long MAX_PART_SIZE = (long) 5 * 1024 * 1024 * 1024; // 5 GiB
    public static final int DEFAULT_MULTIPART_BLOCK_SIZE = 64 * 1024 * 1024; // 64 MiB
    private static final Logger LOG = LoggerFactory.getLogger(MultiPartUploader.class);
    private final OSSClient client;
    private final String bucket;
    private final boolean multipartEnabled;
    private final int maxThreads;

    private long partSize;

    MultiPartUploader(OSSClient client, String bucket, Configuration conf) {
        this.client = checkNotNull(client);
        this.bucket = checkNotNull(bucket);

        checkNotNull(conf);

        multipartEnabled = conf.getBoolean(OSSFileSystemConfigKeys.OSS_MULTIPART_UPLOADS_ENABLED, true);
        partSize = conf.getLong(OSS_MULTIPART_UPLOADS_BLOCK_SIZE_PROPERTY, DEFAULT_MULTIPART_BLOCK_SIZE);
        maxThreads = conf.getInt(OSS_MULTIPART_UPLOADS_MAX_THREADS_PROPERTY, Integer.MAX_VALUE);

        checkArgument(partSize <= MAX_PART_SIZE, "%s must be at most %s", OSS_MULTIPART_UPLOADS_BLOCK_SIZE_PROPERTY,
                MAX_PART_SIZE);
    }

    /**
     * @param fileSize size of local file
     * @return true iff a file of {@code fileSize} bytes should be broken up into parts and uploaded
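     *         (e.g. with the default 64 MiB part size, a 100 MiB file is split but a 10 MiB file is not)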
     */
    boolean shouldUpload(long fileSize) {
        return multipartEnabled && partSize < fileSize;
    }

    /**
     * Breaks up {@code file} into multiple parts and uploads it.
     *
     * @param key     destination key
     * @param file    local file
     * @param md5Hash checksum
     * @throws com.aliyun.openservices.ServiceException from OSSClient.
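     * @throws IOException if any part fails to upload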
     */
    void upload(String key, File file, Optional<byte[]> md5Hash) throws IOException {
        checkNotNull(key);
        checkNotNull(file);
        checkNotNull(md5Hash);

        // initiate upload
        ObjectMetadata metadata = metadata(file, md5Hash);
        String uploadId = initiateMultiPartUpload(key, metadata);

        // count number of parts
        int parts = (int) calculateNumParts(file.length());
        LOG.info("Initiating multipart upload request for key {} with {} parts", key, parts);

        List<PartETag> eTags = uploadParts(key, file, uploadId, parts);
        completeMultipartUpload(key, uploadId, eTags);
    }

    @VisibleForTesting
    void setPartSize(long partSize) {
        this.partSize = partSize;
    }

    @Override
    public String toString() {
        return Objects.toStringHelper(this).add("bucket", bucket).add("multipart enabled", multipartEnabled)
                .add("part size", partSize).add("max threads", maxThreads).toString();
    }

    private void completeMultipartUpload(String key, String uploadId, List<PartETag> eTags) {
        CompleteMultipartUploadRequest request = new CompleteMultipartUploadRequest(bucket, key, uploadId, eTags);
        client.completeMultipartUpload(request);
    }

    @SuppressWarnings("unchecked")
    private List<PartETag> uploadParts(final String key, final File file, final String uploadId, int parts)
            throws IOException {

        // construct thread pool
        ExecutorService pool = newExecutorService(file, parts);

        final Future<PartETag>[] futures = new Future[parts];
        for (int i = 0; i < parts; i++) {
            final int partNum = i;
            futures[i] = pool.submit(new Callable<PartETag>() {
                @Override
                public PartETag call() throws Exception {
                    return uploadPart(key, file, uploadId, partNum);
                }
            });
        }
        pool.shutdown();

        // wait for uploads to complete
        awaitTermination(pool);

        // retrieve etags and verify uploads
        PartETag[] eTags = new PartETag[parts];
        for (int i = 0; i < parts; i++) {
            try {
                eTags[i] = futures[i].get();
            } catch (InterruptedException | ExecutionException e) {
                // i is the 0-based index of the part that failed
                throw new IOException("Unable to upload part " + i, e);
            }
        }

        return Arrays.asList(eTags);
    }

    private void awaitTermination(ExecutorService pool) {
        while (!pool.isTerminated()) {
            try {
                // poll until every part upload has finished
                pool.awaitTermination(5, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // preserve the interrupt status
                throw new RuntimeException(e);
            }
        }
    }

    private ExecutorService newExecutorService(File file, int parts) {
        int threads = Math.min(parts, maxThreads);
        ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("dragon-mp-" + file.getName() + "-%d")
                .build();
        return Executors.newFixedThreadPool(threads, factory);
    }

    /**
     * Uploads a single part to OSS.
     *
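     * Each call re-opens the file and skips {@code partNum * partSize} bytes to reach
     * its slice; OSS part numbers are 1-based, hence {@code partNum + 1} in the request.
     *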
     * @param key      destination key
     * @param file     local file
     * @param uploadId upload ID
     * @param partNum  part number
     * @return part etag
     */
    private PartETag uploadPart(String key, File file, String uploadId, int partNum) {
        long start = partNum * partSize;
        long size = Math.min(partSize, file.length() - start);

        try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) {

            if (start > 0)
                ByteStreams.skipFully(stream, start);

            UploadPartRequest request = new UploadPartRequest();
            request.setBucketName(bucket);
            request.setInputStream(stream);
            request.setKey(key);
            request.setPartNumber(partNum + 1);
            request.setPartSize(size);
            request.setUploadId(uploadId);

            UploadPartResult result = client.uploadPart(request);
            LOG.info("{} part {}: upload complete", key, partNum);

            return result.getPartETag();
        } catch (IOException e) {
            throw new RuntimeException("Error uploading part " + partNum + " of " + key, e);
        }
    }

    /**
     * @param length size of the local file
     * @return number of parts to upload
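     *         (ceiling division: e.g. a 150 MiB file with 64 MiB parts yields three parts)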
     */
    private long calculateNumParts(long length) {
        return length / partSize + (0 == length % partSize ? 0 : 1);
    }

    private String initiateMultiPartUpload(String key, ObjectMetadata metadata) {
        InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(bucket, key);
        request.setObjectMetadata(metadata);
        InitiateMultipartUploadResult result = client.initiateMultipartUpload(request);
        return result.getUploadId();
    }

    /**
     * @param file    file to upload
     * @param md5Hash file checksum
     * @return object metadata
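     *         (when present, the checksum is sent as a base64-encoded Content-MD5 header, per RFC 1864)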
     */
    private ObjectMetadata metadata(File file, Optional<byte[]> md5Hash) {
        // create metadata
        ObjectMetadata metadata = new ObjectMetadata();
        metadata.setContentLength(file.length());
        metadata.setLastModified(new Date(file.lastModified()));
        // add MD5, if provided
        if (md5Hash.isPresent()) {
            String contentMd5 = Base64.encodeBase64String(md5Hash.get());
            metadata.setHeader("Content-MD5", contentMd5.trim());
        }
        return metadata;
    }
}
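
Usage example

The following is a minimal sketch of how this class might be driven; it is not part of
the original source. The endpoint, credentials, bucket name, and file paths are
placeholders, and the three-argument OSSClient(endpoint, accessKeyId, accessKeySecret)
constructor is assumed from the Aliyun SDK of this era. Because MultiPartUploader is
package-private, the caller must live in com.quixey.hadoop.fs.oss.

package com.quixey.hadoop.fs.oss;

import com.aliyun.openservices.oss.OSSClient;
import com.google.common.base.Optional;
import org.apache.hadoop.conf.Configuration;

import java.io.File;
import java.io.IOException;

public class MultiPartUploaderExample {
    public static void main(String[] args) throws IOException {
        // Placeholder endpoint and credentials -- substitute real values.
        OSSClient client = new OSSClient("http://oss-cn-hangzhou.aliyuncs.com",
                                         "<access-key-id>", "<access-key-secret>");

        // Defaults: multipart enabled, 64 MiB parts, thread count capped only by the part count.
        Configuration conf = new Configuration();
        MultiPartUploader uploader = new MultiPartUploader(client, "my-bucket", conf);

        File file = new File("/tmp/large.bin");
        // Files no larger than one part should be uploaded with a simple PUT instead.
        if (uploader.shouldUpload(file.length())) {
            // No MD5 checksum supplied; Optional.absent() skips the Content-MD5 header.
            uploader.upload("backups/large.bin", file, Optional.<byte[]>absent());
        }
    }
}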