com.emc.ecs.sync.source.EcsS3Source.java Source code

Java tutorial

Introduction

Here is the source code for com.emc.ecs.sync.source.EcsS3Source.java

Source

/*
 * Copyright 2013-2015 EMC Corporation. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 * http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.emc.ecs.sync.source;

import com.emc.ecs.sync.filter.SyncFilter;
import com.emc.ecs.sync.model.SyncEstimate;
import com.emc.ecs.sync.model.object.EcsS3ObjectVersion;
import com.emc.ecs.sync.model.object.EcsS3SyncObject;
import com.emc.ecs.sync.model.object.SyncObject;
import com.emc.ecs.sync.target.EcsS3Target;
import com.emc.ecs.sync.target.SyncTarget;
import com.emc.ecs.sync.util.*;
import com.emc.object.Protocol;
import com.emc.object.s3.S3Client;
import com.emc.object.s3.S3Config;
import com.emc.object.s3.S3Exception;
import com.emc.object.s3.bean.*;
import com.emc.object.s3.jersey.S3JerseyClient;
import com.emc.rest.smart.ecs.Vdc;
import com.sun.jersey.client.urlconnection.URLConnectionClientHandler;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.*;

public class EcsS3Source extends SyncSource<EcsS3SyncObject> {
    private static final Logger log = LoggerFactory.getLogger(EcsS3Source.class);

    // ---- command-line options: registered in getCustomOptions(), read in parseCustomOptions() ----

    // long option carrying the source bucket name (takes one argument)
    public static final String BUCKET_OPTION = "source-bucket";
    public static final String BUCKET_DESC = "Required. Specifies the source bucket to use.";
    public static final String BUCKET_ARG_NAME = "bucket";

    // flag option: URL-decode keys when computing relative paths (see getRelativePath())
    public static final String DECODE_KEYS_OPTION = "source-decode-keys";
    public static final String DECODE_KEYS_DESC = "If specified, keys will be URL-decoded after listing them.  This can fix problems if you see file or directory names with characters like %2f in them.";

    // flag option: build the client configuration from the endpoint URI instead of a VDC list
    public static final String ENABLE_VHOSTS_OPTION = "source-enable-vhost";
    public static final String ENABLE_VHOSTS_DESC = "If specified, virtual hosted buckets will be enabled (bucket.s3.company.com). This will also disable node discovery.";

    // flag option: presence of this option turns smartClientEnabled off
    public static final String NO_SMART_CLIENT_OPTION = "source-no-smart-client";
    public static final String NO_SMART_CLIENT_DESC = "Disables the smart client (client-side load balancing). Necessary when using a proxy or external load balancer without DNS configuration.";

    // flag option: selects the default S3JerseyClient handler instead of URLConnectionClientHandler
    public static final String APACHE_CLIENT_OPTION = "source-apache-client";
    public static final String APACHE_CLIENT_DESC = "If specified, source will use the Apache HTTP client, which is not as efficient, but enables Expect: 100-Continue (header pre-flight).";

    // label passed to time() when timing the delete operation in delete()
    public static final String OPERATION_DELETE_OBJECT = "EcsS3DeleteObject";

    // option naming a file of keys to transfer (takes one argument, shown as "filename")
    public static final String SOURCE_KEY_LIST_OPTION = "source-key-list";
    public static final String SOURCE_KEY_LIST_DESC = "If specified, the list of keys to transfer will be read from the named file.";

    // ---- connection settings: taken from the source URI in parseCustomOptions(); protocol, vdcs and port
    // ---- may also be inferred from endpoint in configure() when they are unset
    private String protocol;
    private List<Vdc> vdcs;
    private int port; // values <= 0 are treated as "not set" by configure()
    private URI endpoint;
    private String accessKey;
    private String secretKey;
    private boolean enableVHosts;
    private boolean smartClientEnabled = true; // on unless NO_SMART_CLIENT_OPTION is given
    private String bucketName;
    private String rootKey; // configure() replaces null with ""
    private boolean decodeKeys;
    private EcsS3Target s3Target; // assigned in configure() only when the target is an EcsS3Target
    private boolean versioningEnabled; // set in configure() from the bucket's versioning status
    private boolean apacheClientEnabled;
    private File sourceKeyList; // optional; when set, keys are read from this file instead of listing

    // S3 client; created in configure() and destroyed in cleanup()
    private S3Client s3;

    /**
     * Reports whether this plugin handles the given source URI.
     *
     * @param sourceUri the source URI to test
     * @return {@code true} when the URI begins with {@link EcsS3Util#URI_PREFIX}
     */
    @Override
    public boolean canHandleSource(String sourceUri) {
        boolean handled = sourceUri.startsWith(EcsS3Util.URI_PREFIX);
        return handled;
    }

    /**
     * Declares the command-line options specific to this source: the bucket name, the key-decoding,
     * virtual-host, smart-client and Apache-client flags, and the key list file.
     *
     * @return a new {@link Options} holding the six options, added in declaration order
     */
    @Override
    public Options getCustomOptions() {
        List<Option> declared = Arrays.asList(
                Option.builder().longOpt(BUCKET_OPTION).desc(BUCKET_DESC).hasArg().argName(BUCKET_ARG_NAME).build(),
                Option.builder().longOpt(DECODE_KEYS_OPTION).desc(DECODE_KEYS_DESC).build(),
                Option.builder().longOpt(ENABLE_VHOSTS_OPTION).desc(ENABLE_VHOSTS_DESC).build(),
                Option.builder().longOpt(NO_SMART_CLIENT_OPTION).desc(NO_SMART_CLIENT_DESC).build(),
                Option.builder().longOpt(APACHE_CLIENT_OPTION).desc(APACHE_CLIENT_DESC).build(),
                Option.builder().longOpt(SOURCE_KEY_LIST_OPTION).hasArg().argName("filename")
                        .desc(SOURCE_KEY_LIST_DESC).build());

        Options customOptions = new Options();
        for (Option option : declared) {
            customOptions.addOption(option);
        }
        return customOptions;
    }

    /**
     * Populates this source from the source URI and the parsed command line.
     * <p>
     * Connection settings (protocol, VDCs, port, credentials, root key, endpoint) always come from
     * the parsed source URI. The bucket name and key list file are only overwritten when their
     * option is present; the boolean flags are always overwritten.
     *
     * @param line the parsed command line
     */
    @Override
    public void parseCustomOptions(CommandLine line) {
        final EcsS3Util.S3Uri parsed = EcsS3Util.parseUri(sourceUri);
        protocol = parsed.protocol;
        vdcs = parsed.vdcs;
        port = parsed.port;
        accessKey = parsed.accessKey;
        secretKey = parsed.secretKey;
        rootKey = parsed.rootKey;
        endpoint = parsed.getEndpointUri();

        // keep any previously set bucket name when the option is absent
        if (line.hasOption(BUCKET_OPTION)) {
            bucketName = line.getOptionValue(BUCKET_OPTION);
        }

        // plain flags: present means on (smart client is inverted: present means off)
        decodeKeys = line.hasOption(DECODE_KEYS_OPTION);
        enableVHosts = line.hasOption(ENABLE_VHOSTS_OPTION);
        boolean smartClientDisabled = line.hasOption(NO_SMART_CLIENT_OPTION);
        smartClientEnabled = !smartClientDisabled;
        apacheClientEnabled = line.hasOption(APACHE_CLIENT_OPTION);

        // keep any previously set key list when the option is absent
        if (line.hasOption(SOURCE_KEY_LIST_OPTION)) {
            String keyListPath = line.getOptionValue(SOURCE_KEY_LIST_OPTION);
            sourceKeyList = new File(keyListPath);
        }
    }

    /**
     * Validates the configuration, creates the S3 client and checks the source bucket.
     * <p>
     * Steps, in order:
     * <ol>
     * <li>require access key, secret key and a bucket name matching {@code [A-Za-z0-9._-]+};</li>
     * <li>if a key list file is configured, require that it exists (checked before any client is
     * created so a bad path fails fast without touching the network);</li>
     * <li>build the {@link S3Config}: from the endpoint URI when virtual hosts are enabled, otherwise
     * from protocol/VDCs/port, inferring any of those that are unset from the endpoint;</li>
     * <li>create the client and verify the bucket exists;</li>
     * <li>when the target is an {@link EcsS3Target} that includes versions, record whether the
     * source bucket's versioning status is Enabled or Suspended.</li>
     * </ol>
     *
     * @param source  not used by this implementation
     * @param filters not used by this implementation
     * @param target  the sync target; only inspected for version support
     * @throws ConfigurationException if the key list file or the bucket does not exist
     */
    @Override
    public void configure(SyncSource source, Iterator<SyncFilter> filters, SyncTarget target) {
        Assert.hasText(accessKey, "accessKey is required");
        Assert.hasText(secretKey, "secretKey is required");
        Assert.hasText(bucketName, "bucketName is required");
        Assert.isTrue(bucketName.matches("[A-Za-z0-9._-]+"), bucketName + " is not a valid bucket name");

        // purely local check: do it before a client exists or any request is sent
        if (sourceKeyList != null && !sourceKeyList.exists()) {
            throw new ConfigurationException("The key list file " + sourceKeyList + " does not exist");
        }

        S3Config s3Config;
        if (enableVHosts) {
            Assert.notNull(endpoint, "endpoint is required");
            s3Config = new S3Config(endpoint);
        } else {
            // try to infer from endpoint
            if (endpoint != null) {
                if (vdcs == null && endpoint.getHost() != null) {
                    vdcs = new ArrayList<>();
                    for (String host : endpoint.getHost().split(",")) {
                        vdcs.add(new Vdc(host));
                    }
                }
                if (port <= 0 && endpoint.getPort() > 0)
                    port = endpoint.getPort();
                if (protocol == null && endpoint.getScheme() != null)
                    protocol = endpoint.getScheme();
            }
            Assert.hasText(protocol, "protocol is required");
            Assert.notEmpty(vdcs, "at least one VDC is required");
            // Locale.ROOT keeps the enum lookup independent of the JVM's default locale
            s3Config = new S3Config(Protocol.valueOf(protocol.toUpperCase(Locale.ROOT)),
                    vdcs.toArray(new Vdc[vdcs.size()]));
            if (port > 0)
                s3Config.setPort(port);
            s3Config.setSmartClient(smartClientEnabled);
        }
        s3Config.withIdentity(accessKey).withSecretKey(secretKey);

        if (apacheClientEnabled) {
            s3 = new S3JerseyClient(s3Config);
        } else {
            // NOTE: this is a JVM-wide system property, not a per-client setting
            System.setProperty("http.maxConnections", "100");
            s3 = new S3JerseyClient(s3Config, new URLConnectionClientHandler());
        }

        // TODO: generalize uri translation
        // rebuild a source URI from the individual settings when none was supplied
        EcsS3Util.S3Uri s3Uri = new EcsS3Util.S3Uri();
        s3Uri.protocol = protocol;
        s3Uri.vdcs = vdcs;
        s3Uri.port = port;
        s3Uri.accessKey = accessKey;
        s3Uri.secretKey = secretKey;
        s3Uri.rootKey = rootKey;
        if (sourceUri == null)
            sourceUri = s3Uri.toUri();

        if (!s3.bucketExists(bucketName)) {
            throw new ConfigurationException("The bucket " + bucketName + " does not exist.");
        }

        if (rootKey == null)
            rootKey = ""; // make sure rootKey isn't null

        // for version support. TODO: genericize version support
        if (target instanceof EcsS3Target) {
            s3Target = (EcsS3Target) target;
            if (s3Target.isIncludeVersions()) {
                VersioningConfiguration versioningConfig = s3.getBucketVersioning(bucketName);
                List<VersioningConfiguration.Status> versionedStates = Arrays
                        .asList(VersioningConfiguration.Status.Enabled, VersioningConfiguration.Status.Suspended);
                versioningEnabled = versionedStates.contains(versioningConfig.getStatus());
            }
        }
    }

    /**
     * Estimates how much data this source will produce.
     * <ul>
     * <li>With a key list file: counts the keys in the file; no byte count is collected.</li>
     * <li>With an empty root key or one ending in {@code /}: pages through the bucket listing under
     * that prefix and totals object count and size.</li>
     * <li>Otherwise: counts a single object and reads its content length from its metadata.</li>
     * </ul>
     *
     * @return the populated estimate
     */
    @Override
    public SyncEstimate createEstimate() {
        SyncEstimate estimate = new SyncEstimate();

        if (sourceKeyList != null) {
            Iterator<EcsS3SyncObject> i = getSourceKeyListIterator();
            while (i.hasNext() && i.next() != null)
                estimate.incTotalObjectCount(1);
            return estimate;
        }

        // root key ending in a slash signifies a directory
        if (rootKey.isEmpty() || rootKey.endsWith("/")) {
            // an empty prefix is sent as "no prefix", matching what PrefixIterator does when listing
            String prefix = rootKey.isEmpty() ? null : rootKey;

            ListObjectsResult listResult = s3.listObjects(bucketName, prefix);
            while (true) {
                for (S3Object object : listResult.getObjects()) {
                    estimate.incTotalObjectCount(1);
                    estimate.incTotalByteCount(object.getSize());
                }
                if (!listResult.isTruncated())
                    break;
                listResult = s3.listMoreObjects(listResult);
            }

        } else { // otherwise, assume only one object
            estimate.incTotalObjectCount(1);
            estimate.incTotalByteCount(s3.getObjectMetadata(bucketName, rootKey).getContentLength());
        }

        return estimate;
    }

    /**
     * Returns the top-level iterator over the objects to sync.
     * <p>
     * A configured key list file takes precedence. Otherwise a root key that is empty or ends in
     * {@code /} is enumerated as a prefix (followed by deleted objects when versioning is enabled),
     * and any other root key is treated as the key of a single object.
     */
    @Override
    public Iterator<EcsS3SyncObject> iterator() {
        if (sourceKeyList != null) {
            return getSourceKeyListIterator();
        }

        // a non-empty root key without a trailing slash names exactly one object
        boolean singleObject = !rootKey.isEmpty() && !rootKey.endsWith("/");
        if (singleObject) {
            EcsS3SyncObject onlyObject = new EcsS3SyncObject(this, s3, bucketName, rootKey,
                    getRelativePath(rootKey));
            return Collections.singletonList(onlyObject).iterator();
        }

        // root key ending in a slash signifies a directory
        if (!versioningEnabled) {
            return new PrefixIterator(rootKey);
        }
        return new CombinedIterator<>(
                Arrays.asList(new PrefixIterator(rootKey), new DeletedObjectIterator(rootKey)));
    }

    /**
     * Builds an iterator that turns each line of the key list file into a sync object, using the
     * line as the object key.
     *
     * @return an iterator that yields one object per line and {@code null} once the file is exhausted
     */
    private Iterator<EcsS3SyncObject> getSourceKeyListIterator() {
        final Iterator<String> keyLines = new FileLineIterator(sourceKeyList);

        return new ReadOnlyIterator<EcsS3SyncObject>() {
            @Override
            protected EcsS3SyncObject getNextObject() {
                if (!keyLines.hasNext()) {
                    return null;
                }
                String nextKey = keyLines.next();
                return new EcsS3SyncObject(EcsS3Source.this, s3, bucketName, nextKey, getRelativePath(nextKey));
            }
        };
    }

    /**
     * Always returns an empty iterator. This source enumerates everything under the root prefix as
     * one flat list (no hierarchy), so <strong>no</strong> object has children.
     *
     * @param syncObject ignored
     * @return an empty iterator
     */
    @Override
    public Iterator<EcsS3SyncObject> childIterator(EcsS3SyncObject syncObject) {
        return Collections.<EcsS3SyncObject>emptyIterator();
    }

    /**
     * Lists the versions of one object in the source bucket. Exists to support version syncing; the
     * actual listing is delegated to {@code EcsS3Util.listVersions}.
     *
     * @param syncObject the object whose versions are wanted
     * @return the iterator produced by {@code EcsS3Util.listVersions} for the object's key and relative path
     */
    public ListIterator<EcsS3ObjectVersion> versionIterator(EcsS3SyncObject syncObject) {
        String objectKey = syncObject.getKey();
        String relativePath = syncObject.getRelativePath();
        return EcsS3Util.listVersions(this, s3, bucketName, objectKey, relativePath);
    }

    /**
     * Verifies an object against the target, with version support.
     * <p>
     * Directories are skipped. When the target is an {@link EcsS3Target} that includes versions,
     * the version histories of source and target are compared first. The current version is then
     * verified: if it is a delete marker on the source, the target lookup must fail with a 404;
     * otherwise verification is delegated to the superclass.
     *
     * @param syncObject  the source object to verify
     * @param filterChain the filter chain used to locate the corresponding target object
     * @throws RuntimeException if the version histories differ or a deleted object still exists on the target
     */
    @Override
    public void verify(EcsS3SyncObject syncObject, SyncFilter filterChain) {

        // this implementation only verifies data objects
        if (syncObject.isDirectory()) {
            return;
        }

        // must first verify versions
        if (s3Target != null && s3Target.isIncludeVersions()) {
            verifyVersionHistory(syncObject);
        }

        // verify current version
        boolean latestIsDeleteMarker = syncObject instanceof EcsS3ObjectVersion
                && ((EcsS3ObjectVersion) syncObject).isDeleteMarker();
        if (!latestIsDeleteMarker) {
            super.verify(syncObject, filterChain);
            return;
        }

        SyncObject targetObject = filterChain.reverseFilter(syncObject);
        try {
            targetObject.getMetadata(); // this should return a 404
            throw new RuntimeException(
                    "Latest object version is a delete marker on the source, but exists on the target");
        } catch (S3Exception lookupFailure) {
            // a 404 is the expected outcome; anything else is a genuine error
            if (lookupFailure.getHttpCode() != 404) {
                throw lookupFailure;
            }
        }
    }

    /**
     * Walks the source and target version lists of one object in lock step and checks that they
     * have the same length and that each non-latest pair matches.
     */
    private void verifyVersionHistory(EcsS3SyncObject syncObject) {
        Iterator<EcsS3ObjectVersion> sourceVersions = versionIterator(syncObject);
        Iterator<EcsS3ObjectVersion> targetVersions = s3Target.versionIterator(syncObject);

        // special workaround for bug where objects are listed, but they have no versions
        if (!sourceVersions.hasNext()) {
            return;
        }

        while (sourceVersions.hasNext()) {
            if (!targetVersions.hasNext()) {
                throw new RuntimeException(
                        "The source system has more versions of the object than the target");
            }
            verifyVersionPair(sourceVersions.next(), targetVersions.next());
        }

        if (targetVersions.hasNext()) {
            throw new RuntimeException(String.format(
                    "The target system has more versions of the object than the source (are other clients writing to the target?) [{%s}]",
                    targetVersions.next().getSourceIdentifier()));
        }
    }

    /**
     * Compares one source version with the target version at the same position: delete markers
     * must match delete markers, and data versions are compared with {@code verifyObjects}.
     */
    private void verifyVersionPair(EcsS3ObjectVersion sourceVersion, EcsS3ObjectVersion targetVersion) {
        if (sourceVersion.isLatest()) {
            return; // current version is verified through filter chain by the caller
        }

        log.debug("#==? verifying version (source vID: {}, target vID: {})",
                sourceVersion.getVersionId(), targetVersion.getVersionId());

        boolean sourceDeleted = sourceVersion.isDeleteMarker();
        boolean targetDeleted = targetVersion.isDeleteMarker();

        if (sourceDeleted && targetDeleted) {
            log.info("#==# delete marker verified for version (source vID: {}, target vID: {})",
                    sourceVersion.getVersionId(), targetVersion.getVersionId());
            return;
        }
        if (sourceDeleted) {
            throw new RuntimeException(String.format(
                    "Version: source is delete marker; target isn't (source vID: %s, target vID: %s)",
                    sourceVersion.getVersionId(), targetVersion.getVersionId()));
        }
        if (targetDeleted) {
            throw new RuntimeException(String.format(
                    "Version: target is delete marker; source isn't (source vID: %s, target vID: %s)",
                    sourceVersion.getVersionId(), targetVersion.getVersionId()));
        }

        try {
            verifyObjects(sourceVersion, targetVersion);
            log.info("#==# checksum verified for version (source vID: {}, target vID: {})",
                    sourceVersion.getVersionId(), targetVersion.getVersionId());
        } catch (RuntimeException e) {
            throw new RuntimeException(
                    String.format("Version: checksum failed (source vID: %s, target vID: %s)",
                            sourceVersion.getVersionId(), targetVersion.getVersionId()),
                    e);
        }
    }

    /**
     * Deletes the object's key from the source bucket. The call is wrapped in {@code time(...)}
     * under the {@link #OPERATION_DELETE_OBJECT} label.
     *
     * @param syncObject the object to delete
     */
    @Override
    public void delete(final EcsS3SyncObject syncObject) {
        final Function<Void> deleteCall = new Function<Void>() {
            @Override
            public Void call() {
                s3.deleteObject(bucketName, syncObject.getKey());
                return null;
            }
        };
        time(deleteCall, OPERATION_DELETE_OBJECT);
    }

    /**
     * @return the fixed name of this source, {@code "S3 Source"}
     */
    @Override
    public String getName() {
        return "S3 Source";
    }

    /**
     * Builds the help text for this source: what it does, the URI pattern that triggers it
     * ({@code EcsS3Util.PATTERN_DESC}), and how the optional root prefix is interpreted.
     *
     * @return the documentation text
     */
    @Override
    public String getDocumentation() {
        StringBuilder doc = new StringBuilder();
        doc.append("Scans and reads content from an ECS S3 bucket. This ");
        doc.append("source plugin is triggered by the pattern:\n");
        doc.append(EcsS3Util.PATTERN_DESC).append("\n");
        doc.append("Scheme, host and port are all required. ");
        doc.append("root-prefix (optional) is the prefix under which to start ");
        doc.append("enumerating within the bucket, e.g. dir1/. If omitted the ");
        doc.append("root of the bucket will be enumerated.");
        return doc.toString();
    }

    /**
     * Appends this source's settings to the superclass summary, one {@code " - name: value"} line
     * per setting.
     *
     * @return the combined configuration summary
     */
    @Override
    public String summarizeConfig() {
        StringBuilder summary = new StringBuilder(super.summarizeConfig());
        summary.append(" - enableVHosts: ").append(enableVHosts).append("\n");
        summary.append(" - smartClientEnabled: ").append(smartClientEnabled).append("\n");
        summary.append(" - bucketName: ").append(bucketName).append("\n");
        summary.append(" - rootKey: ").append(rootKey).append("\n");
        summary.append(" - decodeKeys: ").append(decodeKeys).append("\n");
        summary.append(" - versioningEnabled: ").append(versioningEnabled).append("\n");
        summary.append(" - apacheClientEnabled: ").append(apacheClientEnabled).append("\n");
        summary.append(" - sourceKeyList: ").append(sourceKeyList).append("\n");
        return summary.toString();
    }

    /**
     * Releases the S3 client and then runs the superclass cleanup.
     * <p>
     * The client is only created in {@code configure()}, so it may still be {@code null} here if
     * configuration failed early; that case is skipped rather than raising a
     * {@code NullPointerException} that would hide the original failure. The superclass cleanup
     * runs even if destroying the client throws.
     */
    @Override
    public void cleanup() {
        try {
            if (s3 != null) {
                s3.destroy();
            }
        } finally {
            super.cleanup();
        }
    }

    /**
     * Converts an object key into a path relative to the root key.
     * <p>
     * The root key is stripped when the key starts with it; the result is then URL-decoded if key
     * decoding is enabled.
     *
     * @param key the full object key
     * @return the key without the leading root key, optionally URL-decoded
     */
    protected String getRelativePath(String key) {
        String relative = key.startsWith(rootKey) ? key.substring(rootKey.length()) : key;
        if (decodeKeys) {
            return decodeKey(relative);
        }
        return relative;
    }

    /**
     * URL-decodes a key using UTF-8.
     * <p>
     * Decoding is done with {@link URLDecoder}, so {@code %xx} sequences are decoded and
     * {@code +} becomes a space.
     *
     * @param key the encoded key
     * @return the decoded key
     * @throws RuntimeException if UTF-8 is not available; the original exception is kept as the cause
     */
    protected String decodeKey(String key) {
        try {
            return URLDecoder.decode(key, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // keep the cause so the stack trace shows where the failure originated
            throw new RuntimeException("UTF-8 is not supported on this platform", e);
        }
    }

    /**
     * Iterates over the objects listed under a key prefix in the source bucket, fetching listing
     * pages on demand.
     */
    protected class PrefixIterator extends ReadOnlyIterator<EcsS3SyncObject> {
        private String prefix;
        private ListObjectsResult listing;
        private Iterator<S3Object> objectIterator;

        /**
         * @param prefix the key prefix to list; an empty string lists without a prefix
         */
        public PrefixIterator(String prefix) {
            this.prefix = prefix;
        }

        /**
         * @return the next listed object, or {@code null} when the listing is exhausted
         */
        @Override
        protected EcsS3SyncObject getNextObject() {
            // Keep fetching while the current page is used up but the listing says there is more.
            // A loop (rather than a single fetch) is needed because a page may come back empty
            // while still being truncated; stopping there would silently skip the remaining objects.
            while (listing == null || (!objectIterator.hasNext() && listing.isTruncated())) {
                getNextBatch();
            }

            if (objectIterator.hasNext()) {
                S3Object object = objectIterator.next();
                return new EcsS3SyncObject(EcsS3Source.this, s3, bucketName, object.getKey(),
                        getRelativePath(object.getKey()), object.getSize());
            }

            // list is not truncated and iterators are finished; no more objects
            return null;
        }

        /**
         * Fetches the first page, or the page after the current one, and points
         * {@code objectIterator} at its objects.
         */
        private void getNextBatch() {
            if (listing == null) {
                listing = s3.listObjects(bucketName, "".equals(prefix) ? null : prefix);
            } else {
                log.info(
                        "getting next page of objects [prefix: {}, marker: {}, nextMarker: {}, encodingType: {}, maxKeys: {}]",
                        listing.getPrefix(), listing.getMarker(), listing.getNextMarker(),
                        listing.getEncodingType(), listing.getMaxKeys());
                listing = s3.listMoreObjects(listing);
            }
            objectIterator = listing.getObjects().iterator();
        }
    }

    /**
     * Iterates over the keys under a prefix whose latest version is a delete marker, by paging
     * through the bucket's version listing and filtering.
     */
    protected class DeletedObjectIterator extends ReadOnlyIterator<EcsS3SyncObject> {
        private String prefix;
        private ListVersionsResult versionListing;
        private Iterator<AbstractVersion> versionIterator;

        /**
         * @param prefix the key prefix to list; an empty string lists without a prefix
         */
        public DeletedObjectIterator(String prefix) {
            this.prefix = prefix;
        }

        /**
         * @return the next object whose latest version is a delete marker, or {@code null} when
         *         the version listing is exhausted
         */
        @Override
        protected EcsS3SyncObject getNextObject() {
            while (true) {
                AbstractVersion version = getNextVersion();

                if (version == null)
                    return null;

                // only latest-version delete markers are reported; everything else is skipped
                if (version.isLatest() && version instanceof DeleteMarker)
                    return new EcsS3ObjectVersion(EcsS3Source.this, s3, bucketName, version.getKey(),
                            version.getVersionId(), version.isLatest(), true, version.getLastModified(), null,
                            getRelativePath(version.getKey()));
            }
        }

        /**
         * @return the next entry of the version listing, or {@code null} when there are none left
         */
        protected AbstractVersion getNextVersion() {
            // look for deleted objects in versioned bucket
            // Loop rather than fetch once: a page may be empty yet truncated, and stopping there
            // would end the iteration before the listing is actually finished.
            while (versionListing == null || (!versionIterator.hasNext() && versionListing.isTruncated())) {
                getNextVersionBatch();
            }

            if (versionIterator.hasNext()) {
                return versionIterator.next();
            }

            // no more versions
            return null;
        }

        /**
         * Fetches the first page of versions, or the page after the current one, and points
         * {@code versionIterator} at its entries.
         */
        private void getNextVersionBatch() {
            if (versionListing == null) {
                versionListing = s3.listVersions(bucketName, "".equals(prefix) ? null : prefix);
            } else {
                versionListing = s3.listMoreVersions(versionListing);
            }
            versionIterator = versionListing.getVersions().iterator();
        }
    }

    /**
     * Chains several iterators: drains the first, then the second, and so on.
     *
     * @param <T> the element type
     */
    protected class CombinedIterator<T> extends ReadOnlyIterator<T> {
        private List<? extends Iterator<T>> iterators;
        private int currentIterator = 0;

        /**
         * @param iterators the iterators to drain, in order
         */
        public CombinedIterator(List<? extends Iterator<T>> iterators) {
            this.iterators = iterators;
        }

        /**
         * @return the next element of the first iterator that still has one, or {@code null} when
         *         every iterator is exhausted
         */
        @Override
        protected T getNextObject() {
            // the index only advances once the iterator at that position has nothing left
            for (; currentIterator < iterators.size(); currentIterator++) {
                Iterator<T> active = iterators.get(currentIterator);
                if (active.hasNext()) {
                    return active.next();
                }
            }

            return null;
        }
    }

    /**
     * @return the name of the source bucket, or {@code null} if none has been set
     */
    public String getBucketName() {
        return bucketName;
    }

    /**
     * @param bucketName the name of the source bucket; {@code configure()} requires it to be
     *                   non-blank and to match {@code [A-Za-z0-9._-]+}
     */
    public void setBucketName(String bucketName) {
        this.bucketName = bucketName;
    }

    /**
     * @return the root key; {@code configure()} replaces a {@code null} root key with an empty string
     */
    public String getRootKey() {
        return rootKey;
    }

    /**
     * @param rootKey the root key; an empty value or one ending in {@code /} is enumerated as a
     *                prefix, any other value is treated as the key of a single object
     */
    public void setRootKey(String rootKey) {
        this.rootKey = rootKey;
    }

    /**
     * @return the protocol name used to build the client configuration when virtual hosts are disabled
     */
    public String getProtocol() {
        return protocol;
    }

    /**
     * @param protocol the protocol name; {@code configure()} upper-cases it and resolves it with
     *                 {@code Protocol.valueOf}. If {@code null}, it is inferred from the endpoint scheme.
     */
    public void setProtocol(String protocol) {
        this.protocol = protocol;
    }

    /**
     * @return the VDC list held by this source (the internal list itself, not a copy); may be {@code null}
     */
    public List<Vdc> getVdcs() {
        return vdcs;
    }

    /**
     * @param vdcs the VDCs to connect to when virtual hosts are disabled; if {@code null},
     *             {@code configure()} derives them from the comma-separated endpoint host
     */
    public void setVdcs(List<Vdc> vdcs) {
        this.vdcs = vdcs;
    }

    /**
     * @return the configured port (boxed from the primitive field, so never {@code null});
     *         {@code configure()} treats values of zero or less as "not set"
     */
    public Integer getPort() {
        return port;
    }

    /**
     * Sets the port used when virtual hosts are disabled.
     *
     * @param port the port; {@code null} is stored as 0, which {@code configure()} treats as
     *             "not set" (previously a {@code null} argument failed with a
     *             {@code NullPointerException} on unboxing)
     */
    public void setPort(Integer port) {
        this.port = (port == null) ? 0 : port;
    }

    /**
     * @return the endpoint URI, or {@code null} if none has been set
     */
    public URI getEndpoint() {
        return endpoint;
    }

    /**
     * @param endpoint the endpoint URI; required by {@code configure()} when virtual hosts are
     *                 enabled, otherwise used only to infer unset VDCs, port and protocol
     */
    public void setEndpoint(URI endpoint) {
        this.endpoint = endpoint;
    }

    /**
     * @return the access key used as the client identity
     */
    public String getAccessKey() {
        return accessKey;
    }

    /**
     * @param accessKey the access key; {@code configure()} requires it to be non-blank
     */
    public void setAccessKey(String accessKey) {
        this.accessKey = accessKey;
    }

    /**
     * @return the secret key passed to the client configuration
     */
    public String getSecretKey() {
        return secretKey;
    }

    /**
     * @param secretKey the secret key; {@code configure()} requires it to be non-blank
     */
    public void setSecretKey(String secretKey) {
        this.secretKey = secretKey;
    }

    /**
     * @return {@code true} if relative paths are URL-decoded in {@code getRelativePath()}
     */
    public boolean isDecodeKeys() {
        return decodeKeys;
    }

    /**
     * @param decodeKeys {@code true} to URL-decode relative paths derived from object keys
     */
    public void setDecodeKeys(boolean decodeKeys) {
        this.decodeKeys = decodeKeys;
    }

    /**
     * @return {@code true} if the client configuration is built from the endpoint URI rather than
     *         from protocol, VDCs and port
     */
    public boolean isEnableVHosts() {
        return enableVHosts;
    }

    /**
     * @param enableVHosts {@code true} to build the client configuration from the endpoint URI;
     *                     {@code configure()} then requires the endpoint to be set
     */
    public void setEnableVHosts(boolean enableVHosts) {
        this.enableVHosts = enableVHosts;
    }

    /**
     * @return {@code true} (the default) if the smart client setting is passed as enabled to the
     *         client configuration
     */
    public boolean isSmartClientEnabled() {
        return smartClientEnabled;
    }

    /**
     * @param smartClientEnabled the smart client setting; only applied by {@code configure()} when
     *                           virtual hosts are disabled
     */
    public void setSmartClientEnabled(boolean smartClientEnabled) {
        this.smartClientEnabled = smartClientEnabled;
    }

    /**
     * @return {@code true} if deleted objects are appended to the prefix listing in {@code iterator()}
     */
    public boolean isVersioningEnabled() {
        return versioningEnabled;
    }

    /**
     * @param versioningEnabled the versioning flag; note that {@code configure()} overwrites it
     *                          when the target is an {@code EcsS3Target} that includes versions
     */
    public void setVersioningEnabled(boolean versioningEnabled) {
        this.versioningEnabled = versioningEnabled;
    }

    /**
     * @return {@code true} if {@code configure()} creates the client without the
     *         {@code URLConnectionClientHandler}
     */
    public boolean isApacheClientEnabled() {
        return apacheClientEnabled;
    }

    /**
     * @param apacheClientEnabled {@code true} to create the client with its default handler;
     *                            {@code false} to use {@code URLConnectionClientHandler}
     */
    public void setApacheClientEnabled(boolean apacheClientEnabled) {
        this.apacheClientEnabled = apacheClientEnabled;
    }

    /**
     * @return the file listing the keys to transfer, or {@code null} if the bucket is listed instead
     */
    public File getSourceKeyList() {
        return sourceKeyList;
    }

    /**
     * @param sourceKeyList a file with the keys to transfer; when non-null, {@code configure()}
     *                      requires it to exist and it replaces bucket listing in
     *                      {@code iterator()} and {@code createEstimate()}
     */
    public void setSourceKeyList(File sourceKeyList) {
        this.sourceKeyList = sourceKeyList;
    }
}