Example usage for com.amazonaws.services.s3.model S3ObjectSummary getKey

Introduction

This page collects example usages of com.amazonaws.services.s3.model.S3ObjectSummary.getKey().

Prototype

public String getKey()

Source Link

Document

Gets the key under which this object is stored in Amazon S3.

Usage

From source file:com.scoyo.tools.s3cacheenhancer.S3HeaderEnhancer.java

License:Apache License

/**
 * Submits one task per listed object that rewrites the object's Cache-Control header.
 *
 * <p>Each task reads the object's metadata, skips objects whose content type is
 * {@code application/x-directory} or whose Cache-Control already equals {@code maxAgeHeader},
 * and otherwise copies the object onto itself with the updated metadata and its current ACL.
 * Every outcome is reported on standard output.
 *
 * @param listing         listing whose object summaries are processed
 * @param maxAgeHeader    Cache-Control value every object should end up with
 * @param executorService executor the per-object tasks are submitted to
 */
private void setHeaders(ObjectListing listing, final String maxAgeHeader, ExecutorService executorService) {

    for (final S3ObjectSummary summary : listing.getObjectSummaries()) {
        executorService.submit(new Runnable() {
            @Override
            public void run() {
                String bucket = summary.getBucketName();
                String key = summary.getKey();

                // The Future returned by submit() is discarded, so anything thrown out of run()
                // would vanish silently. Guard the whole task, not just the metadata lookup.
                try {
                    ObjectMetadata metadata = s3.getObjectMetadata(bucket, key);

                    if ("application/x-directory".equals(metadata.getContentType())) {
                        System.out.println("Skipping because content-type " + key);
                        return;
                    }

                    if (maxAgeHeader.equals(metadata.getCacheControl())) {
                        System.out.println("Skipping because header is already correct " + key);
                        return;
                    }
                    metadata.setCacheControl(maxAgeHeader);

                    // Copying an object onto itself is how the metadata gets replaced; the ACL is
                    // fetched first and re-applied on the copy request.
                    AccessControlList acl = s3.getObjectAcl(bucket, key);

                    CopyObjectRequest copyReq = new CopyObjectRequest(bucket, key, bucket, key)
                            .withAccessControlList(acl).withNewObjectMetadata(metadata);

                    CopyObjectResult result = s3.copyObject(copyReq);

                    if (result != null) {
                        System.out.println("Updated " + key);
                    } else {
                        System.out.println("Could not update " + key);
                    }
                } catch (AmazonS3Exception exception) {
                    System.out.println("Could not update " + key + " [" + exception.getMessage() + "]");
                } catch (RuntimeException exception) {
                    // Task boundary: report client-side failures (e.g. network) as well.
                    System.out.println("Could not update " + key + " [" + exception + "]");
                }
            }
        });
    }
}

From source file:com.shareplaylearn.models.UserItemManager.java

License:Open Source License

/**
 * Maps every object key in the listing to its external location.
 *
 * <p>Keys for which {@code makeExternalLocation} yields {@code null} are logged at info level
 * and left out of the result.
 *
 * @param objectListing listing whose object keys are converted
 * @return the set of non-null external locations
 */
private HashSet<String> getExternalItemListing(ObjectListing objectListing) {
    final HashSet<String> externalLocations = new HashSet<>();
    for (S3ObjectSummary summary : objectListing.getObjectSummaries()) {
        final String externalLocation = makeExternalLocation(summary.getKey());
        if (externalLocation == null) {
            log.info("External path for object list was null?");
            continue;
        }
        externalLocations.add(externalLocation);
        log.debug("External path was " + externalLocation);
    }
    return externalLocations;
}

From source file:com.sjsu.faceit.example.S3Sample.java

License:Open Source License

public static void main(String[] args) throws IOException {
    /*/*from ww  w . j  a  v  a  2s  .c o  m*/
     * Important: Be sure to fill in your AWS access credentials in the
     *            AwsCredentials.properties file before you try to run this
     *            sample.
     * http://aws.amazon.com/security-credentials
     */
    System.out.println(new File(".").getAbsolutePath());
    AmazonS3 s3 = new AmazonS3Client(
            new PropertiesCredentials(S3Sample.class.getResourceAsStream("AwsCredentials.properties")));

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, "abc/" + key, new File("/Users/prayag/Desktop/2.jpg")));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possibly since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, "abc/" + key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket.  Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        //            System.out.println("Deleting an object\n");
        //            s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        //            System.out.println("Deleting bucket " + bucketName + "\n");
        //            s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}

From source file:com.sludev.commons.vfs2.provider.s3.SS3FileObject.java

License:Apache License

/**
 * Lists the children of this file.  Is only called if {@link #doGetType}
 * returns {@link FileType#FOLDER}.  The return value of this method
 * is cached, so the implementation can be expensive.
 *
 * <p>The folder's path is used as a key prefix (a trailing slash is appended when missing) with
 * {@code "/"} as delimiter, so only direct children are returned: object keys plus common
 * prefixes (sub-folders), each formatted as {@code /<bucket>/<key>}. All pages of a truncated
 * listing are followed.
 *
 * @return a possibly empty array of child paths; never {@code null}
 * @throws Exception if an error occurs.
 */
@Override
protected String[] doListChildren() throws Exception {
    Pair<String, String> path = getContainerAndPath();

    String prefix = path.getRight();
    if (!prefix.endsWith("/")) {
        // We need folders ( prefixes ) to end with a slash
        prefix += "/";
    }
    ListObjectsRequest loReq = new ListObjectsRequest();
    loReq.withBucketName(path.getLeft());
    loReq.withPrefix(prefix);
    loReq.withDelimiter("/");

    List<String> resList = new ArrayList<>();

    ObjectListing blobs = fileSystem.getClient().listObjects(loReq);
    while (true) {
        for (S3ObjectSummary osum : blobs.getObjectSummaries()) {
            resList.add(String.format("/%s/%s", path.getLeft(), osum.getKey()));
        }

        List<String> commPrefixes = blobs.getCommonPrefixes();
        if (commPrefixes != null) {
            for (String currFld : commPrefixes) {
                resList.add(String.format("/%s/%s", path.getLeft(), currFld));
            }
        }

        // A single response holds only one page of results; keep fetching until the listing
        // is complete so that large folders are not silently cut short.
        if (!blobs.isTruncated()) {
            break;
        }
        blobs = fileSystem.getClient().listNextBatchOfObjects(blobs);
    }

    return resList.toArray(new String[resList.size()]);
}

From source file:com.springboot.demo.framework.aws.s3.S3Sample.java

License:Open Source License

public static void main(String[] args) throws IOException {

    /*/*from ww w  .  jav  a  2s  .  c  om*/
     * The ProfileCredentialsProvider will return your [default]
     * credential profile by reading from the credentials file located at
     * (~/.aws/credentials).
     */
    AWSCredentials basicCredentials = new BasicAWSCredentials(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY);

    AWSCredentials credentials = null;
    try {
        credentials = new ProfileCredentialsProvider().getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException("Cannot load the credentials from the credential profiles file. "
                + "Please make sure that your credentials file is at the correct "
                + "location (~/.aws/credentials), and is in valid format.", e);
    }

    /*
     * Create S3 Client
     */
    AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Returns an URL for the object stored in the specified bucket and  key
         */
        URL url = s3.getUrl(bucketName, key);
        System.out.println("upload file url : " + url.toString());

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possibly since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket.  Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}

From source file:com.streamsets.pipeline.stage.origin.s3.AbstractAmazonS3Source.java

License:Apache License

/**
 * Clears the current object, then polls the spooler until it returns an object that
 * {@code isEligible} accepts for the given offset, and makes that object current.
 *
 * <p>If the offset has no key yet, or the polled object is later than the offset's object, a
 * fresh offset at {@code ZERO} for the polled object is returned; otherwise the incoming offset
 * is returned unchanged. An interrupted poll is logged and the incoming offset is returned.
 *
 * @param s3Offset offset describing the object processed so far
 * @return the offset to continue from
 * @throws StageException if the spooler fails with an {@code AmazonClientException}
 */
private S3Offset fetchNextObjectFromSpooler(S3Offset s3Offset) throws StageException {
    setCurrentObject(null);
    try {
        // Objects are processed in chronological order, so the first candidate is normally
        // eligible. Only after a configuration change (e.g. a relaxed prefix) can an older
        // object show up; such candidates are logged and skipped.
        S3ObjectSummary candidate = spooler.poolForObject(s3Offset, s3ConfigBean.basicConfig.maxWaitTime,
                TimeUnit.MILLISECONDS);
        while (!isEligible(candidate, s3Offset)) {
            if (candidate != null) {
                LOG.warn("Ignoring object '{}' in spool directory as is lesser than offset object '{}'",
                        candidate.getKey(), s3Offset.getKey());
            }
            candidate = spooler.poolForObject(s3Offset, s3ConfigBean.basicConfig.maxWaitTime,
                    TimeUnit.MILLISECONDS);
        }

        if (candidate == null) {
            // Nothing arrived within the wait time.
            LOG.debug("No new object available in spool directory after '{}' secs, producing empty batch",
                    s3ConfigBean.basicConfig.maxWaitTime / 1000);
            return s3Offset;
        }

        setCurrentObject(candidate);

        // A missing offset key, or a candidate later than the offset's object, means a new object:
        // start it from offset zero. Otherwise the spooler handed back the object already in
        // progress and the tracked offset stays as it is.
        boolean startsNewObject = s3Offset.getKey() == null
                || isLaterThan(candidate.getKey(), candidate.getLastModified().getTime(),
                        s3Offset.getKey(), Long.parseLong(s3Offset.getTimestamp()));
        if (startsNewObject) {
            s3Offset = new S3Offset(getCurrentObject().getKey(), ZERO, getCurrentObject().getETag(),
                    String.valueOf(getCurrentObject().getLastModified().getTime()));
        }
    } catch (InterruptedException ex) {
        // Interrupted while waiting for an object: log and return; the pipeline agent invokes
        // this again to resume waiting.
        LOG.warn("Pooling interrupted");
    } catch (AmazonClientException e) {
        throw new StageException(Errors.S3_SPOOLDIR_23, e.toString(), e);
    }
    return s3Offset;
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

/**
 * Produces one batch of records from the current S3 object, fetching the next object from the
 * spooler first when required.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>For a non-null offset, lets the spooler post-process an older object if needed.</li>
 *   <li>Selects the object to read: either fetches the next one from the spooler, or keeps the
 *       current one unless its ETag differs from the offset's, in which case the current object
 *       is handled as an error and the next one is fetched.</li>
 *   <li>If there is a current object: reports the offset to the source, creates the parser when
 *       none exists, and adds up to {@code maxBatchSize} records to the batch.</li>
 *   <li>Always hands the batch to the context and updates the gauge.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code offset} is null-checked once at the top but dereferenced
 * unconditionally afterwards (e.g. {@code offset.toString()} at the end) - presumably callers
 * never pass {@code null}; confirm.
 *
 * @param offset       offset of the object/position processed so far
 * @param maxBatchSize maximum number of records added to the batch by this call
 * @param batchContext batch context that supplies the batch maker and is processed at the end
 * @return the offset to resume from on the next call
 * @throws StageException          if an {@code AmazonClientException} occurs, or parsing fails
 *                                 while the on-error policy is {@code STOP_PIPELINE}
 * @throws BadSpoolObjectException if parsing fails while the on-error policy is {@code TO_ERROR}
 */
private S3Offset produce(S3Offset offset, int maxBatchSize, BatchContext batchContext)
        throws StageException, BadSpoolObjectException {
    BatchMaker batchMaker = batchContext.getBatchMaker();

    S3ObjectSummary s3Object;

    if (offset != null) {
        spooler.postProcessOlderObjectIfNeeded(offset);
    }

    //check if we have an object to produce records from. Otherwise get from spooler.
    if (needToFetchNextObjectFromSpooler(offset)) {
        updateGauge(Status.SPOOLING, null);
        offset = fetchNextObjectFromSpooler(offset, batchContext);
        LOG.debug("Object '{}' with offset '{}' fetched from Spooler", offset.getKey(), offset.getOffset());
    } else {
        //check if the current object was modified between batches
        LOG.debug("Checking if Object '{}' has been modified between batches", getCurrentObject().getKey());
        if (!getCurrentObject().getETag().equals(offset.geteTag())) {
            //send the current object to error archive and get next object from spooler
            LOG.debug("Object '{}' has been modified between batches. Sending the object to error",
                    getCurrentObject().getKey());
            try {
                spooler.handleCurrentObjectAsError();
            } catch (AmazonClientException e) {
                throw new StageException(Errors.S3_SPOOLDIR_24, e.toString(), e);
            }
            offset = fetchNextObjectFromSpooler(offset, batchContext);
        }
    }

    // May be null when no object is current; in that case only the empty batch is processed below.
    s3Object = getCurrentObject();
    if (s3Object != null) {
        amazonS3Source.updateOffset(runnerId, offset);
        try {
            // A parser left over from a previous call is reused; a new one is only built when none exists.
            if (parser == null) {
                String recordId = s3ConfigBean.s3Config.bucket + s3ConfigBean.s3Config.delimiter
                        + s3Object.getKey();
                if (dataParser.isWholeFileFormat()) {
                    handleWholeFileDataFormat(s3Object, recordId);
                } else {
                    //Get S3 object instead of stream because we want to call close on the object when we close the
                    // parser (and stream)
                    if (context.isPreview()) {
                        // In preview, read at most DEFAULT_FETCH_SIZE bytes of the object.
                        long fetchSize = s3Object.getSize() > S3Constants.DEFAULT_FETCH_SIZE
                                ? S3Constants.DEFAULT_FETCH_SIZE
                                : s3Object.getSize();
                        if (fetchSize > 0) {
                            object = AmazonS3Util.getObjectRange(s3Client, s3ConfigBean.s3Config.bucket,
                                    s3Object.getKey(), fetchSize, s3ConfigBean.sseConfig.useCustomerSSEKey,
                                    s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                        } else {
                            // Zero-sized object: fall back to a plain (non-ranged) get.
                            LOG.warn("Size of object with key '{}' is 0", s3Object.getKey());
                            object = AmazonS3Util.getObject(s3Client, s3ConfigBean.s3Config.bucket,
                                    s3Object.getKey(), s3ConfigBean.sseConfig.useCustomerSSEKey,
                                    s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                        }
                    } else {
                        object = AmazonS3Util.getObject(s3Client, s3ConfigBean.s3Config.bucket,
                                s3Object.getKey(), s3ConfigBean.sseConfig.useCustomerSSEKey,
                                s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                    }
                    parser = dataParser.getParser(recordId, object.getObjectContent(), offset.getOffset());
                }
                sendLineageEvent(s3Object);
                //we don't use S3 GetObject range capabilities to skip the already process offset because the parsers cannot
                // pick up from a non root doc depth in the case of a single object with records.
            }
            // i counts only records actually added to the batch; recoverable parse errors and
            // ObjectLengthExceptions do not advance it.
            int i = 0;
            updateGauge(Status.READING, offset.toString());
            while (i < maxBatchSize) {
                try {
                    Record record;
                    try {
                        record = parser.parse();
                    } catch (RecoverableDataParserException ex) {
                        // Propagate partially parsed record to error stream
                        record = ex.getUnparsedRecord();
                        setHeaders(record, object);
                        errorRecordHandler
                                .onError(new OnRecordErrorException(record, ex.getErrorCode(), ex.getParams()));
                        // We'll simply continue reading pass this recoverable error
                        continue;
                    }
                    if (record != null) {
                        setHeaders(record, object);
                        batchMaker.addRecord(record);
                        amazonS3Source.incrementNoMoreDataRecordCount();
                        i++;
                        offset.setOffset(parser.getOffset());
                    } else {
                        // parse() returned null: release parser and object, mark the offset with
                        // MINUS_ONE and end this batch early.
                        parser.close();
                        parser = null;
                        if (object != null) {
                            object.close();
                            object = null;
                        }
                        amazonS3Source.incrementNoMoreDataFileCount();
                        offset.setOffset(S3Constants.MINUS_ONE);
                        break;
                    }
                } catch (ObjectLengthException ex) {
                    // Reported to the error handler; note the loop is not left here, so parsing is
                    // attempted again on the next iteration.
                    errorRecordHandler.onError(Errors.S3_SPOOLDIR_02, s3Object.getKey(), offset.toString(), ex);
                    amazonS3Source.incrementNoMoreDataErrorCount();
                    offset.setOffset(S3Constants.MINUS_ONE);
                }
            }
        } catch (AmazonClientException e) {
            LOG.error("Error processing object with key '{}' offset '{}'", s3Object.getKey(), offset, e);
            throw new StageException(Errors.S3_SPOOLDIR_25, e.toString(), e);
        } catch (IOException | DataParserException ex) {
            // Failures caused by an AbortedException are ignored by this handler.
            if (!(ex.getCause() instanceof AbortedException)) {
                offset.setOffset(S3Constants.MINUS_ONE);
                // Work out the position to report: the overrun position, else the parser's
                // offset, else MINUS_ONE.
                String exOffset;
                if (ex instanceof OverrunException) {
                    exOffset = String.valueOf(((OverrunException) ex).getStreamOffset());
                } else {
                    try {
                        exOffset = (parser != null) ? parser.getOffset() : S3Constants.MINUS_ONE;
                    } catch (IOException ex1) {
                        // NOTE(review): the throwable logged here is the outer `ex`, not `ex1` - confirm intended.
                        LOG.warn("Could not get the object offset to report with error, reason: {}",
                                ex1.toString(), ex);
                        exOffset = S3Constants.MINUS_ONE;
                    }
                }
                // Overwrites the MINUS_ONE set above; the finally block below only cleans up when
                // the resulting offset is still MINUS_ONE.
                offset.setOffset(exOffset);

                switch (context.getOnErrorRecord()) {
                case DISCARD:
                    break;
                case TO_ERROR:
                    // we failed to produce a record, which leaves the input file in an unknown state. all we can do here is
                    // throw an exception.
                    throw new BadSpoolObjectException(s3Object.getKey(), exOffset, ex);
                case STOP_PIPELINE:
                    context.reportError(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(), ex);
                    throw new StageException(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(),
                            ex);
                default:
                    // NOTE(review): `ex` is passed to Utils.format as an extra argument rather than
                    // as the exception's cause - confirm intended.
                    throw new IllegalStateException(
                            Utils.format("Unknown OnError value '{}'", context.getOnErrorRecord(), ex));
                }
            }
        } finally {
            // Parser and object are released only when the offset is MINUS_ONE; otherwise both
            // fields are left set, and the `parser == null` check above skips re-creating the parser.
            if (S3Constants.MINUS_ONE.equals(offset.getOffset())) {
                if (parser != null) {
                    try {
                        parser.close();
                        parser = null;
                    } catch (IOException ex) {
                        LOG.debug("Exception while closing parser : '{}'", ex.toString(), ex);
                    }
                }
                if (object != null) {
                    try {
                        object.close();
                        object = null;
                    } catch (IOException ex) {
                        LOG.debug("Exception while closing S3 object : '{}'", ex.toString(), ex);
                    }
                }
            }
        }
    }
    // Runs whether or not an object was available, so an empty batch is still processed.
    context.processBatch(batchContext);
    updateGauge(Status.BATCH_GENERATED, offset.toString());
    return offset;
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

/**
 * Prepares whole-file processing for the given object: fetches a one-byte range of the object
 * to obtain its metadata, builds the file reference the parser reads from, and creates the
 * parser with the collected metadata.
 *
 * <p>On return, the {@code parser} field holds the new parser and the {@code object} field holds
 * the one-byte partial object.
 *
 * @param s3ObjectSummary summary of the object to process
 * @param recordId        record id handed to the parser
 * @throws StageException declared for failures of the called helpers
 */
private void handleWholeFileDataFormat(S3ObjectSummary s3ObjectSummary, String recordId) throws StageException {
    // A one-byte ranged get: only the object's metadata is of interest here.
    S3Object metadataProbe = AmazonS3Util.getObjectRange(s3Client, s3ConfigBean.s3Config.bucket,
            s3ObjectSummary.getKey(), 1, s3ConfigBean.sseConfig.useCustomerSSEKey,
            s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);

    S3FileRef.Builder fileRefBuilder = new S3FileRef.Builder();
    fileRefBuilder.s3Client(s3Client);
    fileRefBuilder.s3ObjectSummary(s3ObjectSummary);
    fileRefBuilder.useSSE(s3ConfigBean.sseConfig.useCustomerSSEKey);
    fileRefBuilder.customerKey(s3ConfigBean.sseConfig.customerKey);
    fileRefBuilder.customerKeyMd5(s3ConfigBean.sseConfig.customerKeyMd5);
    fileRefBuilder.bufferSize((int) dataParser.suggestedWholeFileBufferSize());
    fileRefBuilder.createMetrics(true);
    fileRefBuilder.totalSizeInBytes(s3ObjectSummary.getSize());
    fileRefBuilder.rateLimit(dataParser.wholeFileRateLimit());

    if (dataParser.isWholeFileChecksumRequired()) {
        fileRefBuilder.verifyChecksum(true);
        fileRefBuilder.checksumAlgorithm(HashingUtil.HashType.MD5);
        // The ETag is used as the 128 bit hex encoded md5 checksum.
        fileRefBuilder.checksum(metadataProbe.getObjectMetadata().getETag());
    }

    Map<String, Object> attributes = AmazonS3Util.getMetaData(metadataProbe);
    attributes.put(S3Constants.BUCKET, s3ObjectSummary.getBucketName());
    attributes.put(S3Constants.OBJECT_KEY, s3ObjectSummary.getKey());
    attributes.put(S3Constants.OWNER, s3ObjectSummary.getOwner());
    attributes.put(S3Constants.SIZE, s3ObjectSummary.getSize());
    attributes.put(HeaderAttributeConstants.FILE_NAME, s3ObjectSummary.getKey());
    attributes.remove(S3Constants.CONTENT_LENGTH);

    parser = dataParser.getParser(recordId, attributes, fileRefBuilder.build());
    // Keep the partial object around so that setHeaders() can read the object's metadata from it.
    object = metadataProbe;
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

/**
 * Clears the current object, then polls the spooler until it returns an object that
 * {@code isEligible} accepts for the given offset, and makes that object current.
 *
 * <p>If the offset's key is missing or empty, or the polled object is later than the offset's
 * object, a fresh offset at {@code S3Constants.ZERO} for the polled object is returned;
 * otherwise the incoming offset is returned unchanged. An interrupted poll is logged and the
 * incoming offset is returned.
 *
 * @param s3Offset     offset describing the object processed so far
 * @param batchContext batch context passed through to the spooler
 * @return the offset to continue from
 * @throws StageException if the spooler fails with an {@code AmazonClientException}
 */
private S3Offset fetchNextObjectFromSpooler(S3Offset s3Offset, BatchContext batchContext)
        throws StageException {
    setCurrentObject(null);
    try {
        // Objects are processed in chronological order, so the first candidate is normally
        // eligible. Only after a configuration change (e.g. a relaxed prefix) can an older
        // object show up; such candidates are logged and skipped.
        S3ObjectSummary candidate = spooler.poolForObject(amazonS3Source, s3ConfigBean.basicConfig.maxWaitTime,
                TimeUnit.MILLISECONDS, batchContext);
        while (!isEligible(candidate, s3Offset)) {
            if (candidate != null) {
                LOG.warn("Ignoring object '{}' in spool directory as is lesser than offset object '{}'",
                        candidate.getKey(), s3Offset.getKey());
            }
            candidate = spooler.poolForObject(amazonS3Source, s3ConfigBean.basicConfig.maxWaitTime,
                    TimeUnit.MILLISECONDS, batchContext);
        }

        if (candidate == null) {
            // Nothing arrived within the wait time.
            LOG.debug("No new object available in spool queue after '{}' secs, producing empty batch",
                    s3ConfigBean.basicConfig.maxWaitTime / 1000);
            return s3Offset;
        }

        setCurrentObject(candidate);

        // A missing/empty offset key, or a candidate later than the offset's object, means a new
        // object: start it from offset zero. Otherwise the spooler handed back the object already
        // in progress and the tracked offset stays as it is.
        boolean startsNewObject = s3Offset.getKey() == null || s3Offset.getKey().equals(S3Constants.EMPTY)
                || isLaterThan(candidate.getKey(), candidate.getLastModified().getTime(),
                        s3Offset.getKey(), Long.parseLong(s3Offset.getTimestamp()));
        if (startsNewObject) {
            s3Offset = new S3Offset(getCurrentObject().getKey(), S3Constants.ZERO,
                    getCurrentObject().getETag(),
                    String.valueOf(getCurrentObject().getLastModified().getTime()));
        }
    } catch (InterruptedException ex) {
        // Interrupted while waiting for an object: log and return; the pipeline agent invokes
        // this again to resume waiting.
        LOG.warn("Pooling interrupted");
    } catch (AmazonClientException e) {
        throw new StageException(Errors.S3_SPOOLDIR_23, e.toString(), e);
    }
    return s3Offset;
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

/**
 * Decides whether the object returned by the spooler may be processed given the current offset,
 * according to the configured object ordering.
 *
 * <p>A {@code null} object or {@code null} offset is always eligible. With {@code TIMESTAMP}
 * ordering the object's last-modified time must be at or after the offset's timestamp; with
 * {@code LEXICOGRAPHICAL} ordering the object's key must sort strictly after the offset's key,
 * unless that key is {@code null} or empty.
 *
 * @param nextAvailObj object returned by the spooler, may be {@code null}
 * @param s3Offset     current offset, may be {@code null}
 * @return {@code true} if the object is eligible
 * @throws IllegalArgumentException for any other ordering
 */
private boolean isEligible(S3ObjectSummary nextAvailObj, S3Offset s3Offset) {
    final ObjectOrdering ordering = s3ConfigBean.s3FileConfig.objectOrdering;

    if (ordering == ObjectOrdering.TIMESTAMP) {
        if (nextAvailObj == null || s3Offset == null) {
            return true;
        }
        long offsetTimestamp = Long.parseLong(s3Offset.getTimestamp());
        return nextAvailObj.getLastModified().getTime() >= offsetTimestamp;
    }

    if (ordering == ObjectOrdering.LEXICOGRAPHICAL) {
        if (nextAvailObj == null || s3Offset == null) {
            return true;
        }
        if (s3Offset.getKey() == null || s3Offset.getKey().equals(S3Constants.EMPTY)) {
            return true;
        }
        return nextAvailObj.getKey().compareTo(s3Offset.getKey()) > 0;
    }

    throw new IllegalArgumentException("Unknown ordering: " + ordering.getLabel());
}