List of usage examples for com.amazonaws.services.s3.model.S3ObjectSummary.getKey()
public String getKey()
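Returns the key under which the object is stored in its bucket. Before the project examples, here is a minimal, self-contained sketch (the bucket name "my-bucket" and prefix "logs/" are hypothetical; the SDK v1 classes are the same ones the examples below use) that lists objects and prints each summary's key:

import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class GetKeyExample {
    public static void main(String[] args) {
        AmazonS3 s3 = new AmazonS3Client(new ProfileCredentialsProvider().getCredentials());
        // List objects under a (hypothetical) prefix and print each object's full key,
        // e.g. "logs/2016/01/app.log". getKey() returns the key relative to the bucket.
        ObjectListing listing = s3.listObjects("my-bucket", "logs/");
        for (S3ObjectSummary summary : listing.getObjectSummaries()) {
            System.out.println(summary.getKey());
        }
    }
}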
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java
License: Apache License
private void sendLineageEvent(S3ObjectSummary s3Object) {
    LineageEvent event = context.createLineageEvent(LineageEventType.ENTITY_READ);
    event.setSpecificAttribute(LineageSpecificAttribute.ENDPOINT_TYPE, EndPointType.S3.name());
    event.setSpecificAttribute(LineageSpecificAttribute.ENTITY_NAME, s3Object.getKey());
    event.setSpecificAttribute(LineageSpecificAttribute.DESCRIPTION, s3ConfigBean.s3Config.bucket);
    context.publishLineageEvent(event);
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Source.java
License: Apache License
private S3Offset fetchNextObjectFromSpooler(S3Offset s3Offset) throws StageException {
    setCurrentObject(null);
    try {
        // The next object found in the queue is usually eligible, since objects are processed in
        // chronological order. However, if the configuration is changed after a few files have been
        // processed (say, the prefix is relaxed) and an older file gets selected for processing,
        // it must be ignored.
        S3ObjectSummary nextAvailObj = null;
        do {
            if (nextAvailObj != null) {
                LOG.warn("Ignoring object '{}' in spool directory as it is older than offset object '{}'",
                    nextAvailObj.getKey(), s3Offset.getKey());
            }
            nextAvailObj = spooler.poolForObject(s3Offset, s3ConfigBean.basicConfig.maxWaitTime,
                TimeUnit.MILLISECONDS);
        } while (!isEligible(nextAvailObj, s3Offset));

        if (nextAvailObj == null) {
            // no object to process
            LOG.debug("No new object available in spool directory after '{}' secs, producing empty batch",
                s3ConfigBean.basicConfig.maxWaitTime / 1000);
        } else {
            setCurrentObject(nextAvailObj);
            // If the current offset object is null, or the object returned by the spooler is greater than
            // the current offset object, take the object returned by the spooler as the new object and set
            // the offset to zero. Otherwise the spooler returned the current object, so keep processing it
            // from the last offset processed (known via offset tracking).
            if (s3Offset.getKey() == null
                || isLaterThan(nextAvailObj.getKey(), nextAvailObj.getLastModified().getTime(),
                    s3Offset.getKey(), Long.parseLong(s3Offset.getTimestamp()))) {
                s3Offset = new S3Offset(getCurrentObject().getKey(), ZERO, getCurrentObject().getETag(),
                    String.valueOf(getCurrentObject().getLastModified().getTime()));
            }
        }
    } catch (InterruptedException ex) {
        // The spooler was interrupted while waiting for an object. Log and return; the pipeline agent
        // will invoke us again to wait for an object.
        LOG.warn("Polling interrupted");
    } catch (AmazonClientException e) {
        throw new StageException(Errors.S3_SPOOLDIR_23, e.toString());
    }
    return s3Offset;
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Source.java
License: Apache License
public String produce(S3ObjectSummary s3Object, String offset, int maxBatchSize, BatchMaker batchMaker)
    throws StageException, BadSpoolObjectException {
    try {
        if (parser == null) {
            // Get the S3 object instead of the stream because we want to call close on the object when
            // we close the parser (and stream).
            if (getContext().isPreview()) {
                long fetchSize = s3Object.getSize() > DEFAULT_FETCH_SIZE ? DEFAULT_FETCH_SIZE : s3Object.getSize();
                if (fetchSize > 0) {
                    object = AmazonS3Util.getObjectRange(s3ConfigBean.s3Config.getS3Client(),
                        s3ConfigBean.s3Config.bucket, s3Object.getKey(), fetchSize);
                } else {
                    LOG.warn("Size of object with key '{}' is 0", s3Object.getKey());
                    object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                        s3ConfigBean.s3Config.bucket, s3Object.getKey());
                }
            } else {
                object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                    s3ConfigBean.s3Config.bucket, s3Object.getKey());
            }
            String recordId = s3ConfigBean.s3Config.bucket + s3ConfigBean.s3Config.delimiter + s3Object.getKey();
            parser = s3ConfigBean.dataFormatConfig.getParserFactory().getParser(recordId,
                object.getObjectContent(), Long.parseLong(offset));
            // We don't use the S3 GetObject range capabilities to skip the already-processed offset because
            // the parsers cannot pick up from a non-root doc depth in the case of a single object with records.
        }
        for (int i = 0; i < maxBatchSize; i++) {
            try {
                Record record = parser.parse();
                if (record != null) {
                    batchMaker.addRecord(record);
                    offset = parser.getOffset();
                } else {
                    parser.close();
                    parser = null;
                    object.close();
                    object = null;
                    offset = MINUS_ONE;
                    break;
                }
            } catch (ObjectLengthException ex) {
                String exOffset = offset;
                offset = MINUS_ONE;
                switch (getContext().getOnErrorRecord()) {
                case DISCARD:
                    break;
                case TO_ERROR:
                    getContext().reportError(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                    break;
                case STOP_PIPELINE:
                    throw new StageException(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                default:
                    throw new IllegalStateException(
                        Utils.format("It should never happen. OnError '{}'", getContext().getOnErrorRecord(), ex));
                }
            }
        }
    } catch (AmazonClientException e) {
        LOG.error("Error processing object with key '{}' offset '{}'", s3Object.getKey(), offset);
        throw new StageException(Errors.S3_SPOOLDIR_25, e.toString());
    } catch (IOException | DataParserException ex) {
        if (ex.getCause() instanceof AbortedException) {
            // If the pipeline was stopped, the Amazon S3 client thread catches the interrupt and throws an
            // AbortedException. Do not treat this as an error; produce whatever we have and move on.
        } else {
            offset = MINUS_ONE;
            String exOffset;
            if (ex instanceof OverrunException) {
                exOffset = String.valueOf(((OverrunException) ex).getStreamOffset());
            } else {
                try {
                    exOffset = (parser != null) ? parser.getOffset() : MINUS_ONE;
                } catch (IOException ex1) {
                    LOG.warn("Could not get the object offset to report with error, reason: {}", ex1.toString(), ex);
                    exOffset = MINUS_ONE;
                }
            }
            switch (getContext().getOnErrorRecord()) {
            case DISCARD:
                break;
            case TO_ERROR:
                throw new BadSpoolObjectException(s3Object.getKey(), exOffset, ex);
            case STOP_PIPELINE:
                getContext().reportError(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString());
                throw new StageException(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(), ex);
            default:
                throw new IllegalStateException(
                    Utils.format("It should never happen. OnError '{}'", getContext().getOnErrorRecord(), ex));
            }
        }
    } finally {
        if (MINUS_ONE.equals(offset)) {
            if (parser != null) {
                try {
                    parser.close();
                    parser = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing parser : '{}'", ex.toString());
                }
            }
            if (object != null) {
                try {
                    object.close();
                    object = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing S3 object : '{}'", ex.toString());
                }
            }
        }
    }
    return offset;
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java
License: Apache License
/**
 * Lists objects from Amazon S3 in chronological order [lexicographical order if 2 files have the same
 * timestamp] which are later than or equal to the timestamp of the previous offset object.
 *
 * @param s3Client the S3 client used to list objects
 * @param s3ConfigBean bean holding the bucket and folder configuration
 * @param pathMatcher glob patterns to match file names against
 * @param s3Offset current offset, which provides the timestamp of the previous object
 * @param fetchSize number of objects to fetch in one go
 * @return the oldest "fetchSize" matching objects, in chronological order
 * @throws AmazonClientException
 */
static List<S3ObjectSummary> listObjectsChronologically(AmazonS3Client s3Client, S3ConfigBean s3ConfigBean,
    PathMatcher pathMatcher, AmazonS3Source.S3Offset s3Offset, int fetchSize) throws AmazonClientException {
    // Algorithm:
    // - Full scan all objects that match the file name pattern and which are later than the file in the offset
    // - Select the oldest "fetchSize" number of files and return them
    TreeSet<S3ObjectSummary> treeSet = new TreeSet<>(new Comparator<S3ObjectSummary>() {
        @Override
        public int compare(S3ObjectSummary o1, S3ObjectSummary o2) {
            int result = o1.getLastModified().compareTo(o2.getLastModified());
            if (result != 0) {
                return result;
            }
            // same modified time; use the key name to sort
            return o1.getKey().compareTo(o2.getKey());
        }
    });
    S3Objects s3ObjectSummaries = S3Objects
        .withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.folder)
        .withBatchSize(BATCH_SIZE);
    for (S3ObjectSummary s : s3ObjectSummaries) {
        String fileName = s.getKey().substring(s3ConfigBean.s3Config.folder.length(), s.getKey().length());
        if (!fileName.isEmpty()) {
            // fileName can be empty.
            // If the user manually creates a folder "myFolder/mySubFolder" in bucket "myBucket" and
            // uploads "myObject", then the first objects returned here are:
            //   myFolder/mySubFolder
            //   myFolder/mySubFolder/myObject
            //
            // All is good when the pipeline is run, but preview returns no data. So we should ignore the
            // empty file name, as it has no data.
            if (pathMatcher.matches(Paths.get(fileName)) && isEligible(s, s3Offset)) {
                treeSet.add(s);
            }
            if (treeSet.size() > fetchSize) {
                treeSet.pollLast();
            }
        }
    }
    return new ArrayList<>(treeSet);
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java
License: Apache License
private static boolean isEligible(S3ObjectSummary s, AmazonS3Source.S3Offset s3Offset) {
    // The object is eligible if:
    // 1. Its timestamp is greater than that of the current object in the offset
    // 2. The timestamp is the same but the name is lexicographically greater than the current object
    //    [can happen when multiple objects are uploaded in one go]
    // 3. Same timestamp, same name [same as the current object in the offset]: eligible if it was not
    //    completely processed [offset != -1]
    boolean isEligible = false;
    if (s.getLastModified().compareTo(new Date(Long.parseLong(s3Offset.getTimestamp()))) > 0) {
        isEligible = true;
    } else if (s.getLastModified().compareTo(new Date(Long.parseLong(s3Offset.getTimestamp()))) == 0) {
        // same timestamp; compare names
        if (s.getKey().compareTo(s3Offset.getKey()) > 0) {
            isEligible = true;
        } else if (s.getKey().compareTo(s3Offset.getKey()) == 0) {
            // same timestamp, same name
            // If the current offset is not -1, return the file: it means the previous file was
            // partially processed.
            if (Long.parseLong(s3Offset.getOffset()) != -1) {
                isEligible = true;
            }
        }
    }
    return isEligible;
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java
License: Apache License
static S3ObjectSummary getObjectSummary(AmazonS3Client s3Client, String bucket, String objectKey) {
    S3ObjectSummary s3ObjectSummary = null;
    S3Objects s3ObjectSummaries = S3Objects.withPrefix(s3Client, bucket, objectKey);
    for (S3ObjectSummary s : s3ObjectSummaries) {
        if (s.getKey().equals(objectKey)) {
            s3ObjectSummary = s;
            break;
        }
    }
    return s3ObjectSummary;
}
From source file: com.streamsets.pipeline.stage.origin.s3.S3Spooler.java
License: Apache License
void addObjectToQueue(S3ObjectSummary objectSummary, boolean checkCurrent) {
    Preconditions.checkNotNull(objectSummary, "objectSummary cannot be null");
    if (checkCurrent) {
        Preconditions.checkState(currentObject == null
            || currentObject.getLastModified().compareTo(objectSummary.getLastModified()) < 0);
    }
    if (!objectQueue.contains(objectSummary)) {
        if (objectQueue.size() >= MAX_SPOOL_SIZE) {
            LOG.warn("Exceeded '{}' queued files", objectQueue.size());
        }
        objectQueue.add(objectSummary);
        spoolQueueMeter.mark(objectQueue.size());
    } else {
        LOG.warn("Object '{}' already in queue, ignoring", objectSummary.getKey());
    }
}
From source file: com.supprema.utils.S3Sample.java
License: Open Source License
public static void main(String[] args) throws IOException {
    /*
     * The ProfileCredentialsProvider will return your [fabiano-user-s3]
     * credential profile by reading from the credentials file located at
     * (/Users/fabianorodriguesmatias/.aws/credentials).
     */
    AWSCredentials credentials = null;
    try {
        credentials = new ProfileCredentialsProvider("fabiano-user-s3").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException(
            "Cannot load the credentials from the credential profiles file. "
                + "Please make sure that your credentials file is at the correct "
                + "location (/Users/fabianorodriguesmatias/.aws/credentials), and is in valid format.",
            e);
    }

    AmazonS3 s3 = new AmazonS3Client(credentials);
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account.
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload an InputStream directly if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
            + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
            + "a serious internal problem while trying to communicate with S3, "
            + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file: com.tfnsnproject.util.S3StorageManager.java
License: Open Source License
/**
 * Deletes the specified S3 object from the S3 storage service. If a
 * storage path is passed in that has child S3 objects, it will recursively
 * delete the underlying objects.
 *
 * @param s3Store the S3 object to be deleted
 */
public void delete(S3StorageObject s3Store) {
    if (s3Store.getStoragePath() == null || s3Store.getStoragePath().equals("")) {
        logger.log(Level.WARNING, "Empty storage path passed to delete method");
        return; // We don't want to delete everything in a path
    }
    // Go through the store structure and delete child objects
    ObjectListing listing = s3client.listObjects(s3Store.getBucketName(), s3Store.getStoragePath());
    while (true) {
        List<S3ObjectSummary> objectList = listing.getObjectSummaries();
        for (S3ObjectSummary summary : objectList) {
            s3client.deleteObject(s3Store.getBucketName(), summary.getKey());
        }
        if (listing.isTruncated()) {
            listing = s3client.listNextBatchOfObjects(listing);
        } else {
            break;
        }
    }
}
From source file: com.topera.epoch.service.S3Util.java
License: Open Source License
public static void main(String[] args) throws IOException {
    /*
     * Create your credentials file at ~/.aws/credentials (C:\Users\USER_NAME\.aws\credentials for
     * Windows users) and save the following lines after replacing the underlined values with your own.
     *
     * [default]
     * aws_access_key_id = YOUR_ACCESS_KEY_ID
     * aws_secret_access_key = YOUR_SECRET_ACCESS_KEY
     */
    AWSCredentials creds = new AWSCredentials() {
        public String getAWSSecretKey() {
            // Placeholder only: never hardcode real credentials in source code
            return "YOUR_SECRET_ACCESS_KEY";
        }

        public String getAWSAccessKeyId() {
            // Placeholder only: never hardcode real credentials in source code
            return "YOUR_ACCESS_KEY_ID";
        }
    };
    AmazonS3 s3 = new AmazonS3Client(creds);
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account.
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload an InputStream directly if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
            + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
            + "a serious internal problem while trying to communicate with S3, "
            + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}