Example usage for com.amazonaws.services.s3.model S3ObjectSummary getSize

Introduction

On this page you can find example usage for com.amazonaws.services.s3.model.S3ObjectSummary.getSize().

Prototype

public long getSize() 

Document

Gets the size of this object in bytes.
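
Before the real-world examples below, here is a minimal, self-contained sketch (not taken from any of the source files on this page) that sums getSize() over every object under a prefix; the bucket name and prefix are placeholder values.

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class TotalSizeExample {
    public static void main(String[] args) {
        AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
        // "my-bucket" and "logs/" are placeholder values.
        ListObjectsV2Request req = new ListObjectsV2Request().withBucketName("my-bucket").withPrefix("logs/");
        long totalBytes = 0;
        ListObjectsV2Result result;
        do {
            result = s3.listObjectsV2(req);
            for (S3ObjectSummary summary : result.getObjectSummaries()) {
                totalBytes += summary.getSize(); // size of this object in bytes
            }
            req.setContinuationToken(result.getNextContinuationToken());
        } while (result.isTruncated());
        System.out.println("Total size under prefix: " + totalBytes + " bytes");
    }
}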

Usage

From source file:com.mesosphere.dcos.cassandra.executor.backup.S3StorageDriver.java

License:Apache License

private static Map<String, Long> listSnapshotFiles(AmazonS3Client amazonS3Client, String bucketName,
        String backupName) {
    Map<String, Long> snapshotFiles = new HashMap<>();
    final ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucketName)
            .withPrefix(backupName);
    ListObjectsV2Result result;
    do {
        result = amazonS3Client.listObjectsV2(req);
        for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
            snapshotFiles.put(objectSummary.getKey(), objectSummary.getSize());
        }
        req.setContinuationToken(result.getNextContinuationToken());
    } while (result.isTruncated());

    return snapshotFiles;
}
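
A hypothetical caller might aggregate the sizes returned by listSnapshotFiles() to report the total backup footprint; the client, bucket, and backup names below are placeholder values.

    Map<String, Long> files = listSnapshotFiles(amazonS3Client, "backup-bucket", "backup-2024-01-01");
    long totalBytes = files.values().stream().mapToLong(Long::longValue).sum();
    System.out.println(files.size() + " snapshot files, " + totalBytes + " bytes total");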

From source file:com.mycompany.mytubeaws.ListServlet.java

/**
 * Handles the HTTP <code>POST</code> method.
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
@Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    ArrayList<String> nameList = new ArrayList<>();
    ArrayList<String> sizeList = new ArrayList<>();
    ArrayList<String> dateList = new ArrayList<>();

    ObjectListing objects = s3.listObjects(bucketName);
    do {
        for (S3ObjectSummary objectSummary : objects.getObjectSummaries()) {
            nameList.add(objectSummary.getKey());
            sizeList.add(Long.toString(objectSummary.getSize()));
            dateList.add(StringUtils.fromDate(objectSummary.getLastModified()));
        }
        objects = s3.listNextBatchOfObjects(objects);
    } while (objects.isTruncated());

    request.setAttribute("nameList", nameList);
    request.setAttribute("sizeList", sizeList);
    request.setAttribute("dateList", dateList);
    request.getRequestDispatcher("/UploadResult.jsp").forward(request, response);
}
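
The servlet above forwards raw byte counts as strings. If human-readable sizes are preferred in the JSP, a small helper along these lines (hypothetical, not part of the original servlet) could replace Long.toString(objectSummary.getSize()):

    // Hypothetical helper: format a byte count from S3ObjectSummary.getSize() for display.
    private static String humanReadableSize(long bytes) {
        if (bytes < 1024) {
            return bytes + " B";
        }
        String units = "KMGTPE";
        int exponent = (int) (Math.log(bytes) / Math.log(1024));
        return String.format("%.1f %siB", bytes / Math.pow(1024, exponent), units.charAt(exponent - 1));
    }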

From source file:com.pinterest.terrapin.TerrapinUtil.java

License:Apache License

static public List<Pair<Path, Long>> getS3FileList(AWSCredentials credentials, String s3Bucket,
        String s3KeyPrefix) {
    List<Pair<Path, Long>> fileSizePairList = Lists.newArrayListWithCapacity(Constants.MAX_ALLOWED_SHARDS);
    AmazonS3Client s3Client = new AmazonS3Client(credentials);
    // List files and build the path using the s3n: prefix.
    // Note that only keys lexicographically greater than the marker are retrieved.
    String prefix = s3KeyPrefix;
    String marker = prefix;
    while (true) {
        boolean reachedEnd = false;
        ObjectListing listing = s3Client
                .listObjects(new ListObjectsRequest().withBucketName(s3Bucket).withMarker(marker));
        List<S3ObjectSummary> summaries = listing.getObjectSummaries();

        if (summaries.isEmpty()) {
            break;
        }

        for (S3ObjectSummary summary : summaries) {
            if (summary.getKey().startsWith(prefix)) {
                fileSizePairList.add(new ImmutablePair<>(new Path("s3n", s3Bucket, "/" + summary.getKey()),
                        summary.getSize()));
                if (fileSizePairList.size() > Constants.MAX_ALLOWED_SHARDS) {
                    throw new RuntimeException("Too many files " + fileSizePairList.size());
                }
            } else {
                // We found a key which does not match the prefix, stop.
                reachedEnd = true;
                break;
            }
        }
        if (reachedEnd) {
            break;
        }
        marker = summaries.get(summaries.size() - 1).getKey();
    }
    return fileSizePairList;
}
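
For comparison, the manual marker advancement and startsWith() check above can usually be replaced by the SDK's own prefix filtering and pagination; a sketch reusing the variables from getS3FileList() (the shard-count check is omitted):

    ObjectListing listing = s3Client
            .listObjects(new ListObjectsRequest().withBucketName(s3Bucket).withPrefix(s3KeyPrefix));
    while (true) {
        for (S3ObjectSummary summary : listing.getObjectSummaries()) {
            fileSizePairList.add(new ImmutablePair<>(new Path("s3n", s3Bucket, "/" + summary.getKey()),
                    summary.getSize()));
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3Client.listNextBatchOfObjects(listing);
    }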

From source file:com.proofpoint.event.collector.combiner.S3StorageSystem.java

License:Apache License

@Override
public List<StoredObject> listObjects(URI storageArea) {
    S3StorageHelper.checkValidS3Uri(storageArea);

    String s3Path = getS3ObjectKey(storageArea);
    Iterator<S3ObjectSummary> iter = new S3ObjectListing(s3Service,
            new ListObjectsRequest(getS3Bucket(storageArea), s3Path, null, "/", null)).iterator();

    ImmutableList.Builder<StoredObject> builder = ImmutableList.builder();
    while (iter.hasNext()) {
        S3ObjectSummary summary = iter.next();
        builder.add(new StoredObject(buildS3Location(storageArea, summary.getKey().substring(s3Path.length())),
                summary.getETag(), summary.getSize(), summary.getLastModified().getTime()));
    }
    return builder.build();
}

From source file:com.sjsu.backitup.AwsConsoleApp.java

License:Open Source License

public static void main(String[] args) throws Exception {

    System.out.println("===========================================");
    System.out.println("Welcome to the AWS Java SDK!");
    System.out.println("===========================================");

    init();

    /*
     * Amazon EC2
     *
     * The AWS EC2 client allows you to create, delete, and administer
     * instances programmatically.
     *
     * In this sample, we use an EC2 client to get a list of all the
     * availability zones, and all instances sorted by reservation id.
     */
    try {
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Reponse Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }

    /*
     * Amazon SimpleDB
     *
     * The AWS SimpleDB client allows you to query and manage your data
     * stored in SimpleDB domains (similar to tables in a relational DB).
     *
     * In this sample, we use a SimpleDB client to iterate over all the
     * domains owned by the current user, and add up the number of items
     * (similar to rows of data in a relational DB) in each domain.
     */

    /*
     * Amazon S3
     *
     * The AWS S3 client allows you to manage buckets and programmatically
     * put and get objects to those buckets.
     *
     * In this sample, we use an S3 client to iterate over all the buckets
     * owned by the current user, and all the object metadata in each
     * bucket, to obtain a total object and space usage count. This is done
     * without ever actually downloading a single object -- the requests
     * work with object metadata only.
     */
    try {
        List<Bucket> buckets = s3.listBuckets();

        long totalSize = 0;
        int totalItems = 0;
        for (Bucket bucket : buckets) {
            /*
             * In order to save bandwidth, an S3 object listing does not
             * contain every object in the bucket; after a certain point the
             * S3ObjectListing is truncated, and further pages must be
             * obtained with the AmazonS3Client.listNextBatchOfObjects()
             * method.
             */
            ObjectListing objects = s3.listObjects(bucket.getName());
            do {
                for (S3ObjectSummary objectSummary : objects.getObjectSummaries()) {
                    totalSize += objectSummary.getSize();
                    totalItems++;
                }
                objects = s3.listNextBatchOfObjects(objects);
            } while (objects.isTruncated());
        }

        System.out.println("You have " + buckets.size() + " Amazon S3 bucket(s), " + "containing " + totalItems
                + " objects with a total size of " + totalSize + " bytes.");
    } catch (AmazonServiceException ase) {
        /*
         * AmazonServiceExceptions represent an error response from an AWS
         * service, i.e. your request made it to AWS, but the AWS service
         * either found it invalid or encountered an error trying to execute
         * it.
         */
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        /*
         * AmazonClientExceptions represent an error that occurred inside
         * the client on the local host, either while trying to send the
         * request to AWS or interpret the response. For example, if no
         * network connection is available, the client won't be able to
         * connect to AWS to execute a request and will throw an
         * AmazonClientException.
         */
        System.out.println("Error Message: " + ace.getMessage());
    }
}

From source file:com.sjsu.faceit.example.S3Sample.java

License:Open Source License

public static void main(String[] args) throws IOException {
    /*
     * Important: Be sure to fill in your AWS access credentials in the
     *            AwsCredentials.properties file before you try to run this
     *            sample.
     * http://aws.amazon.com/security-credentials
     */
    System.out.println(new File(".").getAbsolutePath());
    AmazonS3 s3 = new AmazonS3Client(
            new PropertiesCredentials(S3Sample.class.getResourceAsStream("AwsCredentials.properties")));

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, "abc/" + key, new File("/Users/prayag/Desktop/2.jpg")));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, "abc/" + key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket.  Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        //            System.out.println("Deleting an object\n");
        //            s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        //            System.out.println("Deleting bucket " + bucketName + "\n");
        //            s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}

From source file:com.springboot.demo.framework.aws.s3.S3Sample.java

License:Open Source License

public static void main(String[] args) throws IOException {

    /*
     * The ProfileCredentialsProvider will return your [default]
     * credential profile by reading from the credentials file located at
     * (~/.aws/credentials).
     */
    AWSCredentials basicCredentials = new BasicAWSCredentials(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY);

    AWSCredentials credentials = null;
    try {
        credentials = new ProfileCredentialsProvider().getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException("Cannot load the credentials from the credential profiles file. "
                + "Please make sure that your credentials file is at the correct "
                + "location (~/.aws/credentials), and is in valid format.", e);
    }

    /*
     * Create S3 Client
     */
    AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Returns a URL for the object stored in the specified bucket and key
         */
        URL url = s3.getUrl(bucketName, key);
        System.out.println("upload file url : " + url.toString());

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket.  Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

private S3Offset produce(S3Offset offset, int maxBatchSize, BatchContext batchContext)
        throws StageException, BadSpoolObjectException {
    BatchMaker batchMaker = batchContext.getBatchMaker();

    S3ObjectSummary s3Object;

    if (offset != null) {
        spooler.postProcessOlderObjectIfNeeded(offset);
    }

    //check if we have an object to produce records from. Otherwise get from spooler.
    if (needToFetchNextObjectFromSpooler(offset)) {
        updateGauge(Status.SPOOLING, null);
        offset = fetchNextObjectFromSpooler(offset, batchContext);
        LOG.debug("Object '{}' with offset '{}' fetched from Spooler", offset.getKey(), offset.getOffset());
    } else {
        //check if the current object was modified between batches
        LOG.debug("Checking if Object '{}' has been modified between batches", getCurrentObject().getKey());
        if (!getCurrentObject().getETag().equals(offset.geteTag())) {
            //send the current object to error archive and get next object from spooler
            LOG.debug("Object '{}' has been modified between batches. Sending the object to error",
                    getCurrentObject().getKey());
            try {
                spooler.handleCurrentObjectAsError();
            } catch (AmazonClientException e) {
                throw new StageException(Errors.S3_SPOOLDIR_24, e.toString(), e);
            }
            offset = fetchNextObjectFromSpooler(offset, batchContext);
        }
    }

    s3Object = getCurrentObject();
    if (s3Object != null) {
        amazonS3Source.updateOffset(runnerId, offset);
        try {
            if (parser == null) {
                String recordId = s3ConfigBean.s3Config.bucket + s3ConfigBean.s3Config.delimiter
                        + s3Object.getKey();
                if (dataParser.isWholeFileFormat()) {
                    handleWholeFileDataFormat(s3Object, recordId);
                } else {
                    //Get S3 object instead of stream because we want to call close on the object when we close the
                    // parser (and stream)
                    if (context.isPreview()) {
                        long fetchSize = s3Object.getSize() > S3Constants.DEFAULT_FETCH_SIZE
                                ? S3Constants.DEFAULT_FETCH_SIZE
                                : s3Object.getSize();
                        if (fetchSize > 0) {
                            object = AmazonS3Util.getObjectRange(s3Client, s3ConfigBean.s3Config.bucket,
                                    s3Object.getKey(), fetchSize, s3ConfigBean.sseConfig.useCustomerSSEKey,
                                    s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                        } else {
                            LOG.warn("Size of object with key '{}' is 0", s3Object.getKey());
                            object = AmazonS3Util.getObject(s3Client, s3ConfigBean.s3Config.bucket,
                                    s3Object.getKey(), s3ConfigBean.sseConfig.useCustomerSSEKey,
                                    s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                        }
                    } else {
                        object = AmazonS3Util.getObject(s3Client, s3ConfigBean.s3Config.bucket,
                                s3Object.getKey(), s3ConfigBean.sseConfig.useCustomerSSEKey,
                                s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                    }
                    parser = dataParser.getParser(recordId, object.getObjectContent(), offset.getOffset());
                }
                sendLineageEvent(s3Object);
                //we don't use S3 GetObject range capabilities to skip the already-processed offset because the parsers
                // cannot pick up from a non-root doc depth in the case of a single object with records.
            }
            int i = 0;
            updateGauge(Status.READING, offset.toString());
            while (i < maxBatchSize) {
                try {
                    Record record;
                    try {
                        record = parser.parse();
                    } catch (RecoverableDataParserException ex) {
                        // Propagate partially parsed record to error stream
                        record = ex.getUnparsedRecord();
                        setHeaders(record, object);
                        errorRecordHandler
                                .onError(new OnRecordErrorException(record, ex.getErrorCode(), ex.getParams()));
                        // We'll simply continue reading past this recoverable error
                        continue;
                    }
                    if (record != null) {
                        setHeaders(record, object);
                        batchMaker.addRecord(record);
                        amazonS3Source.incrementNoMoreDataRecordCount();
                        i++;
                        offset.setOffset(parser.getOffset());
                    } else {
                        parser.close();
                        parser = null;
                        if (object != null) {
                            object.close();
                            object = null;
                        }
                        amazonS3Source.incrementNoMoreDataFileCount();
                        offset.setOffset(S3Constants.MINUS_ONE);
                        break;
                    }
                } catch (ObjectLengthException ex) {
                    errorRecordHandler.onError(Errors.S3_SPOOLDIR_02, s3Object.getKey(), offset.toString(), ex);
                    amazonS3Source.incrementNoMoreDataErrorCount();
                    offset.setOffset(S3Constants.MINUS_ONE);
                }
            }
        } catch (AmazonClientException e) {
            LOG.error("Error processing object with key '{}' offset '{}'", s3Object.getKey(), offset, e);
            throw new StageException(Errors.S3_SPOOLDIR_25, e.toString(), e);
        } catch (IOException | DataParserException ex) {
            if (!(ex.getCause() instanceof AbortedException)) {
                offset.setOffset(S3Constants.MINUS_ONE);
                String exOffset;
                if (ex instanceof OverrunException) {
                    exOffset = String.valueOf(((OverrunException) ex).getStreamOffset());
                } else {
                    try {
                        exOffset = (parser != null) ? parser.getOffset() : S3Constants.MINUS_ONE;
                    } catch (IOException ex1) {
                        LOG.warn("Could not get the object offset to report with error, reason: {}",
                                ex1.toString(), ex);
                        exOffset = S3Constants.MINUS_ONE;
                    }
                }
                offset.setOffset(exOffset);

                switch (context.getOnErrorRecord()) {
                case DISCARD:
                    break;
                case TO_ERROR:
                    // we failed to produce a record, which leaves the input file in an unknown state. All we can do
                    // here is throw an exception.
                    throw new BadSpoolObjectException(s3Object.getKey(), exOffset, ex);
                case STOP_PIPELINE:
                    context.reportError(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(), ex);
                    throw new StageException(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(),
                            ex);
                default:
                    throw new IllegalStateException(
                            Utils.format("Unknown OnError value '{}'", context.getOnErrorRecord(), ex));
                }
            }
        } finally {
            if (S3Constants.MINUS_ONE.equals(offset.getOffset())) {
                if (parser != null) {
                    try {
                        parser.close();
                        parser = null;
                    } catch (IOException ex) {
                        LOG.debug("Exception while closing parser : '{}'", ex.toString(), ex);
                    }
                }
                if (object != null) {
                    try {
                        object.close();
                        object = null;
                    } catch (IOException ex) {
                        LOG.debug("Exception while closing S3 object : '{}'", ex.toString(), ex);
                    }
                }
            }
        }
    }
    context.processBatch(batchContext);
    updateGauge(Status.BATCH_GENERATED, offset.toString());
    return offset;
}
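
Note the role of getSize() in the preview branch above: the ranged GET is capped at S3Constants.DEFAULT_FETCH_SIZE, with getSize() both choosing the smaller range length and flagging zero-byte objects, for which a plain GET is issued instead.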

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

private void handleWholeFileDataFormat(S3ObjectSummary s3ObjectSummary, String recordId) throws StageException {
    S3Object partialS3ObjectForMetadata;
    //partialObject with fetchSize 1 byte.
    //This is mostly used for extracting metadata and such.
    partialS3ObjectForMetadata = AmazonS3Util.getObjectRange(s3Client, s3ConfigBean.s3Config.bucket,
            s3ObjectSummary.getKey(), 1, s3ConfigBean.sseConfig.useCustomerSSEKey,
            s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);

    S3FileRef.Builder s3FileRefBuilder = new S3FileRef.Builder().s3Client(s3Client)
            .s3ObjectSummary(s3ObjectSummary).useSSE(s3ConfigBean.sseConfig.useCustomerSSEKey)
            .customerKey(s3ConfigBean.sseConfig.customerKey)
            .customerKeyMd5(s3ConfigBean.sseConfig.customerKeyMd5)
            .bufferSize((int) dataParser.suggestedWholeFileBufferSize()).createMetrics(true)
            .totalSizeInBytes(s3ObjectSummary.getSize()).rateLimit(dataParser.wholeFileRateLimit());

    if (dataParser.isWholeFileChecksumRequired()) {
        s3FileRefBuilder.verifyChecksum(true).checksumAlgorithm(HashingUtil.HashType.MD5)
                //128 bit hex encoded md5 checksum.
                .checksum(partialS3ObjectForMetadata.getObjectMetadata().getETag());
    }
    Map<String, Object> metadata = AmazonS3Util.getMetaData(partialS3ObjectForMetadata);
    metadata.put(S3Constants.BUCKET, s3ObjectSummary.getBucketName());
    metadata.put(S3Constants.OBJECT_KEY, s3ObjectSummary.getKey());
    metadata.put(S3Constants.OWNER, s3ObjectSummary.getOwner());
    metadata.put(S3Constants.SIZE, s3ObjectSummary.getSize());
    metadata.put(HeaderAttributeConstants.FILE_NAME, s3ObjectSummary.getKey());

    metadata.remove(S3Constants.CONTENT_LENGTH);
    parser = dataParser.getParser(recordId, metadata, s3FileRefBuilder.build());
    //The object is assigned so that setHeaders() can use it to get metadata
    //information about the object
    object = partialS3ObjectForMetadata;
}
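
Here getSize() supplies totalSizeInBytes to the S3FileRef.Builder, so the whole-file transfer can track metrics and apply its rate limit against the full object length, while only a one-byte range request is made up front for metadata.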

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Source.java

License:Apache License

public String produce(S3ObjectSummary s3Object, String offset, int maxBatchSize, BatchMaker batchMaker)
        throws StageException, BadSpoolObjectException {
    try {
        if (parser == null) {
            //Get S3 object instead of stream because we want to call close on the object when we close the
            // parser (and stream)
            if (getContext().isPreview()) {
                long fetchSize = s3Object.getSize() > DEFAULT_FETCH_SIZE ? DEFAULT_FETCH_SIZE
                        : s3Object.getSize();
                if (fetchSize > 0) {
                    object = AmazonS3Util.getObjectRange(s3ConfigBean.s3Config.getS3Client(),
                            s3ConfigBean.s3Config.bucket, s3Object.getKey(), fetchSize);
                } else {
                    LOG.warn("Size of object with key '{}' is 0", s3Object.getKey());
                    object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                            s3ConfigBean.s3Config.bucket, s3Object.getKey());
                }
            } else {
                object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                        s3ConfigBean.s3Config.bucket, s3Object.getKey());
            }
            String recordId = s3ConfigBean.s3Config.bucket + s3ConfigBean.s3Config.delimiter
                    + s3Object.getKey();
            parser = s3ConfigBean.dataFormatConfig.getParserFactory().getParser(recordId,
                    object.getObjectContent(), Long.parseLong(offset));
            //we don't use S3 GetObject range capabilities to skip the already-processed offset because the parsers
            // cannot pick up from a non-root doc depth in the case of a single object with records.
        }
        for (int i = 0; i < maxBatchSize; i++) {
            try {
                Record record = parser.parse();
                if (record != null) {
                    batchMaker.addRecord(record);
                    offset = parser.getOffset();
                } else {
                    parser.close();
                    parser = null;
                    object.close();
                    object = null;
                    offset = MINUS_ONE;
                    break;
                }
            } catch (ObjectLengthException ex) {
                String exOffset = offset;
                offset = MINUS_ONE;
                switch (getContext().getOnErrorRecord()) {
                case DISCARD:
                    break;
                case TO_ERROR:
                    getContext().reportError(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                    break;
                case STOP_PIPELINE:
                    throw new StageException(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                default:
                    throw new IllegalStateException(Utils.format("It should never happen. OnError '{}'",
                            getContext().getOnErrorRecord(), ex));
                }
            }
        }
    } catch (AmazonClientException e) {
        LOG.error("Error processing object with key '{}' offset '{}'", s3Object.getKey(), offset);
        throw new StageException(Errors.S3_SPOOLDIR_25, e.toString());
    } catch (IOException | DataParserException ex) {
        if (ex.getCause() instanceof AbortedException) {
            //If the pipeline was stopped, the Amazon S3 client thread catches the interrupt and throws an AbortedException;
            //do not treat this as an error. Instead, produce whatever we have and move on.

        } else {
            offset = MINUS_ONE;
            String exOffset;
            if (ex instanceof OverrunException) {
                exOffset = String.valueOf(((OverrunException) ex).getStreamOffset());
            } else {
                try {
                    exOffset = (parser != null) ? parser.getOffset() : MINUS_ONE;
                } catch (IOException ex1) {
                    LOG.warn("Could not get the object offset to report with error, reason: {}", ex1.toString(),
                            ex);
                    exOffset = MINUS_ONE;
                }
            }

            switch (getContext().getOnErrorRecord()) {
            case DISCARD:
                break;
            case TO_ERROR:
                throw new BadSpoolObjectException(s3Object.getKey(), exOffset, ex);
            case STOP_PIPELINE:
                getContext().reportError(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString());
                throw new StageException(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(), ex);
            default:
                throw new IllegalStateException(Utils.format("It should never happen. OnError '{}'",
                        getContext().getOnErrorRecord(), ex));
            }
        }
    } finally {
        if (MINUS_ONE.equals(offset)) {
            if (parser != null) {
                try {
                    parser.close();
                    parser = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing parser : '{}'", ex.toString());
                }
            }
            if (object != null) {
                try {
                    object.close();
                    object = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing S3 object : '{}'", ex.toString());
                }
            }
        }
    }
    return offset;
}