Example usage for com.amazonaws.services.s3.model ObjectListing getObjectSummaries

Introduction

On this page you can find example usages of com.amazonaws.services.s3.model.ObjectListing#getObjectSummaries.

Prototype

public List<S3ObjectSummary> getObjectSummaries()

Source Link

Document

Gets the list of object summaries describing the objects stored in the S3 bucket.

Usage

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Lists the keys under {@code CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE} in the
 * "aws-publicdatasets" bucket and queues every key matching {@code crawlLogPattern}
 * that is not already complete, queued or active.
 *
 * Paging continues until the listing is no longer truncated or {@code shutdownFlag}
 * becomes set.
 *
 * @param initialScan when true, {@link #scanForCompletions()} is run (under this
 *                    object's monitor) once the listing has been processed
 * @return true when the scan ran to the end, false when an IOException was caught and logged
 */
private boolean doScan(boolean initialScan) throws IOException {
    try {
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

        ListObjectsRequest listRequest = new ListObjectsRequest()
                .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE);
        ObjectListing page = s3Client.listObjects(listRequest);

        do {
            LOG.info("Response Key Count:" + page.getObjectSummaries().size());

            for (S3ObjectSummary summary : page.getObjectSummaries()) {
                String key = summary.getKey();

                // only keys that look like crawl logs are of interest
                if (!crawlLogPattern.matcher(key).matches()) {
                    continue;
                }

                ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(key);
                if (candidate == null) {
                    LOG.error("Failed to Parse Candidate for:" + key);
                    continue;
                }

                LOG.info("Candidate is:" + candidate);
                // the bookkeeping collections are only touched while holding this object's monitor
                synchronized (this) {
                    if (_complete.contains(candidate._crawlLogName)) {
                        LOG.info("Skipping completed Candidate:" + candidate);
                    } else if (_candidates.containsEntry(candidate._timestamp, candidate)
                            || _active.containsKey(candidate)) {
                        LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                    } else {
                        // record the object size from the listing before queueing
                        candidate._size = summary.getSize();
                        LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                        _candidates.put(candidate._timestamp, candidate);
                    }
                }
            }

            // stop at the last page, otherwise fetch the next one
            if (!page.isTruncated()) {
                break;
            }
            page = s3Client.listNextBatchOfObjects(page);
        } while (!shutdownFlag.get());

        if (initialScan) {
            // search for completions
            synchronized (this) {
                scanForCompletions();
            }
        }

        return true;
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Lists the keys under {@code CC_BUCKET_ROOT + CC_PARSER_INTERMEDIATE} in the
 * "aws-publicdatasets" bucket and, for every key matching {@code doneFilePattern},
 * advances the last valid position of the corresponding queued candidate.
 *
 * A candidate whose position reaches its size is removed from {@code _candidates}
 * and its crawl log name is added to {@code _complete}. Completions for logs that
 * are not queued are logged and skipped.
 *
 * This method does no locking of its own.
 *
 * @throws IOException declared for callers; nothing in the visible body throws it directly
 */
public void scanForCompletions() throws IOException {
    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ObjectListing response = s3Client.listObjects(new ListObjectsRequest().withBucketName("aws-publicdatasets")
            .withPrefix(CC_BUCKET_ROOT + CC_PARSER_INTERMEDIATE));

    do {

        LOG.info("Response Key Count:" + response.getObjectSummaries().size());

        for (S3ObjectSummary entry : response.getObjectSummaries()) {
            Matcher matcher = doneFilePattern.matcher(entry.getKey());
            if (matcher.matches()) {
                ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                if (candidate == null) {
                    LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                } else {
                    // groups 2 and 3 of the done-file pattern carry the timestamp and position
                    long partialTimestamp = Long.parseLong(matcher.group(2));
                    long position = Long.parseLong(matcher.group(3));
                    LOG.info("Found completion for Log:" + candidate._crawlLogName + " TS:" + partialTimestamp
                            + " Pos:" + position);
                    candidate._lastValidPos = position;

                    // ok lookup existing entry if present ...
                    // The two-argument Iterables.find throws NoSuchElementException when nothing
                    // matches, so the null default is required for the "not found" branch below
                    // to be reachable.
                    ParseCandidate existingCandidate = Iterables.find(_candidates.get(candidate._timestamp),
                            Predicates.equalTo(candidate), null);
                    // if existing candidate found
                    if (existingCandidate != null) {
                        LOG.info("Found existing candidate with last pos:" + existingCandidate._lastValidPos);
                        if (candidate._lastValidPos > existingCandidate._lastValidPos) {
                            existingCandidate._lastValidPos = candidate._lastValidPos;
                            // NOTE(review): this compares against the freshly parsed candidate's _size,
                            // not existingCandidate._size -- confirm candidateFromBucketEntry populates
                            // _size for done-file keys.
                            if (candidate._lastValidPos == candidate._size) {
                                LOG.info("Found last pos == size for candidate:" + candidate._crawlLogName
                                        + ".REMOVING FROM ACTIVE - MOVING TO COMPLETE");
                                _candidates.remove(candidate._timestamp, candidate);
                                _complete.add(candidate._crawlLogName);
                            }
                        }
                    } else {
                        LOG.info("Skipping Completion for CrawlLog:" + candidate._crawlLogName
                                + " because existing candidate was not found.");
                    }
                }
            }
        }
        // follow the listing until S3 reports that no more pages remain
        if (response.isTruncated()) {
            response = s3Client.listNextBatchOfObjects(response);
        } else {
            break;
        }
    } while (true);
}

From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java

License:Open Source License

/**
 * Collects the summaries of every object in {@code bucketName} whose key starts with
 * {@code segmentPath + "metadata-"}, following truncated listings to the last page.
 *
 * @param s3AccessKeyId access key used to build the S3 client
 * @param s3SecretKey   secret key used to build the S3 client
 * @param bucketName    bucket to list
 * @param segmentPath   key prefix; "metadata-" is appended to form the search prefix
 * @return an immutable list of all matching object summaries, in listing order
 */
public static List<S3ObjectSummary> getMetadataPaths(String s3AccessKeyId, String s3SecretKey,
        String bucketName, String segmentPath) throws IOException {

    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ImmutableList.Builder<S3ObjectSummary> summaries = ImmutableList.builder();

    String metadataFilterKey = segmentPath + "metadata-";
    LOG.info("Prefix Search Key is:" + metadataFilterKey);

    ListObjectsRequest request = new ListObjectsRequest().withBucketName(bucketName)
            .withPrefix(metadataFilterKey);
    ObjectListing page = s3Client.listObjects(request);

    while (true) {
        LOG.info("Response Key Count:" + page.getObjectSummaries().size());
        summaries.addAll(page.getObjectSummaries());

        // the last page has been consumed once the listing is no longer truncated
        if (!page.isTruncated()) {
            return summaries.build();
        }
        page = s3Client.listNextBatchOfObjects(page);
    }
}

From source file:org.commoncrawl.util.S3BulkTransferUtil.java

License:Open Source License

/**
 * Collects the summaries of every object in {@code bucketName} whose key starts with
 * {@code segmentPath}, following truncated listings to the last page.
 *
 * @param s3AccessKeyId access key used to build the S3 client
 * @param s3SecretKey   secret key used to build the S3 client
 * @param bucketName    bucket to list
 * @param segmentPath   key prefix to search under
 * @return an immutable list of all matching object summaries, in listing order
 */
public static List<S3ObjectSummary> getPaths(String s3AccessKeyId, String s3SecretKey, String bucketName,
        String segmentPath) throws IOException {

    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ImmutableList.Builder<S3ObjectSummary> paths = ImmutableList.builder();

    ListObjectsRequest request = new ListObjectsRequest().withBucketName(bucketName).withPrefix(segmentPath);

    // one iteration per listing page; the update clause fetches the following page
    for (ObjectListing page = s3Client.listObjects(request);; page = s3Client.listNextBatchOfObjects(page)) {
        LOG.info("Response Key Count:" + page.getObjectSummaries().size());
        paths.addAll(page.getObjectSummaries());

        if (!page.isTruncated()) {
            break;
        }
    }

    return paths.build();
}

From source file:org.cto.VVS3Box.S3Sample.java

License:Open Source License

public static void main(String[] args) throws IOException {
    /*/*from w ww.j  a va2  s .  c  om*/
     * This credentials provider implementation loads your AWS credentials
     * from a properties file at the root of your classpath.
     *
     * Important: Be sure to fill in your AWS access credentials in the
     *            AwsCredentials.properties file before you try to run this
     *            sample.
     * http://aws.amazon.com/security-credentials
     */
    AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "lior.test-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possibly since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket.  Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}

From source file:org.deeplearning4j.aws.s3.reader.S3Downloader.java

License:Apache License

/**
 * Return the keys for a bucket.
 *
 * The listing request is re-issued with the marker returned by the previous
 * page until the listing is no longer truncated.
 *
 * @param bucket the bucket to get the keys for
 * @return the bucket's keys
 */
public List<String> keysForBucket(String bucket) {
    AmazonS3 s3 = getClient();
    List<String> keys = new ArrayList<>();
    ListObjectsRequest request = new ListObjectsRequest().withBucketName(bucket);

    boolean morePages = true;
    while (morePages) {
        ObjectListing page = s3.listObjects(request);
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            keys.add(summary.getKey());
        }
        // continue the next request from where this page ended
        request.setMarker(page.getNextMarker());
        morePages = page.isTruncated();
    }

    return keys;
}

From source file:org.deeplearning4j.aws.s3.reader.S3Downloader.java

License:Apache License

/**
 * Paginates through a bucket's keys invoking the listener 
 * at each key/*from w ww .j a  v a  2s. co m*/
 * @param bucket the bucket to iterate
 * @param listener the listener
 */
public void paginate(String bucket, BucketKeyListener listener) {
    AmazonS3 s3 = getClient();
    ObjectListing list = s3.listObjects(bucket);
    for (S3ObjectSummary summary : list.getObjectSummaries()) {
        if (listener != null)
            listener.onKey(s3, bucket, summary.getKey());
    }

    while (list.isTruncated()) {
        list = s3.listNextBatchOfObjects(list);
        for (S3ObjectSummary summary : list.getObjectSummaries()) {
            if (listener != null)
                listener.onKey(s3, bucket, summary.getKey());
        }
    }

}

From source file:org.duracloud.s3storage.S3StorageProvider.java

License:Apache License

/**
 * Fetches a single page of object summaries from an S3 bucket.
 *
 * @param bucketName bucket to list
 * @param prefix     key prefix passed to the listing request
 * @param maxResults maximum number of keys to request; values above
 *                   {@code Integer.MAX_VALUE} are clamped rather than wrapped
 * @param marker     listing marker passed to the request
 * @return the object summaries of the returned listing page
 * @throws StorageException (flagged RETRY) when the S3 client reports an error
 */
private List<S3ObjectSummary> listObjects(String bucketName, String prefix, long maxResults, String marker) {
    // clamp instead of narrowing: a plain long->int conversion can wrap to a negative or tiny value
    int numResults = (int) Math.min(maxResults, Integer.MAX_VALUE);
    ListObjectsRequest request = new ListObjectsRequest(bucketName, prefix, marker, null, numResults);
    try {
        ObjectListing objectListing = s3Client.listObjects(request);
        return objectListing.getObjectSummaries();
    } catch (AmazonClientException e) {
        String err = "Could not get contents of S3 bucket " + bucketName + " due to error: " + e.getMessage();
        throw new StorageException(err, e, RETRY);
    }
}

From source file:org.elasticsearch.cloud.aws.blobstore.AbstarctS3BlobContainer.java

License:Apache License

/**
 * Lists the blobs of this container, following truncated S3 listings to the last page.
 *
 * Blob names are the S3 keys with the leading {@code keyPath.length()} characters removed.
 *
 * @param blobNamePrefix prefix handed to {@code buildKey} to form the listing prefix;
 *                       when null, {@code keyPath} itself is used
 * @return an immutable map from blob name to its metadata (name and size)
 */
@Override
public ImmutableMap<String, BlobMetaData> listBlobsByPrefix(@Nullable String blobNamePrefix)
        throws IOException {
    ImmutableMap.Builder<String, BlobMetaData> blobs = ImmutableMap.builder();

    // first page: list everything under keyPath, or under the built key when a prefix is given
    ObjectListing page = (blobNamePrefix == null)
            ? blobStore.client().listObjects(blobStore.bucket(), keyPath)
            : blobStore.client().listObjects(blobStore.bucket(), buildKey(blobNamePrefix));

    while (true) {
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            String blobName = summary.getKey().substring(keyPath.length());
            blobs.put(blobName, new PlainBlobMetaData(blobName, summary.getSize()));
        }

        if (!page.isTruncated()) {
            break;
        }
        page = blobStore.client().listNextBatchOfObjects(page);
    }

    return blobs.build();
}

From source file:org.elasticsearch.cloud.aws.blobstore.S3BlobStore.java

License:Apache License

/**
 * Deletes every object listed under the given path, using multi-object delete requests.
 *
 * Keys are accumulated while paging through the listing; a delete request is sent
 * whenever more than 500 keys are pending, and once more at the end for any remainder.
 *
 * @param path blob path whose "/"-joined form (plus a trailing "/" when non-empty)
 *             is used as the listing prefix
 */
@Override
public void delete(BlobPath path) {
    //From http://docs.amazonwebservices.com/AmazonS3/latest/dev/DeletingMultipleObjectsUsingJava.html
    //we can do at most 1K objects per delete
    String prefix = path.buildAsString("/");
    if (!prefix.isEmpty()) {
        prefix = prefix + "/";
    }

    ObjectListing page = client.listObjects(bucket, prefix);
    // the delete request takes its bucket name from the listing itself
    DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(page.getBucketName());
    ArrayList<KeyVersion> pending = new ArrayList<KeyVersion>();

    for (;;) {
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            pending.add(new KeyVersion(summary.getKey()));
            // flush once more than 500 keys have accumulated
            if (pending.size() > 500) {
                deleteRequest.setKeys(pending);
                client.deleteObjects(deleteRequest);
                deleteRequest = new DeleteObjectsRequest(page.getBucketName());
                pending.clear();
            }
        }

        if (!page.isTruncated()) {
            break;
        }
        page = client.listNextBatchOfObjects(page);
    }

    // send whatever is left over from the final page
    if (!pending.isEmpty()) {
        deleteRequest.setKeys(pending);
        client.deleteObjects(deleteRequest);
    }
}