Example usage for com.amazonaws.services.s3.model ObjectListing getObjectSummaries

List of usage examples for com.amazonaws.services.s3.model ObjectListing getObjectSummaries

Introduction

On this page you can find example usages of the getObjectSummaries() method of com.amazonaws.services.s3.model.ObjectListing.

Prototype

public List<S3ObjectSummary> getObjectSummaries() 

Source Link

Document

Gets the list of object summaries describing the objects stored in the S3 bucket.

Usage

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Lists the crawl-log prefix of the "aws-publicdatasets" bucket and registers
 * every key that matches {@code crawlLogPattern} as a parse candidate, unless
 * it is already recorded as complete, queued, or active.
 *
 * @param initialScan when true, {@link #scanForCompletions()} is run (under
 *                    this object's monitor) after the listing finishes
 * @return true when the scan ran to the end, false when an IOException was
 *         caught and logged
 */
private boolean doScan(boolean initialScan) throws IOException {
    try {
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

        ListObjectsRequest listRequest = new ListObjectsRequest()
                .withBucketName("aws-publicdatasets")
                .withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE);
        ObjectListing page = s3Client.listObjects(listRequest);

        boolean keepScanning = true;
        while (keepScanning) {

            LOG.info("Response Key Count:" + page.getObjectSummaries().size());

            for (S3ObjectSummary summary : page.getObjectSummaries()) {
                String key = summary.getKey();

                // only keys shaped like crawl logs are of interest
                if (!crawlLogPattern.matcher(key).matches()) {
                    continue;
                }

                ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(key);
                if (candidate == null) {
                    LOG.error("Failed to Parse Candidate for:" + key);
                    continue;
                }

                LOG.info("Candidate is:" + candidate);
                synchronized (this) {
                    if (_complete.contains(candidate._crawlLogName)) {
                        LOG.info("Skipping completed Candidate:" + candidate);
                    } else if (_candidates.containsEntry(candidate._timestamp, candidate)
                            || _active.containsKey(candidate)) {
                        LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                    } else {
                        // the object size comes from the listing entry, so record it here
                        candidate._size = summary.getSize();
                        LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                        _candidates.put(candidate._timestamp, candidate);
                    }
                }
            }

            if (!page.isTruncated()) {
                break;
            }
            page = s3Client.listNextBatchOfObjects(page);
            // the shutdown flag is consulted only after another page has been fetched
            keepScanning = !shutdownFlag.get();
        }

        if (initialScan) {
            // search for completions
            synchronized (this) {
                scanForCompletions();
            }
        }

        return true;
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Lists the parser-intermediate prefix of the "aws-publicdatasets" bucket and,
 * for every key matching {@code doneFilePattern}, folds the position encoded
 * in that key into the matching entry of {@code _candidates}.
 *
 * <p>Keys for which no tracked candidate exists are logged and skipped.
 *
 * @throws IOException declared for callers; nothing in this body throws it directly
 */
public void scanForCompletions() throws IOException {
    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ObjectListing response = s3Client.listObjects(new ListObjectsRequest().withBucketName("aws-publicdatasets")
            .withPrefix(CC_BUCKET_ROOT + CC_PARSER_INTERMEDIATE));

    do {

        LOG.info("Response Key Count:" + response.getObjectSummaries().size());

        for (S3ObjectSummary entry : response.getObjectSummaries()) {
            Matcher matcher = doneFilePattern.matcher(entry.getKey());
            if (matcher.matches()) {
                ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                if (candidate == null) {
                    LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                } else {
                    long partialTimestamp = Long.parseLong(matcher.group(2));
                    long position = Long.parseLong(matcher.group(3));
                    LOG.info("Found completion for Log:" + candidate._crawlLogName + " TS:" + partialTimestamp
                            + " Pos:" + position);
                    candidate._lastValidPos = position;

                    // ok lookup existing entry if present ...
                    // The two-argument Iterables.find throws NoSuchElementException when
                    // nothing matches; the three-argument form returns the supplied default,
                    // which is what the null check below relies on.
                    ParseCandidate existingCandidate = Iterables.find(_candidates.get(candidate._timestamp),
                            Predicates.equalTo(candidate), null);
                    // if existing candidate found
                    if (existingCandidate != null) {
                        LOG.info("Found existing candidate with last pos:" + existingCandidate._lastValidPos);
                        if (candidate._lastValidPos > existingCandidate._lastValidPos) {
                            existingCandidate._lastValidPos = candidate._lastValidPos;
                            // NOTE(review): this method never assigns candidate._size, so this
                            // comparison may have been meant to use existingCandidate._size --
                            // confirm against ParseCandidate.candidateFromBucketEntry.
                            if (candidate._lastValidPos == candidate._size) {
                                LOG.info("Found last pos == size for candidate:" + candidate._crawlLogName
                                        + ".REMOVING FROM ACTIVE - MOVING TO COMPLETE");
                                _candidates.remove(candidate._timestamp, candidate);
                                _complete.add(candidate._crawlLogName);
                            }
                        }
                    } else {
                        LOG.info("Skipping Completion for CrawlLog:" + candidate._crawlLogName
                                + " because existing candidate was not found.");
                    }
                }
            }
        }
        if (response.isTruncated()) {
            response = s3Client.listNextBatchOfObjects(response);
        } else {
            break;
        }
    } while (true);
}

From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java

License:Open Source License

/**
 * Collects the summaries of all objects in {@code bucketName} whose key
 * starts with {@code segmentPath + "metadata-"}, following truncated
 * listings until the last page.
 *
 * @param s3AccessKeyId access key used to build the S3 client
 * @param s3SecretKey   secret key used to build the S3 client
 * @param bucketName    bucket to list
 * @param segmentPath   key prefix to which "metadata-" is appended
 * @return an immutable list of every summary returned by the listing
 */
public static List<S3ObjectSummary> getMetadataPaths(String s3AccessKeyId, String s3SecretKey,
        String bucketName, String segmentPath) throws IOException {

    BasicAWSCredentials credentials = new BasicAWSCredentials(s3AccessKeyId, s3SecretKey);
    AmazonS3Client s3Client = new AmazonS3Client(credentials);

    ImmutableList.Builder<S3ObjectSummary> collected = new ImmutableList.Builder<S3ObjectSummary>();

    String metadataFilterKey = segmentPath + "metadata-";
    LOG.info("Prefix Search Key is:" + metadataFilterKey);

    ListObjectsRequest listRequest = new ListObjectsRequest()
            .withBucketName(bucketName)
            .withPrefix(metadataFilterKey);
    ObjectListing page = s3Client.listObjects(listRequest);

    while (true) {
        List<S3ObjectSummary> summaries = page.getObjectSummaries();
        LOG.info("Response Key Count:" + summaries.size());

        for (S3ObjectSummary summary : summaries) {
            collected.add(summary);
        }

        // stop once S3 reports that the listing is complete
        if (!page.isTruncated()) {
            break;
        }
        page = s3Client.listNextBatchOfObjects(page);
    }

    return collected.build();
}

From source file:org.commoncrawl.util.S3BulkTransferUtil.java

License:Open Source License

/**
 * Collects the summaries of all objects in {@code bucketName} whose key
 * starts with {@code segmentPath}, following truncated listings until the
 * last page.
 *
 * @param s3AccessKeyId access key used to build the S3 client
 * @param s3SecretKey   secret key used to build the S3 client
 * @param bucketName    bucket to list
 * @param segmentPath   key prefix to list under
 * @return an immutable list of every summary returned by the listing
 */
public static List<S3ObjectSummary> getPaths(String s3AccessKeyId, String s3SecretKey, String bucketName,
        String segmentPath) throws IOException {

    BasicAWSCredentials credentials = new BasicAWSCredentials(s3AccessKeyId, s3SecretKey);
    AmazonS3Client s3Client = new AmazonS3Client(credentials);

    ImmutableList.Builder<S3ObjectSummary> collected = new ImmutableList.Builder<S3ObjectSummary>();

    ListObjectsRequest listRequest = new ListObjectsRequest()
            .withBucketName(bucketName)
            .withPrefix(segmentPath);

    // the update clause fetches the next page; it only runs when the body did not break out
    for (ObjectListing page = s3Client.listObjects(listRequest);; page = s3Client
            .listNextBatchOfObjects(page)) {
        List<S3ObjectSummary> summaries = page.getObjectSummaries();
        LOG.info("Response Key Count:" + summaries.size());

        for (S3ObjectSummary summary : summaries) {
            collected.add(summary);
        }

        if (!page.isTruncated()) {
            break;
        }
    }

    return collected.build();
}

From source file:org.cto.VVS3Box.S3Sample.java

License:Open Source License

public static void main(String[] args) throws IOException {
    /*/*from w ww.j  a va2  s .  c  om*/
     * This credentials provider implementation loads your AWS credentials
     * from a properties file at the root of your classpath.
     *
     * Important: Be sure to fill in your AWS access credentials in the
     *            AwsCredentials.properties file before you try to run this
     *            sample.
     * http://aws.amazon.com/security-credentials
     */
    AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "lior.test-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possibly since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket.  Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}

From source file:org.deeplearning4j.aws.s3.reader.S3Downloader.java

License:Apache License

/**
 * Return the keys for a bucket
 * @param bucket the bucket to get the keys for
 * @return the bucket's keys, gathered across every page of the listing
 */
public List<String> keysForBucket(String bucket) {
    AmazonS3 client = getClient();
    List<String> keys = new ArrayList<>();
    ListObjectsRequest request = new ListObjectsRequest().withBucketName(bucket);

    boolean morePages = true;
    while (morePages) {
        ObjectListing page = client.listObjects(request);
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            keys.add(summary.getKey());
        }
        // advance the same request to where this page left off
        request.setMarker(page.getNextMarker());
        morePages = page.isTruncated();
    }

    return keys;
}

From source file:org.deeplearning4j.aws.s3.reader.S3Downloader.java

License:Apache License

/**
 * Paginates through a bucket's keys invoking the listener 
 * at each key/*from w ww .j a  v a  2s. co m*/
 * @param bucket the bucket to iterate
 * @param listener the listener
 */
public void paginate(String bucket, BucketKeyListener listener) {
    AmazonS3 s3 = getClient();
    ObjectListing list = s3.listObjects(bucket);
    for (S3ObjectSummary summary : list.getObjectSummaries()) {
        if (listener != null)
            listener.onKey(s3, bucket, summary.getKey());
    }

    while (list.isTruncated()) {
        list = s3.listNextBatchOfObjects(list);
        for (S3ObjectSummary summary : list.getObjectSummaries()) {
            if (listener != null)
                listener.onKey(s3, bucket, summary.getKey());
        }
    }

}

From source file:org.duracloud.s3storage.S3StorageProvider.java

License:Apache License

/**
 * Fetches a single page of object summaries from a bucket.
 *
 * @param bucketName bucket to list
 * @param prefix     key prefix passed to the listing request
 * @param maxResults maximum number of keys to request; values above
 *                   {@code Integer.MAX_VALUE} are clamped to it
 * @param marker     listing marker passed to the listing request
 * @return the object summaries of the returned page
 * @throws StorageException (flagged RETRY) when the S3 client call fails
 */
private List<S3ObjectSummary> listObjects(String bucketName, String prefix, long maxResults, String marker) {
    // Clamp instead of wrapping: a plain long-to-int narrowing would turn large
    // values into arbitrary (possibly negative) key counts.
    int numResults = (int) Math.min(maxResults, (long) Integer.MAX_VALUE);
    ListObjectsRequest request = new ListObjectsRequest(bucketName, prefix, marker, null, numResults);
    try {
        ObjectListing objectListing = s3Client.listObjects(request);
        return objectListing.getObjectSummaries();
    } catch (AmazonClientException e) {
        String err = "Could not get contents of S3 bucket " + bucketName + " due to error: " + e.getMessage();
        throw new StorageException(err, e, RETRY);
    }
}

From source file:org.elasticsearch.cloud.aws.blobstore.AbstarctS3BlobContainer.java

License:Apache License

/**
 * Lists the blobs under this container, optionally restricted to names
 * starting with {@code blobNamePrefix}, following truncated listings to the
 * end. Blob names are the object keys with the leading {@code keyPath}
 * portion cut off.
 *
 * @param blobNamePrefix prefix to filter by, or null to list under {@code keyPath}
 * @return an immutable map from blob name to its metadata (name and size)
 */
@Override
public ImmutableMap<String, BlobMetaData> listBlobsByPrefix(@Nullable String blobNamePrefix)
        throws IOException {
    ImmutableMap.Builder<String, BlobMetaData> blobs = ImmutableMap.builder();

    // first page: listed under the built prefix when one was supplied, else under keyPath
    ObjectListing page = blobStore.client().listObjects(blobStore.bucket(),
            blobNamePrefix != null ? buildKey(blobNamePrefix) : keyPath);

    for (;;) {
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            String blobName = summary.getKey().substring(keyPath.length());
            blobs.put(blobName, new PlainBlobMetaData(blobName, summary.getSize()));
        }

        if (!page.isTruncated()) {
            break;
        }
        page = blobStore.client().listNextBatchOfObjects(page);
    }

    return blobs.build();
}

From source file:org.elasticsearch.cloud.aws.blobstore.S3BlobStore.java

License:Apache License

/**
 * Deletes every object listed under the given path, issuing multi-object
 * delete requests in batches while paging through the listing.
 *
 * @param path blob path whose "/"-joined form (plus a trailing "/" when
 *             non-empty) is used as the listing prefix
 */
@Override
public void delete(BlobPath path) {
    //From http://docs.amazonwebservices.com/AmazonS3/latest/dev/DeletingMultipleObjectsUsingJava.html
    //we can do at most 1K objects per delete
    ArrayList<KeyVersion> pending = new ArrayList<KeyVersion>();

    String prefix = path.buildAsString("/");
    if (!prefix.isEmpty()) {
        prefix = prefix + "/";
    }

    //The bucket name for the delete requests is taken from the object listing
    ObjectListing page = client.listObjects(bucket, prefix);
    DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(page.getBucketName());

    for (;;) {
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            pending.add(new KeyVersion(summary.getKey()));
            if (pending.size() <= 500) {
                continue;
            }
            //More than 500 keys queued: send this batch and start a fresh request
            deleteRequest.setKeys(pending);
            client.deleteObjects(deleteRequest);
            deleteRequest = new DeleteObjectsRequest(page.getBucketName());
            pending.clear();
        }

        if (!page.isTruncated()) {
            break;
        }
        page = client.listNextBatchOfObjects(page);
    }

    //Send whatever is left over from the last partial batch
    if (pending.isEmpty()) {
        return;
    }
    deleteRequest.setKeys(pending);
    client.deleteObjects(deleteRequest);
}