List of usage examples for com.amazonaws.services.s3.model ObjectListing getObjectSummaries
public List<S3ObjectSummary> getObjectSummaries()
From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java
License:Open Source License
/**
 * Scans the public-datasets S3 bucket for crawl-log uploads and registers any
 * new ones as parse candidates.
 *
 * Pages through every key under CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE, matches
 * keys against {@code crawlLogPattern}, and adds previously unseen candidates
 * to {@code _candidates} (keyed by timestamp). Candidates already completed
 * ({@code _complete}) or already known/active are skipped.
 *
 * @param initialScan when true, also runs {@link #scanForCompletions()} after
 *                    the listing pass so restart state is reconciled
 * @return true on success, false if an IOException was caught
 * @throws IOException declared but handled internally; kept for interface
 *                     compatibility — NOTE(review): confirm callers rely on it
 */
private boolean doScan(boolean initialScan) throws IOException {
    try {
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));
        ObjectListing response = s3Client.listObjects(new ListObjectsRequest()
                .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE));
        do {
            LOG.info("Response Key Count:" + response.getObjectSummaries().size());

            for (S3ObjectSummary entry : response.getObjectSummaries()) {
                // Only keys matching the crawl-log naming pattern are of interest.
                Matcher matcher = crawlLogPattern.matcher(entry.getKey());
                if (matcher.matches()) {
                    ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                    if (candidate == null) {
                        LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                    } else {
                        LOG.info("Candidate is:" + candidate);
                        // Guard shared candidate/complete/active collections against
                        // concurrent access from other threads on this master.
                        synchronized (this) {
                            if (_complete.contains(candidate._crawlLogName)) {
                                LOG.info("Skipping completed Candidate:" + candidate);
                            } else {
                                if (!_candidates.containsEntry(candidate._timestamp, candidate)
                                        && !_active.containsKey(candidate)) {
                                    // update candidate size here ...
                                    candidate._size = entry.getSize();
                                    LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                                    _candidates.put(candidate._timestamp, candidate);
                                } else {
                                    LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                                }
                            }
                        }
                    }
                }
            }

            // Paginate: S3 truncates listings, so keep fetching until the last page,
            // bailing out early if a shutdown was requested.
            if (response.isTruncated()) {
                response = s3Client.listNextBatchOfObjects(response);
            } else {
                break;
            }
        } while (!shutdownFlag.get());

        if (initialScan) {
            // search for completions
            synchronized (this) {
                scanForCompletions();
            }
        }

        return true;
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}
From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java
License:Open Source License
public void scanForCompletions() throws IOException { AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey)); ObjectListing response = s3Client.listObjects(new ListObjectsRequest().withBucketName("aws-publicdatasets") .withPrefix(CC_BUCKET_ROOT + CC_PARSER_INTERMEDIATE)); do {/*from w ww. j a va 2s .c om*/ LOG.info("Response Key Count:" + response.getObjectSummaries().size()); for (S3ObjectSummary entry : response.getObjectSummaries()) { Matcher matcher = doneFilePattern.matcher(entry.getKey()); if (matcher.matches()) { ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey()); if (candidate == null) { LOG.error("Failed to Parse Candidate for:" + entry.getKey()); } else { long partialTimestamp = Long.parseLong(matcher.group(2)); long position = Long.parseLong(matcher.group(3)); LOG.info("Found completion for Log:" + candidate._crawlLogName + " TS:" + partialTimestamp + " Pos:" + position); candidate._lastValidPos = position; // ok lookup existing entry if present ... ParseCandidate existingCandidate = Iterables.find(_candidates.get(candidate._timestamp), Predicates.equalTo(candidate)); // if existing candidate found if (existingCandidate != null) { LOG.info("Found existing candidate with last pos:" + existingCandidate._lastValidPos); if (candidate._lastValidPos > existingCandidate._lastValidPos) { existingCandidate._lastValidPos = candidate._lastValidPos; if (candidate._lastValidPos == candidate._size) { LOG.info("Found last pos == size for candidate:" + candidate._crawlLogName + ".REMOVING FROM ACTIVE - MOVING TO COMPLETE"); _candidates.remove(candidate._timestamp, candidate); _complete.add(candidate._crawlLogName); } } } else { LOG.info("Skipping Completion for CrawlLog:" + candidate._crawlLogName + " because existing candidate was not found."); } } } } if (response.isTruncated()) { response = s3Client.listNextBatchOfObjects(response); } else { break; } } while (true); }
From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java
License:Open Source License
public static List<S3ObjectSummary> getMetadataPaths(String s3AccessKeyId, String s3SecretKey, String bucketName, String segmentPath) throws IOException { AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey)); ImmutableList.Builder<S3ObjectSummary> listBuilder = new ImmutableList.Builder<S3ObjectSummary>(); String metadataFilterKey = segmentPath + "metadata-"; LOG.info("Prefix Search Key is:" + metadataFilterKey); ObjectListing response = s3Client .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix(metadataFilterKey)); do {//from w w w.ja v a 2 s. c om LOG.info("Response Key Count:" + response.getObjectSummaries().size()); for (S3ObjectSummary entry : response.getObjectSummaries()) { listBuilder.add(entry); } if (response.isTruncated()) { response = s3Client.listNextBatchOfObjects(response); } else { break; } } while (true); return listBuilder.build(); }
From source file:org.commoncrawl.util.S3BulkTransferUtil.java
License:Open Source License
/**
 * Lists every S3 object summary under the given prefix in the given bucket.
 *
 * Follows truncated listings with {@code listNextBatchOfObjects} until the
 * full key space under the prefix has been enumerated.
 *
 * @param s3AccessKeyId AWS access key id
 * @param s3SecretKey   AWS secret key
 * @param bucketName    bucket to list
 * @param segmentPath   key prefix to restrict the listing to
 * @return an immutable list of all matching object summaries
 * @throws IOException declared for interface compatibility
 */
public static List<S3ObjectSummary> getPaths(String s3AccessKeyId, String s3SecretKey, String bucketName,
        String segmentPath) throws IOException {

    AmazonS3Client client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));
    ImmutableList.Builder<S3ObjectSummary> collected = new ImmutableList.Builder<S3ObjectSummary>();

    ObjectListing page = client
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix(segmentPath));
    boolean morePages = true;
    while (morePages) {
        LOG.info("Response Key Count:" + page.getObjectSummaries().size());
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            collected.add(summary);
        }
        if (page.isTruncated()) {
            // Fetch the next page of the (truncated) listing.
            page = client.listNextBatchOfObjects(page);
        } else {
            morePages = false;
        }
    }
    return collected.build();
}
From source file:org.cto.VVS3Box.S3Sample.java
License:Open Source License
public static void main(String[] args) throws IOException { /*/*from w ww.j a va2 s . c om*/ * This credentials provider implementation loads your AWS credentials * from a properties file at the root of your classpath. * * Important: Be sure to fill in your AWS access credentials in the * AwsCredentials.properties file before you try to run this * sample. * http://aws.amazon.com/security-credentials */ AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider()); Region usWest2 = Region.getRegion(Regions.US_WEST_2); s3.setRegion(usWest2); String bucketName = "lior.test-" + UUID.randomUUID(); String key = "MyObjectKey"; System.out.println("==========================================="); System.out.println("Getting Started with Amazon S3"); System.out.println("===========================================\n"); try { /* * Create a new S3 bucket - Amazon S3 bucket names are globally unique, * so once a bucket name has been taken by any user, you can't create * another bucket with that same name. * * You can optionally specify a location for your bucket if you want to * keep your data closer to your applications or users. */ System.out.println("Creating bucket " + bucketName + "\n"); s3.createBucket(bucketName); /* * List the buckets in your account */ System.out.println("Listing buckets"); for (Bucket bucket : s3.listBuckets()) { System.out.println(" - " + bucket.getName()); } System.out.println(); /* * Upload an object to your bucket - You can easily upload a file to * S3, or upload directly an InputStream if you know the length of * the data in the stream. You can also specify your own metadata * when uploading to S3, which allows you set a variety of options * like content-type and content-encoding, plus additional metadata * specific to your applications. 
*/ System.out.println("Uploading a new object to S3 from a file\n"); s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile())); /* * Download an object - When you download an object, you get all of * the object's metadata and a stream from which to read the contents. * It's important to read the contents of the stream as quickly as * possibly since the data is streamed directly from Amazon S3 and your * network connection will remain open until you read all the data or * close the input stream. * * GetObjectRequest also supports several other options, including * conditional downloading of objects based on modification times, * ETags, and selectively downloading a range of an object. */ System.out.println("Downloading an object"); S3Object object = s3.getObject(new GetObjectRequest(bucketName, key)); System.out.println("Content-Type: " + object.getObjectMetadata().getContentType()); displayTextInputStream(object.getObjectContent()); /* * List objects in your bucket by prefix - There are many options for * listing the objects in your bucket. Keep in mind that buckets with * many objects might truncate their results when listing their objects, * so be sure to check if the returned object listing is truncated, and * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve * additional results. */ System.out.println("Listing objects"); ObjectListing objectListing = s3 .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My")); for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) { System.out.println( " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")"); } System.out.println(); /* * Delete an object - Unless versioning has been turned on for your bucket, * there is no way to undelete an object, so use caution when deleting objects. 
*/ System.out.println("Deleting an object\n"); s3.deleteObject(bucketName, key); /* * Delete a bucket - A bucket must be completely empty before it can be * deleted, so remember to delete any objects from your buckets before * you try to delete them. */ System.out.println("Deleting bucket " + bucketName + "\n"); s3.deleteBucket(bucketName); } catch (AmazonServiceException ase) { System.out.println("Caught an AmazonServiceException, which means your request made it " + "to Amazon S3, but was rejected with an error response for some reason."); System.out.println("Error Message: " + ase.getMessage()); System.out.println("HTTP Status Code: " + ase.getStatusCode()); System.out.println("AWS Error Code: " + ase.getErrorCode()); System.out.println("Error Type: " + ase.getErrorType()); System.out.println("Request ID: " + ase.getRequestId()); } catch (AmazonClientException ace) { System.out.println("Caught an AmazonClientException, which means the client encountered " + "a serious internal problem while trying to communicate with S3, " + "such as not being able to access the network."); System.out.println("Error Message: " + ace.getMessage()); } }
From source file:org.deeplearning4j.aws.s3.reader.S3Downloader.java
License:Apache License
/**
 * Return the keys for a bucket.
 *
 * Pages through the bucket's listing via the request marker until S3 reports
 * the listing is no longer truncated.
 *
 * @param bucket the bucket to get the keys for
 * @return the bucket's keys
 */
public List<String> keysForBucket(String bucket) {
    AmazonS3 s3 = getClient();
    List<String> keys = new ArrayList<>();
    ListObjectsRequest request = new ListObjectsRequest().withBucketName(bucket);

    while (true) {
        ObjectListing page = s3.listObjects(request);
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            keys.add(summary.getKey());
        }
        if (!page.isTruncated()) {
            break;
        }
        // Resume the next page where this one ended.
        request.setMarker(page.getNextMarker());
    }

    return keys;
}
From source file:org.deeplearning4j.aws.s3.reader.S3Downloader.java
License:Apache License
/** * Paginates through a bucket's keys invoking the listener * at each key/*from w ww .j a v a 2s. co m*/ * @param bucket the bucket to iterate * @param listener the listener */ public void paginate(String bucket, BucketKeyListener listener) { AmazonS3 s3 = getClient(); ObjectListing list = s3.listObjects(bucket); for (S3ObjectSummary summary : list.getObjectSummaries()) { if (listener != null) listener.onKey(s3, bucket, summary.getKey()); } while (list.isTruncated()) { list = s3.listNextBatchOfObjects(list); for (S3ObjectSummary summary : list.getObjectSummaries()) { if (listener != null) listener.onKey(s3, bucket, summary.getKey()); } } }
From source file:org.duracloud.s3storage.S3StorageProvider.java
License:Apache License
/**
 * Lists a single page of object summaries from an S3 bucket.
 *
 * @param bucketName bucket to list
 * @param prefix     key prefix filter (may be null)
 * @param maxResults maximum number of results to return; narrowed to int for
 *                   the SDK request
 * @param marker     listing continuation marker (may be null)
 * @return the object summaries of the first (and only requested) page
 * @throws StorageException wrapping any AmazonClientException, flagged RETRY
 */
private List<S3ObjectSummary> listObjects(String bucketName, String prefix, long maxResults, String marker) {
    // FIX: a plain narrowing cast replaces the deprecated boxing round-trip
    // `new Long(maxResults).intValue()`; both produce identical results.
    int numResults = (int) maxResults;
    ListObjectsRequest request = new ListObjectsRequest(bucketName, prefix, marker, null, numResults);
    try {
        ObjectListing objectListing = s3Client.listObjects(request);
        return objectListing.getObjectSummaries();
    } catch (AmazonClientException e) {
        String err = "Could not get contents of S3 bucket " + bucketName + " due to error: " + e.getMessage();
        throw new StorageException(err, e, RETRY);
    }
}
From source file:org.elasticsearch.cloud.aws.blobstore.AbstarctS3BlobContainer.java
License:Apache License
@Override public ImmutableMap<String, BlobMetaData> listBlobsByPrefix(@Nullable String blobNamePrefix) throws IOException { ImmutableMap.Builder<String, BlobMetaData> blobsBuilder = ImmutableMap.builder(); ObjectListing prevListing = null;//w w w . j a v a2 s .com while (true) { ObjectListing list; if (prevListing != null) { list = blobStore.client().listNextBatchOfObjects(prevListing); } else { if (blobNamePrefix != null) { list = blobStore.client().listObjects(blobStore.bucket(), buildKey(blobNamePrefix)); } else { list = blobStore.client().listObjects(blobStore.bucket(), keyPath); } } for (S3ObjectSummary summary : list.getObjectSummaries()) { String name = summary.getKey().substring(keyPath.length()); blobsBuilder.put(name, new PlainBlobMetaData(name, summary.getSize())); } if (list.isTruncated()) { prevListing = list; } else { break; } } return blobsBuilder.build(); }
From source file:org.elasticsearch.cloud.aws.blobstore.S3BlobStore.java
License:Apache License
/**
 * Deletes every object under the given blob path using batched
 * multi-object deletes.
 *
 * Lists the path page by page, accumulating keys and flushing a
 * DeleteObjectsRequest once a batch fills up, then flushes any remainder
 * after the listing is exhausted.
 *
 * @param path the blob path whose objects are removed; joined with "/" and
 *             suffixed with "/" (when non-empty) to form the S3 key prefix
 */
@Override
public void delete(BlobPath path) {
    ObjectListing prevListing = null;
    //From http://docs.amazonwebservices.com/AmazonS3/latest/dev/DeletingMultipleObjectsUsingJava.html
    //we can do at most 1K objects per delete
    //We don't know the bucket name until first object listing
    DeleteObjectsRequest multiObjectDeleteRequest = null;
    ArrayList<KeyVersion> keys = new ArrayList<KeyVersion>();
    while (true) {
        ObjectListing list;
        if (prevListing != null) {
            // Subsequent iterations continue the truncated listing.
            list = client.listNextBatchOfObjects(prevListing);
        } else {
            // First iteration: build the key prefix and start the listing; the
            // delete request is created here because the bucket name comes from
            // the listing response.
            String keyPath = path.buildAsString("/");
            if (!keyPath.isEmpty()) {
                keyPath = keyPath + "/";
            }
            list = client.listObjects(bucket, keyPath);
            multiObjectDeleteRequest = new DeleteObjectsRequest(list.getBucketName());
        }
        for (S3ObjectSummary summary : list.getObjectSummaries()) {
            keys.add(new KeyVersion(summary.getKey()));
            //Every 500 objects batch the delete request
            // NOTE(review): the check is `> 500`, so batches actually flush at
            // 501 keys — still under the S3 per-request limit, but off by one
            // relative to the comment above.
            if (keys.size() > 500) {
                multiObjectDeleteRequest.setKeys(keys);
                client.deleteObjects(multiObjectDeleteRequest);
                multiObjectDeleteRequest = new DeleteObjectsRequest(list.getBucketName());
                keys.clear();
            }
        }
        if (list.isTruncated()) {
            prevListing = list;
        } else {
            break;
        }
    }
    // Flush whatever keys remain after the final (or only) page.
    if (!keys.isEmpty()) {
        multiObjectDeleteRequest.setKeys(keys);
        client.deleteObjects(multiObjectDeleteRequest);
    }
}