List of usage examples for com.amazonaws.services.s3.model.S3ObjectSummary#getKey

public String getKey()

Gets the key under which this object is stored in Amazon S3.
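Before the collected examples, a minimal self-contained sketch of the usual pattern: list a bucket and read each summary's key. The bucket name and prefix below are placeholders, and credentials are assumed to come from the default AWS profile.

// Minimal sketch (not from the examples below); "my-bucket" and "photos/" are placeholders.
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class GetKeyExample {
    public static void main(String[] args) {
        AmazonS3 s3 = new AmazonS3Client(new ProfileCredentialsProvider());
        ObjectListing listing = s3.listObjects(
                new ListObjectsRequest().withBucketName("my-bucket").withPrefix("photos/"));
        for (S3ObjectSummary summary : listing.getObjectSummaries()) {
            // getKey() returns the full key, e.g. "photos/2015/cat.jpg"
            System.out.println(summary.getKey() + " (" + summary.getSize() + " bytes)");
        }
    }
}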
From source file:org.caboclo.clients.AmazonClient.java
License:Open Source License
public ArrayList<String> getAllChildren(String folderName) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setPrefix(folderName);
    ObjectListing listing = s3.listObjects(listRequest);
    ArrayList<String> list = new ArrayList<String>();
    System.out.println(listing.getObjectSummaries().size());
    for (S3ObjectSummary summ : listing.getObjectSummaries()) {
        list.add(summ.getKey());
    }
    return list;
}
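One thing to watch in getAllChildren above: a single listObjects call returns at most one page of results (up to 1,000 keys), so larger folders are silently truncated. A sketch of a paginated variant, assuming the same s3 client and getBucketName() helper as the class above:

// Paginated variant (illustrative name); follows truncated listings with
// listNextBatchOfObjects, the same pattern the EC2ParserMaster examples below use.
public ArrayList<String> getAllChildrenPaged(String folderName) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setPrefix(folderName);
    ArrayList<String> list = new ArrayList<String>();
    ObjectListing listing = s3.listObjects(listRequest);
    while (true) {
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            list.add(summ.getKey());
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3.listNextBatchOfObjects(listing);
    }
    return list;
}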
From source file:org.caboclo.clients.AmazonClient.java
License:Open Source License
public List<String> getAllChildren(String folderName, String bucket) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(bucket);
    if (!(folderName == null || folderName.equals(""))) {
        listRequest.setPrefix(folderName);
    }
    ObjectListing listing = s3.listObjects(listRequest);
    ArrayList<String> list = new ArrayList<String>();
    for (S3ObjectSummary summ : listing.getObjectSummaries()) {
        list.add(summ.getKey());
    }
    return list;
}
From source file:org.caboclo.clients.AmazonClient.java
License:Open Source License
public void deleteBucketContents(String bucket) {
    ObjectListing listing = s3.listObjects(bucket);
    System.out.println(listing.getObjectSummaries().size());
    for (S3ObjectSummary summ : listing.getObjectSummaries()) {
        s3.deleteObject(bucket, summ.getKey());
    }
}
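The same truncation caveat applies to deleteBucketContents: a bucket holding more than one listing page keeps its remaining objects. A paginated sketch (the method name is illustrative):

// Paginated variant; deletes every object, not just the first listing page.
public void deleteAllBucketContents(String bucket) {
    ObjectListing listing = s3.listObjects(bucket);
    while (true) {
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            s3.deleteObject(bucket, summ.getKey());
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3.listNextBatchOfObjects(listing);
    }
}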
From source file:org.caboclo.clients.AmazonClient.java
License:Open Source License
public List<String> listBucket(String bkt, String prefix, String delimiter) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(bkt);
    listRequest.setDelimiter(delimiter);
    listRequest.setPrefix(prefix);
    ObjectListing listing = s3.listObjects(listRequest);
    ArrayList<String> list = new ArrayList<String>();
    for (S3ObjectSummary summ : listing.getObjectSummaries()) {
        list.add(summ.getKey());
    }
    return list;
}
From source file:org.caboclo.clients.AmazonClient.java
License:Open Source License
@Override
public List<RemoteFile> getChildren(String folderName) throws IOException {
    if (!folderName.endsWith("/")) {
        folderName = folderName + "/";
    }
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setDelimiter("/");
    listRequest.setPrefix(folderName);
    ObjectListing listing = s3.listObjects(listRequest);
    ArrayList<RemoteFile> list = new ArrayList<>();
    for (S3ObjectSummary summ : listing.getObjectSummaries()) {
        String name = summ.getKey();
        long size = summ.getSize();
        boolean isDirectory = isFolder(name);
        RemoteFile file = new RemoteFile(name, isDirectory, size);
        list.add(file);
    }
    return list;
}
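A caveat worth noting: when a delimiter is set, keys under direct subfolders are folded into common prefixes and do not appear as object summaries, so getChildren above only sees subfolders if the client stores explicit folder marker keys (which its isFolder check suggests). A minimal sketch of reading both halves of such a listing; the bucket and prefix are placeholders:

// Sketch, not from AmazonClient: with a "/" delimiter, files arrive as object
// summaries while direct subfolders arrive as common prefixes.
ObjectListing listing = s3.listObjects(new ListObjectsRequest()
        .withBucketName("my-bucket")     // placeholder bucket
        .withPrefix("backups/2015/")     // placeholder prefix
        .withDelimiter("/"));
for (S3ObjectSummary summ : listing.getObjectSummaries()) {
    System.out.println("file:   " + summ.getKey());
}
for (String folder : listing.getCommonPrefixes()) {
    System.out.println("folder: " + folder);
}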
From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java
License:Open Source License
private boolean doScan(boolean initialScan) throws IOException {
    try {
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));
        ObjectListing response = s3Client.listObjects(new ListObjectsRequest()
                .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE));
        do {
            LOG.info("Response Key Count:" + response.getObjectSummaries().size());
            for (S3ObjectSummary entry : response.getObjectSummaries()) {
                Matcher matcher = crawlLogPattern.matcher(entry.getKey());
                if (matcher.matches()) {
                    ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                    if (candidate == null) {
                        LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                    } else {
                        LOG.info("Candidate is:" + candidate);
                        synchronized (this) {
                            if (_complete.contains(candidate._crawlLogName)) {
                                LOG.info("Skipping completed Candidate:" + candidate);
                            } else {
                                if (!_candidates.containsEntry(candidate._timestamp, candidate)
                                        && !_active.containsKey(candidate)) {
                                    // update candidate size here ...
                                    candidate._size = entry.getSize();
                                    LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                                    _candidates.put(candidate._timestamp, candidate);
                                } else {
                                    LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                                }
                            }
                        }
                    }
                }
            }
            if (response.isTruncated()) {
                response = s3Client.listNextBatchOfObjects(response);
            } else {
                break;
            }
        } while (!shutdownFlag.get());
        if (initialScan) {
            // search for completions
            synchronized (this) {
                scanForCompletions();
            }
        }
        return true;
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}
From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java
License:Open Source License
public void scanForCompletions() throws IOException {
    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));
    ObjectListing response = s3Client.listObjects(new ListObjectsRequest()
            .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_PARSER_INTERMEDIATE));
    do {
        LOG.info("Response Key Count:" + response.getObjectSummaries().size());
        for (S3ObjectSummary entry : response.getObjectSummaries()) {
            Matcher matcher = doneFilePattern.matcher(entry.getKey());
            if (matcher.matches()) {
                ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                if (candidate == null) {
                    LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                } else {
                    long partialTimestamp = Long.parseLong(matcher.group(2));
                    long position = Long.parseLong(matcher.group(3));
                    LOG.info("Found completion for Log:" + candidate._crawlLogName + " TS:" + partialTimestamp
                            + " Pos:" + position);
                    candidate._lastValidPos = position;
                    // ok lookup existing entry if present; the three-argument overload of
                    // Iterables.find returns the default (null) instead of throwing when
                    // nothing matches, which the null check below relies on
                    ParseCandidate existingCandidate = Iterables.find(_candidates.get(candidate._timestamp),
                            Predicates.equalTo(candidate), null);
                    // if existing candidate found
                    if (existingCandidate != null) {
                        LOG.info("Found existing candidate with last pos:" + existingCandidate._lastValidPos);
                        if (candidate._lastValidPos > existingCandidate._lastValidPos) {
                            existingCandidate._lastValidPos = candidate._lastValidPos;
                            if (candidate._lastValidPos == candidate._size) {
                                LOG.info("Found last pos == size for candidate:" + candidate._crawlLogName
                                        + ". REMOVING FROM ACTIVE - MOVING TO COMPLETE");
                                _candidates.remove(candidate._timestamp, candidate);
                                _complete.add(candidate._crawlLogName);
                            }
                        }
                    } else {
                        LOG.info("Skipping Completion for CrawlLog:" + candidate._crawlLogName
                                + " because existing candidate was not found.");
                    }
                }
            }
        }
        if (response.isTruncated()) {
            response = s3Client.listNextBatchOfObjects(response);
        } else {
            break;
        }
    } while (true);
}
From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java
License:Open Source License
EC2MetadataTransferUtil(String bucketName, JsonArray pathList) throws IOException {
    _conf = new Configuration();
    _fs = FileSystem.get(_conf);
    LOG.info("Initializing Downloader");
    _downloader = new S3Downloader(bucketName, s3AccessKeyId, s3SecretKey, false);
    _downloader.setMaxParallelStreams(150);
    _downloader.initialize(this);
    LOG.info("Got JSON Array with:" + pathList.size() + " elements");
    for (int i = 0; i < pathList.size(); ++i) {
        LOG.info("Collecting metadata files from path:" + pathList.get(i).toString());
        List<S3ObjectSummary> metadataFiles = getMetadataPaths(s3AccessKeyId, s3SecretKey, bucketName,
                pathList.get(i).getAsString());
        LOG.info("Got:" + metadataFiles.size() + " total files");
        for (S3ObjectSummary metadataFile : metadataFiles) {
            Matcher segmentNameMatcher = metadataInfoPattern.matcher(metadataFile.getKey());
            if (segmentNameMatcher.matches()) {
                String segmentId = segmentNameMatcher.group(1);
                String partExtension = segmentNameMatcher.group(2);
                Path finalSegmentPath = new Path(finalSegmentOutputDir, segmentId);
                Path finalPath = new Path(finalSegmentPath, "metadata-" + partExtension);
                // getFileStatus throws FileNotFoundException for missing paths, so treat
                // that as "not downloaded yet" (mirrors S3BulkTransferUtil below)
                FileStatus fileStatus = null;
                try {
                    fileStatus = _fs.getFileStatus(finalPath);
                } catch (FileNotFoundException e) {
                }
                if (fileStatus != null && fileStatus.getLen() != metadataFile.getSize()) {
                    LOG.error("SRC-DEST SIZE MISMATCH!! SRC:" + metadataFile + " SRC-SIZE:"
                            + metadataFile.getSize() + " DEST:" + finalPath + " DEST-SIZE:"
                            + fileStatus.getLen());
                    // ok delete the destination
                    _fs.delete(finalPath, false);
                    // null file status so that the item gets requeued ...
                    fileStatus = null;
                }
                if (fileStatus == null) {
                    LOG.info("Queueing Item:" + metadataFile);
                    ++_totalQueuedItemsCount;
                    _downloader.fetchItem(metadataFile.getKey());
                } else {
                    LOG.info("Skipping Already Downloaded Item:" + metadataFile + " Found at:" + finalPath);
                }
            }
        }
    }
    LOG.info("Waiting for shutdown event");
    _downloader.waitForCompletion();
}
From source file:org.commoncrawl.util.S3BulkTransferUtil.java
License:Open Source License
S3BulkTransferUtil(String bucketName, String s3AccessKeyId, String s3SecretKey, JsonArray pathList,
        final Path outputPath) throws IOException {
    _conf = new Configuration();
    _fs = FileSystem.get(_conf);
    LOG.info("Initializing Downloader");
    _downloader = new S3Downloader(bucketName, s3AccessKeyId, s3SecretKey, false);
    _downloader.setMaxParallelStreams(150);
    _downloader.initialize(this);
    LOG.info("Got JSON Array with:" + pathList.size() + " elements");
    for (int i = 0; i < pathList.size(); ++i) {
        LOG.info("Collecting files from path:" + pathList.get(i).toString());
        List<S3ObjectSummary> metadataFiles = getPaths(s3AccessKeyId, s3SecretKey, bucketName,
                pathList.get(i).getAsString());
        LOG.info("Got:" + metadataFiles.size() + " total files");
        for (S3ObjectSummary metadataFile : metadataFiles) {
            Path s3Path = new Path("/" + metadataFile.getKey());
            Path finalPath = new Path(outputPath, s3Path.getName());
            FileStatus fileStatus = null;
            try {
                fileStatus = _fs.getFileStatus(finalPath);
            } catch (Exception e) {
            }
            if (fileStatus != null && fileStatus.getLen() != metadataFile.getSize()) {
                LOG.error("SRC-DEST SIZE MISMATCH!! SRC:" + metadataFile + " SRC-SIZE:" + metadataFile.getSize()
                        + " DEST:" + finalPath + " DEST-SIZE:" + fileStatus.getLen());
                // ok delete the destination
                _fs.delete(finalPath, false);
                // null file status so that the item gets requeued ...
                fileStatus = null;
            }
            if (fileStatus == null) {
                LOG.info("Queueing Item:" + metadataFile);
                ++_totalQueuedItemsCount;
                _pathMapping.put(metadataFile.getKey(), finalPath);
                _downloader.fetchItem(metadataFile.getKey());
            } else {
                LOG.info("Skipping Already Downloaded Item:" + metadataFile + " Found at:" + finalPath);
            }
        }
    }
    LOG.info("Waiting for shutdown event");
    _downloader.waitForCompletion();
}
From source file:org.cto.VVS3Box.S3Sample.java
License:Open Source License
public static void main(String[] args) throws IOException {
    /*
     * This credentials provider implementation loads your AWS credentials
     * from a properties file at the root of your classpath.
     *
     * Important: Be sure to fill in your AWS access credentials in the
     * AwsCredentials.properties file before you try to run this sample.
     * http://aws.amazon.com/security-credentials
     */
    AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "lior.test-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(" - " + objectSummary.getKey() + " (size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message: " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code: " + ase.getErrorCode());
        System.out.println("Error Type: " + ase.getErrorType());
        System.out.println("Request ID: " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}