List of usage examples for com.amazonaws.services.s3.model.S3ObjectSummary.getKey()
public String getKey()
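Returns the key under which the object is stored in its bucket. Before the project examples, here is a minimal, self-contained sketch (the bucket name "my-bucket" and prefix "logs/" are hypothetical; the SDK v1 classes are the same ones the examples below use) that lists objects and prints each summary's key:

import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class GetKeyExample {
    public static void main(String[] args) {
        AmazonS3 s3 = new AmazonS3Client(new ProfileCredentialsProvider().getCredentials());
        // List objects under a (hypothetical) prefix and print each object's full key,
        // e.g. "logs/2016/01/app.log". getKey() returns the key relative to the bucket.
        ObjectListing listing = s3.listObjects("my-bucket", "logs/");
        for (S3ObjectSummary summary : listing.getObjectSummaries()) {
            System.out.println(summary.getKey());
        }
    }
}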
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java
License: Apache License
private void sendLineageEvent(S3ObjectSummary s3Object) {
    LineageEvent event = context.createLineageEvent(LineageEventType.ENTITY_READ);
    event.setSpecificAttribute(LineageSpecificAttribute.ENDPOINT_TYPE, EndPointType.S3.name());
    event.setSpecificAttribute(LineageSpecificAttribute.ENTITY_NAME, s3Object.getKey());
    event.setSpecificAttribute(LineageSpecificAttribute.DESCRIPTION, s3ConfigBean.s3Config.bucket);
    context.publishLineageEvent(event);
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Source.java
License: Apache License
private S3Offset fetchNextObjectFromSpooler(S3Offset s3Offset) throws StageException {
    setCurrentObject(null);
    try {
        // The next object found in the queue is usually eligible, since objects are processed in
        // chronological order. However, if the configuration is changed after a few files have been
        // processed (say, the prefix is relaxed) and an older file gets selected for processing,
        // it must be ignored.
        S3ObjectSummary nextAvailObj = null;
        do {
            if (nextAvailObj != null) {
                LOG.warn("Ignoring object '{}' in spool directory as it is older than offset object '{}'",
                    nextAvailObj.getKey(), s3Offset.getKey());
            }
            nextAvailObj = spooler.poolForObject(s3Offset, s3ConfigBean.basicConfig.maxWaitTime,
                TimeUnit.MILLISECONDS);
        } while (!isEligible(nextAvailObj, s3Offset));

        if (nextAvailObj == null) {
            // no object to process
            LOG.debug("No new object available in spool directory after '{}' secs, producing empty batch",
                s3ConfigBean.basicConfig.maxWaitTime / 1000);
        } else {
            setCurrentObject(nextAvailObj);
            // If the current offset object is null, or the object returned by the spooler is greater than
            // the current offset object, take the object returned by the spooler as the new object and set
            // the offset to zero. Otherwise the spooler returned the current object, so keep processing it
            // from the last offset processed (known via offset tracking).
            if (s3Offset.getKey() == null
                || isLaterThan(nextAvailObj.getKey(), nextAvailObj.getLastModified().getTime(),
                    s3Offset.getKey(), Long.parseLong(s3Offset.getTimestamp()))) {
                s3Offset = new S3Offset(getCurrentObject().getKey(), ZERO, getCurrentObject().getETag(),
                    String.valueOf(getCurrentObject().getLastModified().getTime()));
            }
        }
    } catch (InterruptedException ex) {
        // The spooler was interrupted while waiting for an object. Log and return; the pipeline agent
        // will invoke us again to wait for an object.
        LOG.warn("Polling interrupted");
    } catch (AmazonClientException e) {
        throw new StageException(Errors.S3_SPOOLDIR_23, e.toString());
    }
    return s3Offset;
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Source.java
License: Apache License
public String produce(S3ObjectSummary s3Object, String offset, int maxBatchSize, BatchMaker batchMaker)
    throws StageException, BadSpoolObjectException {
    try {
        if (parser == null) {
            // Get the S3 object instead of the stream because we want to call close on the object when
            // we close the parser (and stream).
            if (getContext().isPreview()) {
                long fetchSize = s3Object.getSize() > DEFAULT_FETCH_SIZE ? DEFAULT_FETCH_SIZE : s3Object.getSize();
                if (fetchSize > 0) {
                    object = AmazonS3Util.getObjectRange(s3ConfigBean.s3Config.getS3Client(),
                        s3ConfigBean.s3Config.bucket, s3Object.getKey(), fetchSize);
                } else {
                    LOG.warn("Size of object with key '{}' is 0", s3Object.getKey());
                    object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                        s3ConfigBean.s3Config.bucket, s3Object.getKey());
                }
            } else {
                object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                    s3ConfigBean.s3Config.bucket, s3Object.getKey());
            }
            String recordId = s3ConfigBean.s3Config.bucket + s3ConfigBean.s3Config.delimiter + s3Object.getKey();
            parser = s3ConfigBean.dataFormatConfig.getParserFactory().getParser(recordId,
                object.getObjectContent(), Long.parseLong(offset));
            // We don't use the S3 GetObject range capabilities to skip the already-processed offset because
            // the parsers cannot pick up from a non-root doc depth in the case of a single object with records.
        }
        for (int i = 0; i < maxBatchSize; i++) {
            try {
                Record record = parser.parse();
                if (record != null) {
                    batchMaker.addRecord(record);
                    offset = parser.getOffset();
                } else {
                    parser.close();
                    parser = null;
                    object.close();
                    object = null;
                    offset = MINUS_ONE;
                    break;
                }
            } catch (ObjectLengthException ex) {
                String exOffset = offset;
                offset = MINUS_ONE;
                switch (getContext().getOnErrorRecord()) {
                case DISCARD:
                    break;
                case TO_ERROR:
                    getContext().reportError(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                    break;
                case STOP_PIPELINE:
                    throw new StageException(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                default:
                    throw new IllegalStateException(
                        Utils.format("It should never happen. OnError '{}'", getContext().getOnErrorRecord(), ex));
                }
            }
        }
    } catch (AmazonClientException e) {
        LOG.error("Error processing object with key '{}' offset '{}'", s3Object.getKey(), offset);
        throw new StageException(Errors.S3_SPOOLDIR_25, e.toString());
    } catch (IOException | DataParserException ex) {
        if (ex.getCause() instanceof AbortedException) {
            // If the pipeline was stopped, the Amazon S3 client thread catches the interrupt and throws an
            // AbortedException. Do not treat this as an error; produce whatever we have and move on.
        } else {
            offset = MINUS_ONE;
            String exOffset;
            if (ex instanceof OverrunException) {
                exOffset = String.valueOf(((OverrunException) ex).getStreamOffset());
            } else {
                try {
                    exOffset = (parser != null) ? parser.getOffset() : MINUS_ONE;
                } catch (IOException ex1) {
                    LOG.warn("Could not get the object offset to report with error, reason: {}", ex1.toString(), ex);
                    exOffset = MINUS_ONE;
                }
            }
            switch (getContext().getOnErrorRecord()) {
            case DISCARD:
                break;
            case TO_ERROR:
                throw new BadSpoolObjectException(s3Object.getKey(), exOffset, ex);
            case STOP_PIPELINE:
                getContext().reportError(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString());
                throw new StageException(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(), ex);
            default:
                throw new IllegalStateException(
                    Utils.format("It should never happen. OnError '{}'", getContext().getOnErrorRecord(), ex));
            }
        }
    } finally {
        if (MINUS_ONE.equals(offset)) {
            if (parser != null) {
                try {
                    parser.close();
                    parser = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing parser : '{}'", ex.toString());
                }
            }
            if (object != null) {
                try {
                    object.close();
                    object = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing S3 object : '{}'", ex.toString());
                }
            }
        }
    }
    return offset;
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java
License: Apache License
/**
 * Lists objects from Amazon S3 in chronological order [lexicographical order if 2 files have the same
 * timestamp] which are later than or equal to the timestamp of the previous offset object.
 *
 * @param s3Client the S3 client used to list objects
 * @param s3ConfigBean bean holding the bucket and folder configuration
 * @param pathMatcher glob patterns to match file names against
 * @param s3Offset current offset, which provides the timestamp of the previous object
 * @param fetchSize number of objects to fetch in one go
 * @return the oldest "fetchSize" matching objects, in chronological order
 * @throws AmazonClientException
 */
static List<S3ObjectSummary> listObjectsChronologically(AmazonS3Client s3Client, S3ConfigBean s3ConfigBean,
    PathMatcher pathMatcher, AmazonS3Source.S3Offset s3Offset, int fetchSize) throws AmazonClientException {
    // Algorithm:
    // - Full scan all objects that match the file name pattern and which are later than the file in the offset
    // - Select the oldest "fetchSize" number of files and return them
    TreeSet<S3ObjectSummary> treeSet = new TreeSet<>(new Comparator<S3ObjectSummary>() {
        @Override
        public int compare(S3ObjectSummary o1, S3ObjectSummary o2) {
            int result = o1.getLastModified().compareTo(o2.getLastModified());
            if (result != 0) {
                return result;
            }
            // same modified time; use the key name to sort
            return o1.getKey().compareTo(o2.getKey());
        }
    });
    S3Objects s3ObjectSummaries = S3Objects
        .withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.folder)
        .withBatchSize(BATCH_SIZE);
    for (S3ObjectSummary s : s3ObjectSummaries) {
        String fileName = s.getKey().substring(s3ConfigBean.s3Config.folder.length(), s.getKey().length());
        if (!fileName.isEmpty()) {
            // fileName can be empty.
            // If the user manually creates a folder "myFolder/mySubFolder" in bucket "myBucket" and
            // uploads "myObject", then the first objects returned here are:
            //   myFolder/mySubFolder
            //   myFolder/mySubFolder/myObject
            //
            // All is good when the pipeline is run, but preview returns no data. So we should ignore the
            // empty file name, as it has no data.
            if (pathMatcher.matches(Paths.get(fileName)) && isEligible(s, s3Offset)) {
                treeSet.add(s);
            }
            if (treeSet.size() > fetchSize) {
                treeSet.pollLast();
            }
        }
    }
    return new ArrayList<>(treeSet);
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java
License: Apache License
private static boolean isEligible(S3ObjectSummary s, AmazonS3Source.S3Offset s3Offset) {
    // The object is eligible if:
    // 1. Its timestamp is greater than that of the current object in the offset
    // 2. The timestamp is the same but the name is lexicographically greater than the current object
    //    [can happen when multiple objects are uploaded in one go]
    // 3. Same timestamp, same name [same as the current object in the offset]: eligible if it was not
    //    completely processed [offset != -1]
    boolean isEligible = false;
    if (s.getLastModified().compareTo(new Date(Long.parseLong(s3Offset.getTimestamp()))) > 0) {
        isEligible = true;
    } else if (s.getLastModified().compareTo(new Date(Long.parseLong(s3Offset.getTimestamp()))) == 0) {
        // same timestamp; compare names
        if (s.getKey().compareTo(s3Offset.getKey()) > 0) {
            isEligible = true;
        } else if (s.getKey().compareTo(s3Offset.getKey()) == 0) {
            // same timestamp, same name
            // If the current offset is not -1, return the file: it means the previous file was
            // partially processed.
            if (Long.parseLong(s3Offset.getOffset()) != -1) {
                isEligible = true;
            }
        }
    }
    return isEligible;
}
From source file: com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java
License: Apache License
static S3ObjectSummary getObjectSummary(AmazonS3Client s3Client, String bucket, String objectKey) {
    S3ObjectSummary s3ObjectSummary = null;
    S3Objects s3ObjectSummaries = S3Objects.withPrefix(s3Client, bucket, objectKey);
    for (S3ObjectSummary s : s3ObjectSummaries) {
        if (s.getKey().equals(objectKey)) {
            s3ObjectSummary = s;
            break;
        }
    }
    return s3ObjectSummary;
}
From source file: com.streamsets.pipeline.stage.origin.s3.S3Spooler.java
License: Apache License
void addObjectToQueue(S3ObjectSummary objectSummary, boolean checkCurrent) {
    Preconditions.checkNotNull(objectSummary, "objectSummary cannot be null");
    if (checkCurrent) {
        Preconditions.checkState(currentObject == null
            || currentObject.getLastModified().compareTo(objectSummary.getLastModified()) < 0);
    }
    if (!objectQueue.contains(objectSummary)) {
        if (objectQueue.size() >= MAX_SPOOL_SIZE) {
            LOG.warn("Exceeded '{}' queued files", objectQueue.size());
        }
        objectQueue.add(objectSummary);
        spoolQueueMeter.mark(objectQueue.size());
    } else {
        LOG.warn("Object '{}' already in queue, ignoring", objectSummary.getKey());
    }
}
From source file: com.supprema.utils.S3Sample.java
License: Open Source License
public static void main(String[] args) throws IOException {
    /*
     * The ProfileCredentialsProvider will return your [fabiano-user-s3]
     * credential profile by reading from the credentials file located at
     * (/Users/fabianorodriguesmatias/.aws/credentials).
     */
    AWSCredentials credentials = null;
    try {
        credentials = new ProfileCredentialsProvider("fabiano-user-s3").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException(
            "Cannot load the credentials from the credential profiles file. "
                + "Please make sure that your credentials file is at the correct "
                + "location (/Users/fabianorodriguesmatias/.aws/credentials), and is in valid format.",
            e);
    }

    AmazonS3 s3 = new AmazonS3Client(credentials);
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account.
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload an InputStream directly if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
            + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
            + "a serious internal problem while trying to communicate with S3, "
            + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file: com.tfnsnproject.util.S3StorageManager.java
License: Open Source License
/**
 * Deletes the specified S3 object from the S3 storage service. If a
 * storage path is passed in that has child S3 objects, it will recursively
 * delete the underlying objects.
 *
 * @param s3Store the S3 object to be deleted
 */
public void delete(S3StorageObject s3Store) {
    if (s3Store.getStoragePath() == null || s3Store.getStoragePath().equals("")) {
        logger.log(Level.WARNING, "Empty storage path passed to delete method");
        return; // We don't want to delete everything in a path
    }
    // Go through the store structure and delete child objects
    ObjectListing listing = s3client.listObjects(s3Store.getBucketName(), s3Store.getStoragePath());
    while (true) {
        List<S3ObjectSummary> objectList = listing.getObjectSummaries();
        for (S3ObjectSummary summary : objectList) {
            s3client.deleteObject(s3Store.getBucketName(), summary.getKey());
        }
        if (listing.isTruncated()) {
            listing = s3client.listNextBatchOfObjects(listing);
        } else {
            break;
        }
    }
}
From source file: com.topera.epoch.service.S3Util.java
License: Open Source License
public static void main(String[] args) throws IOException {
    /*
     * Create your credentials file at ~/.aws/credentials (C:\Users\USER_NAME\.aws\credentials for
     * Windows users) and save the following lines after replacing the underlined values with your own.
     *
     * [default]
     * aws_access_key_id = YOUR_ACCESS_KEY_ID
     * aws_secret_access_key = YOUR_SECRET_ACCESS_KEY
     */
    AWSCredentials creds = new AWSCredentials() {
        public String getAWSSecretKey() {
            // Placeholder only: never hardcode real credentials in source code
            return "YOUR_SECRET_ACCESS_KEY";
        }

        public String getAWSAccessKeyId() {
            // Placeholder only: never hardcode real credentials in source code
            return "YOUR_ACCESS_KEY_ID";
        }
    };
    AmazonS3 s3 = new AmazonS3Client(creds);
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account.
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload an InputStream directly if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
            + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
            + "a serious internal problem while trying to communicate with S3, "
            + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}