Usage examples for com.amazonaws.services.s3.model.S3ObjectSummary.getSize()
public long getSize()
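Before the project examples below, here is a minimal, self-contained sketch of getSize() in use. The bucket name is a placeholder, and the client is assumed to pick up credentials from the default provider chain:

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class GetSizeExample {
    public static void main(String[] args) {
        AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
        long totalBytes = 0;
        // "example-bucket" is a placeholder; substitute your own bucket name.
        ObjectListing listing = s3.listObjects("example-bucket");
        while (true) {
            for (S3ObjectSummary summary : listing.getObjectSummaries()) {
                // getSize() returns the object's size in bytes, taken from the
                // listing itself -- no object data is downloaded.
                totalBytes += summary.getSize();
            }
            if (!listing.isTruncated()) {
                break;
            }
            listing = s3.listNextBatchOfObjects(listing);
        }
        System.out.println("Total size: " + totalBytes + " bytes");
    }
}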
From source file:com.mesosphere.dcos.cassandra.executor.backup.S3StorageDriver.java
License:Apache License
private static Map<String, Long> listSnapshotFiles(AmazonS3Client amazonS3Client, String bucketName,
        String backupName) {
    Map<String, Long> snapshotFiles = new HashMap<>();
    final ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucketName)
            .withPrefix(backupName);
    ListObjectsV2Result result;
    do {
        result = amazonS3Client.listObjectsV2(req);
        for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
            snapshotFiles.put(objectSummary.getKey(), objectSummary.getSize());
        }
        req.setContinuationToken(result.getNextContinuationToken());
    } while (result.isTruncated());
    return snapshotFiles;
}
From source file:com.mycompany.mytubeaws.ListServlet.java
/**
 * Handles the HTTP <code>POST</code> method.
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
@Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    ArrayList<String> nameList = new ArrayList<>();
    ArrayList<String> sizeList = new ArrayList<>();
    ArrayList<String> dateList = new ArrayList<>();

    ObjectListing objects = s3.listObjects(bucketName);
    while (true) {
        for (S3ObjectSummary objectSummary : objects.getObjectSummaries()) {
            nameList.add(objectSummary.getKey());
            sizeList.add(Long.toString(objectSummary.getSize()));
            dateList.add(StringUtils.fromDate(objectSummary.getLastModified()));
        }
        // Check for truncation before fetching the next batch, so the final,
        // non-truncated page is still included in the lists.
        if (!objects.isTruncated()) {
            break;
        }
        objects = s3.listNextBatchOfObjects(objects);
    }

    request.setAttribute("nameList", nameList);
    request.setAttribute("sizeList", sizeList);
    request.setAttribute("dateList", dateList);
    request.getRequestDispatcher("/UploadResult.jsp").forward(request, response);
}
From source file:com.pinterest.terrapin.TerrapinUtil.java
License:Apache License
static public List<Pair<Path, Long>> getS3FileList(AWSCredentials credentials, String s3Bucket,
        String s3KeyPrefix) {
    List<Pair<Path, Long>> fileSizePairList = Lists.newArrayListWithCapacity(Constants.MAX_ALLOWED_SHARDS);
    AmazonS3Client s3Client = new AmazonS3Client(credentials);
    // List files and build the path using the s3n: prefix.
    // Note that keys > marker are retrieved, where the > is by lexicographic order.
    String prefix = s3KeyPrefix;
    String marker = prefix;
    while (true) {
        boolean reachedEnd = false;
        ObjectListing listing = s3Client
                .listObjects(new ListObjectsRequest().withBucketName(s3Bucket).withMarker(marker));
        List<S3ObjectSummary> summaries = listing.getObjectSummaries();
        if (summaries.isEmpty()) {
            break;
        }
        for (S3ObjectSummary summary : summaries) {
            if (summary.getKey().startsWith(prefix)) {
                fileSizePairList.add(new ImmutablePair(new Path("s3n", s3Bucket, "/" + summary.getKey()),
                        summary.getSize()));
                if (fileSizePairList.size() > Constants.MAX_ALLOWED_SHARDS) {
                    throw new RuntimeException("Too many files " + fileSizePairList.size());
                }
            } else {
                // We found a key which does not match the prefix, stop.
                reachedEnd = true;
                break;
            }
        }
        if (reachedEnd) {
            break;
        }
        marker = summaries.get(summaries.size() - 1).getKey();
    }
    return fileSizePairList;
}
From source file:com.proofpoint.event.collector.combiner.S3StorageSystem.java
License:Apache License
@Override
public List<StoredObject> listObjects(URI storageArea) {
    S3StorageHelper.checkValidS3Uri(storageArea);

    String s3Path = getS3ObjectKey(storageArea);
    Iterator<S3ObjectSummary> iter = new S3ObjectListing(s3Service,
            new ListObjectsRequest(getS3Bucket(storageArea), s3Path, null, "/", null)).iterator();

    ImmutableList.Builder<StoredObject> builder = ImmutableList.builder();
    while (iter.hasNext()) {
        S3ObjectSummary summary = iter.next();
        builder.add(new StoredObject(buildS3Location(storageArea, summary.getKey().substring(s3Path.length())),
                summary.getETag(), summary.getSize(), summary.getLastModified().getTime()));
    }
    return builder.build();
}
From source file:com.sjsu.backitup.AwsConsoleApp.java
License:Open Source License
public static void main(String[] args) throws Exception {
    System.out.println("===========================================");
    System.out.println("Welcome to the AWS Java SDK!");
    System.out.println("===========================================");
    init();

    /*
     * Amazon EC2
     *
     * The AWS EC2 client allows you to create, delete, and administer
     * instances programmatically.
     *
     * In this sample, we use an EC2 client to get a list of all the
     * availability zones, and all instances sorted by reservation id.
     */
    try {
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Response Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }

    /*
     * Amazon SimpleDB
     *
     * The AWS SimpleDB client allows you to query and manage your data
     * stored in SimpleDB domains (similar to tables in a relational DB).
     *
     * In this sample, we use a SimpleDB client to iterate over all the
     * domains owned by the current user, and add up the number of items
     * (similar to rows of data in a relational DB) in each domain.
     */

    /*
     * Amazon S3
     *
     * The AWS S3 client allows you to manage buckets and programmatically
     * put and get objects to those buckets.
     *
     * In this sample, we use an S3 client to iterate over all the buckets
     * owned by the current user, and all the object metadata in each
     * bucket, to obtain a total object and space usage count. This is done
     * without ever actually downloading a single object -- the requests
     * work with object metadata only.
     */
    try {
        List<Bucket> buckets = s3.listBuckets();

        long totalSize = 0;
        int totalItems = 0;
        for (Bucket bucket : buckets) {
            /*
             * In order to save bandwidth, an S3 object listing does not
             * contain every object in the bucket; after a certain point the
             * S3ObjectListing is truncated, and further pages must be
             * obtained with the AmazonS3Client.listNextBatchOfObjects()
             * method.
             */
            ObjectListing objects = s3.listObjects(bucket.getName());
            while (true) {
                for (S3ObjectSummary objectSummary : objects.getObjectSummaries()) {
                    totalSize += objectSummary.getSize();
                    totalItems++;
                }
                // Check for truncation before fetching the next batch, so the
                // final, non-truncated page is still counted.
                if (!objects.isTruncated()) {
                    break;
                }
                objects = s3.listNextBatchOfObjects(objects);
            }
        }

        System.out.println("You have " + buckets.size() + " Amazon S3 bucket(s), " + "containing " + totalItems
                + " objects with a total size of " + totalSize + " bytes.");
    } catch (AmazonServiceException ase) {
        /*
         * AmazonServiceExceptions represent an error response from an AWS
         * service, i.e. your request made it to AWS, but the AWS service
         * either found it invalid or encountered an error trying to execute
         * it.
         */
        System.out.println("Error Message: " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code: " + ase.getErrorCode());
        System.out.println("Error Type: " + ase.getErrorType());
        System.out.println("Request ID: " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        /*
         * AmazonClientExceptions represent an error that occurred inside
         * the client on the local host, either while trying to send the
         * request to AWS or interpret the response. For example, if no
         * network connection is available, the client won't be able to
         * connect to AWS to execute a request and will throw an
         * AmazonClientException.
         */
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file:com.sjsu.faceit.example.S3Sample.java
License:Open Source License
public static void main(String[] args) throws IOException {
    /*
     * Important: Be sure to fill in your AWS access credentials in the
     * AwsCredentials.properties file before you try to run this sample.
     * http://aws.amazon.com/security-credentials
     */
    System.out.println(new File(".").getAbsolutePath());
    AmazonS3 s3 = new AmazonS3Client(
            new PropertiesCredentials(S3Sample.class.getResourceAsStream("AwsCredentials.properties")));

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, "abc/" + key, new File("/Users/prayag/Desktop/2.jpg")));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, "abc/" + key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        // System.out.println("Deleting an object\n");
        // s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        // System.out.println("Deleting bucket " + bucketName + "\n");
        // s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message: " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code: " + ase.getErrorCode());
        System.out.println("Error Type: " + ase.getErrorType());
        System.out.println("Request ID: " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file:com.springboot.demo.framework.aws.s3.S3Sample.java
License:Open Source License
public static void main(String[] args) throws IOException {
    /*
     * The ProfileCredentialsProvider will return your [default]
     * credential profile by reading from the credentials file located at
     * (~/.aws/credentials).
     */
    AWSCredentials basicCredentials = new BasicAWSCredentials(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY);

    AWSCredentials credentials = null;
    try {
        credentials = new ProfileCredentialsProvider().getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException("Cannot load the credentials from the credential profiles file. "
                + "Please make sure that your credentials file is at the correct "
                + "location (~/.aws/credentials), and is in valid format.", e);
    }

    /*
     * Create S3 Client
     */
    AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you to set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Returns a URL for the object stored in the specified bucket and key
         */
        URL url = s3.getUrl(bucketName, key);
        System.out.println("upload file url : " + url.toString());

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message: " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code: " + ase.getErrorCode());
        System.out.println("Error Type: " + ase.getErrorType());
        System.out.println("Request ID: " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java
License:Apache License
private S3Offset produce(S3Offset offset, int maxBatchSize, BatchContext batchContext)
        throws StageException, BadSpoolObjectException {
    BatchMaker batchMaker = batchContext.getBatchMaker();
    S3ObjectSummary s3Object;

    if (offset != null) {
        spooler.postProcessOlderObjectIfNeeded(offset);
    }

    // Check if we have an object to produce records from. Otherwise get one from the spooler.
    if (needToFetchNextObjectFromSpooler(offset)) {
        updateGauge(Status.SPOOLING, null);
        offset = fetchNextObjectFromSpooler(offset, batchContext);
        LOG.debug("Object '{}' with offset '{}' fetched from Spooler", offset.getKey(), offset.getOffset());
    } else {
        // Check if the current object was modified between batches.
        LOG.debug("Checking if Object '{}' has been modified between batches", getCurrentObject().getKey());
        if (!getCurrentObject().getETag().equals(offset.geteTag())) {
            // Send the current object to the error archive and get the next object from the spooler.
            LOG.debug("Object '{}' has been modified between batches. Sending the object to error",
                    getCurrentObject().getKey());
            try {
                spooler.handleCurrentObjectAsError();
            } catch (AmazonClientException e) {
                throw new StageException(Errors.S3_SPOOLDIR_24, e.toString(), e);
            }
            offset = fetchNextObjectFromSpooler(offset, batchContext);
        }
    }

    s3Object = getCurrentObject();
    if (s3Object != null) {
        amazonS3Source.updateOffset(runnerId, offset);
        try {
            if (parser == null) {
                String recordId = s3ConfigBean.s3Config.bucket + s3ConfigBean.s3Config.delimiter
                        + s3Object.getKey();
                if (dataParser.isWholeFileFormat()) {
                    handleWholeFileDataFormat(s3Object, recordId);
                } else {
                    // Get the S3 object instead of a stream because we want to call close on the
                    // object when we close the parser (and stream).
                    if (context.isPreview()) {
                        long fetchSize = s3Object.getSize() > S3Constants.DEFAULT_FETCH_SIZE
                                ? S3Constants.DEFAULT_FETCH_SIZE
                                : s3Object.getSize();
                        if (fetchSize > 0) {
                            object = AmazonS3Util.getObjectRange(s3Client, s3ConfigBean.s3Config.bucket,
                                    s3Object.getKey(), fetchSize, s3ConfigBean.sseConfig.useCustomerSSEKey,
                                    s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                        } else {
                            LOG.warn("Size of object with key '{}' is 0", s3Object.getKey());
                            object = AmazonS3Util.getObject(s3Client, s3ConfigBean.s3Config.bucket,
                                    s3Object.getKey(), s3ConfigBean.sseConfig.useCustomerSSEKey,
                                    s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                        }
                    } else {
                        object = AmazonS3Util.getObject(s3Client, s3ConfigBean.s3Config.bucket,
                                s3Object.getKey(), s3ConfigBean.sseConfig.useCustomerSSEKey,
                                s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);
                    }
                    parser = dataParser.getParser(recordId, object.getObjectContent(), offset.getOffset());
                }
                sendLineageEvent(s3Object);
                // We don't use the S3 GetObject range capabilities to skip the already processed
                // offset because the parsers cannot pick up from a non-root doc depth in the case
                // of a single object with records.
            }
            int i = 0;
            updateGauge(Status.READING, offset.toString());
            while (i < maxBatchSize) {
                try {
                    Record record;
                    try {
                        record = parser.parse();
                    } catch (RecoverableDataParserException ex) {
                        // Propagate the partially parsed record to the error stream.
                        record = ex.getUnparsedRecord();
                        setHeaders(record, object);
                        errorRecordHandler
                                .onError(new OnRecordErrorException(record, ex.getErrorCode(), ex.getParams()));
                        // We'll simply continue reading past this recoverable error.
                        continue;
                    }
                    if (record != null) {
                        setHeaders(record, object);
                        batchMaker.addRecord(record);
                        amazonS3Source.incrementNoMoreDataRecordCount();
                        i++;
                        offset.setOffset(parser.getOffset());
                    } else {
                        parser.close();
                        parser = null;
                        if (object != null) {
                            object.close();
                            object = null;
                        }
                        amazonS3Source.incrementNoMoreDataFileCount();
                        offset.setOffset(S3Constants.MINUS_ONE);
                        break;
                    }
                } catch (ObjectLengthException ex) {
                    errorRecordHandler.onError(Errors.S3_SPOOLDIR_02, s3Object.getKey(), offset.toString(), ex);
                    amazonS3Source.incrementNoMoreDataErrorCount();
                    offset.setOffset(S3Constants.MINUS_ONE);
                }
            }
        } catch (AmazonClientException e) {
            LOG.error("Error processing object with key '{}' offset '{}'", s3Object.getKey(), offset, e);
            throw new StageException(Errors.S3_SPOOLDIR_25, e.toString(), e);
        } catch (IOException | DataParserException ex) {
            if (!(ex.getCause() instanceof AbortedException)) {
                offset.setOffset(S3Constants.MINUS_ONE);
                String exOffset;
                if (ex instanceof OverrunException) {
                    exOffset = String.valueOf(((OverrunException) ex).getStreamOffset());
                } else {
                    try {
                        exOffset = (parser != null) ? parser.getOffset() : S3Constants.MINUS_ONE;
                    } catch (IOException ex1) {
                        LOG.warn("Could not get the object offset to report with error, reason: {}",
                                ex1.toString(), ex);
                        exOffset = S3Constants.MINUS_ONE;
                    }
                }
                offset.setOffset(exOffset);

                switch (context.getOnErrorRecord()) {
                case DISCARD:
                    break;
                case TO_ERROR:
                    // We failed to produce a record, which leaves the input file in an unknown
                    // state. All we can do here is throw an exception.
                    throw new BadSpoolObjectException(s3Object.getKey(), exOffset, ex);
                case STOP_PIPELINE:
                    context.reportError(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(), ex);
                    throw new StageException(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(),
                            ex);
                default:
                    throw new IllegalStateException(
                            Utils.format("Unknown OnError value '{}'", context.getOnErrorRecord(), ex));
                }
            }
        } finally {
            if (S3Constants.MINUS_ONE.equals(offset.getOffset())) {
                if (parser != null) {
                    try {
                        parser.close();
                        parser = null;
                    } catch (IOException ex) {
                        LOG.debug("Exception while closing parser : '{}'", ex.toString(), ex);
                    }
                }
                if (object != null) {
                    try {
                        object.close();
                        object = null;
                    } catch (IOException ex) {
                        LOG.debug("Exception while closing S3 object : '{}'", ex.toString(), ex);
                    }
                }
            }
        }
    }
    context.processBatch(batchContext);
    updateGauge(Status.BATCH_GENERATED, offset.toString());
    return offset;
}
From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java
License:Apache License
private void handleWholeFileDataFormat(S3ObjectSummary s3ObjectSummary, String recordId) throws StageException {
    S3Object partialS3ObjectForMetadata;
    // Partial object with fetchSize 1 byte.
    // This is mostly used for extracting metadata and such.
    partialS3ObjectForMetadata = AmazonS3Util.getObjectRange(s3Client, s3ConfigBean.s3Config.bucket,
            s3ObjectSummary.getKey(), 1, s3ConfigBean.sseConfig.useCustomerSSEKey,
            s3ConfigBean.sseConfig.customerKey, s3ConfigBean.sseConfig.customerKeyMd5);

    S3FileRef.Builder s3FileRefBuilder = new S3FileRef.Builder().s3Client(s3Client)
            .s3ObjectSummary(s3ObjectSummary).useSSE(s3ConfigBean.sseConfig.useCustomerSSEKey)
            .customerKey(s3ConfigBean.sseConfig.customerKey)
            .customerKeyMd5(s3ConfigBean.sseConfig.customerKeyMd5)
            .bufferSize((int) dataParser.suggestedWholeFileBufferSize()).createMetrics(true)
            .totalSizeInBytes(s3ObjectSummary.getSize()).rateLimit(dataParser.wholeFileRateLimit());

    if (dataParser.isWholeFileChecksumRequired()) {
        s3FileRefBuilder.verifyChecksum(true).checksumAlgorithm(HashingUtil.HashType.MD5)
                // 128-bit hex encoded md5 checksum.
                .checksum(partialS3ObjectForMetadata.getObjectMetadata().getETag());
    }
    Map<String, Object> metadata = AmazonS3Util.getMetaData(partialS3ObjectForMetadata);
    metadata.put(S3Constants.BUCKET, s3ObjectSummary.getBucketName());
    metadata.put(S3Constants.OBJECT_KEY, s3ObjectSummary.getKey());
    metadata.put(S3Constants.OWNER, s3ObjectSummary.getOwner());
    metadata.put(S3Constants.SIZE, s3ObjectSummary.getSize());
    metadata.put(HeaderAttributeConstants.FILE_NAME, s3ObjectSummary.getKey());
    metadata.remove(S3Constants.CONTENT_LENGTH);
    parser = dataParser.getParser(recordId, metadata, s3FileRefBuilder.build());
    // The object is assigned so that the setHeaders() function can use it to get
    // metadata information about the object.
    object = partialS3ObjectForMetadata;
}
From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Source.java
License:Apache License
public String produce(S3ObjectSummary s3Object, String offset, int maxBatchSize, BatchMaker batchMaker)
        throws StageException, BadSpoolObjectException {
    try {
        if (parser == null) {
            // Get the S3 object instead of a stream because we want to call close on the
            // object when we close the parser (and stream).
            if (getContext().isPreview()) {
                long fetchSize = s3Object.getSize() > DEFAULT_FETCH_SIZE ? DEFAULT_FETCH_SIZE
                        : s3Object.getSize();
                if (fetchSize > 0) {
                    object = AmazonS3Util.getObjectRange(s3ConfigBean.s3Config.getS3Client(),
                            s3ConfigBean.s3Config.bucket, s3Object.getKey(), fetchSize);
                } else {
                    LOG.warn("Size of object with key '{}' is 0", s3Object.getKey());
                    object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                            s3ConfigBean.s3Config.bucket, s3Object.getKey());
                }
            } else {
                object = AmazonS3Util.getObject(s3ConfigBean.s3Config.getS3Client(),
                        s3ConfigBean.s3Config.bucket, s3Object.getKey());
            }
            String recordId = s3ConfigBean.s3Config.bucket + s3ConfigBean.s3Config.delimiter
                    + s3Object.getKey();
            parser = s3ConfigBean.dataFormatConfig.getParserFactory().getParser(recordId,
                    object.getObjectContent(), Long.parseLong(offset));
            // We don't use the S3 GetObject range capabilities to skip the already processed
            // offset because the parsers cannot pick up from a non-root doc depth in the case
            // of a single object with records.
        }
        for (int i = 0; i < maxBatchSize; i++) {
            try {
                Record record = parser.parse();
                if (record != null) {
                    batchMaker.addRecord(record);
                    offset = parser.getOffset();
                } else {
                    parser.close();
                    parser = null;
                    object.close();
                    object = null;
                    offset = MINUS_ONE;
                    break;
                }
            } catch (ObjectLengthException ex) {
                String exOffset = offset;
                offset = MINUS_ONE;
                switch (getContext().getOnErrorRecord()) {
                case DISCARD:
                    break;
                case TO_ERROR:
                    getContext().reportError(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                    break;
                case STOP_PIPELINE:
                    throw new StageException(Errors.S3_SPOOLDIR_02, s3Object.getKey(), exOffset);
                default:
                    throw new IllegalStateException(Utils.format("It should never happen. OnError '{}'",
                            getContext().getOnErrorRecord(), ex));
                }
            }
        }
    } catch (AmazonClientException e) {
        LOG.error("Error processing object with key '{}' offset '{}'", s3Object.getKey(), offset);
        throw new StageException(Errors.S3_SPOOLDIR_25, e.toString());
    } catch (IOException | DataParserException ex) {
        if (ex.getCause() instanceof AbortedException) {
            // If the pipeline was stopped, the Amazon S3 client thread catches the interrupt
            // and throws an aborted exception. Do not treat this as an error; instead, produce
            // whatever you have and move on.
        } else {
            offset = MINUS_ONE;
            String exOffset;
            if (ex instanceof OverrunException) {
                exOffset = String.valueOf(((OverrunException) ex).getStreamOffset());
            } else {
                try {
                    exOffset = (parser != null) ? parser.getOffset() : MINUS_ONE;
                } catch (IOException ex1) {
                    LOG.warn("Could not get the object offset to report with error, reason: {}",
                            ex1.toString(), ex);
                    exOffset = MINUS_ONE;
                }
            }
            switch (getContext().getOnErrorRecord()) {
            case DISCARD:
                break;
            case TO_ERROR:
                throw new BadSpoolObjectException(s3Object.getKey(), exOffset, ex);
            case STOP_PIPELINE:
                getContext().reportError(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString());
                throw new StageException(Errors.S3_SPOOLDIR_03, s3Object.getKey(), exOffset, ex.toString(), ex);
            default:
                throw new IllegalStateException(Utils.format("It should never happen. OnError '{}'",
                        getContext().getOnErrorRecord(), ex));
            }
        }
    } finally {
        if (MINUS_ONE.equals(offset)) {
            if (parser != null) {
                try {
                    parser.close();
                    parser = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing parser : '{}'", ex.toString());
                }
            }
            if (object != null) {
                try {
                    object.close();
                    object = null;
                } catch (IOException ex) {
                    LOG.debug("Exception while closing S3 object : '{}'", ex.toString());
                }
            }
        }
    }
    return offset;
}