List of usage examples for com.amazonaws.services.s3.model S3ObjectSummary getKey
public String getKey()
From source file:org.alanwilliamson.amazon.s3.List.java
License:Open Source License
public cfData execute(cfSession _session, cfArgStructData argStruct) throws cfmRunTimeException { AmazonKey amazonKey = getAmazonKey(_session, argStruct); AmazonS3 s3Client = getAmazonS3(amazonKey); String bucket = getNamedStringParam(argStruct, "bucket", null); String prefix = getNamedStringParam(argStruct, "prefix", ""); if (bucket == null) throwException(_session, "Please specify a bucket"); try {/* ww w . ja v a 2 s .co m*/ // Create the results cfQueryResultData qD = new cfQueryResultData(new String[] { "key", "size", "modified", "etag" }, null); qD.setQuerySource("AmazonS3." + amazonKey.getDataSource()); ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket) .withDelimiter("/").withPrefix(prefix); ObjectListing objectListing; do { objectListing = s3Client.listObjects(listObjectsRequest); java.util.List<String> prefixes = objectListing.getCommonPrefixes(); // first add the prefixes for (String nextPrefix : prefixes) { qD.addRow(1); qD.setCurrentRow(qD.getSize()); qD.setCell(1, new cfStringData(nextPrefix)); qD.setCell(2, new cfNumberData(0)); qD.setCell(3, cfNullData.NULL); qD.setCell(4, cfNullData.NULL); } for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) { // don't include the prefix being listed if (objectSummary.getKey().equals(prefix)) { continue; } qD.addRow(1); qD.setCurrentRow(qD.getSize()); qD.setCell(1, new cfStringData(objectSummary.getKey())); qD.setCell(2, new cfNumberData(objectSummary.getSize())); qD.setCell(3, new cfDateData(objectSummary.getLastModified())); qD.setCell(4, new cfStringData(objectSummary.getETag())); } listObjectsRequest.setMarker(objectListing.getNextMarker()); } while (objectListing.isTruncated()); return qD; } catch (Exception e) { throwException(_session, "AmazonS3: " + e.getMessage()); return cfBooleanData.FALSE; } }
From source file:org.apache.beam.sdk.io.aws.s3.S3FileSystem.java
License:Apache License
private ExpandedGlob expandGlob(S3ResourceId glob) { // The S3 API can list objects, filtered by prefix, but not by wildcard. // Here, we find the longest prefix without wildcard "*", // then filter the results with a regex. checkArgument(glob.isWildcard(), "isWildcard"); String keyPrefix = glob.getKeyNonWildcardPrefix(); Pattern wildcardRegexp = Pattern.compile(wildcardToRegexp(glob.getKey())); LOG.debug("expanding bucket {}, prefix {}, against pattern {}", glob.getBucket(), keyPrefix, wildcardRegexp.toString());/*from w ww . j a v a2 s . c om*/ ImmutableList.Builder<S3ResourceId> expandedPaths = ImmutableList.builder(); String continuationToken = null; do { ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(glob.getBucket()) .withPrefix(keyPrefix).withContinuationToken(continuationToken); ListObjectsV2Result result; try { result = amazonS3.get().listObjectsV2(request); } catch (AmazonClientException e) { return ExpandedGlob.create(glob, new IOException(e)); } continuationToken = result.getNextContinuationToken(); for (S3ObjectSummary objectSummary : result.getObjectSummaries()) { // Filter against regex. if (wildcardRegexp.matcher(objectSummary.getKey()).matches()) { S3ResourceId expandedPath = S3ResourceId .fromComponents(objectSummary.getBucketName(), objectSummary.getKey()) .withSize(objectSummary.getSize()).withLastModified(objectSummary.getLastModified()); LOG.debug("Expanded S3 object path {}", expandedPath); expandedPaths.add(expandedPath); } } } while (continuationToken != null); return ExpandedGlob.create(glob, expandedPaths.build()); }
From source file:org.apache.camel.component.aws.s3.S3Consumer.java
License:Apache License
protected Queue<Exchange> createExchanges(List<S3ObjectSummary> s3ObjectSummaries) { if (LOG.isTraceEnabled()) { LOG.trace("Received {} messages in this poll", s3ObjectSummaries.size()); }// w ww .j a va 2s .c o m Queue<Exchange> answer = new LinkedList<Exchange>(); for (S3ObjectSummary s3ObjectSummary : s3ObjectSummaries) { S3Object s3Object = getAmazonS3Client().getObject(s3ObjectSummary.getBucketName(), s3ObjectSummary.getKey()); Exchange exchange = getEndpoint().createExchange(s3Object); answer.add(exchange); } return answer; }
From source file:org.apache.druid.firehose.s3.StaticS3FirehoseFactory.java
License:Apache License
/** * Create an {@link URI} from the given {@link S3ObjectSummary}. The result URI is composed as below. * * <pre>// w ww . j av a2s . c o m * {@code s3://{BUCKET_NAME}/{OBJECT_KEY}} * </pre> */ private static URI toUri(S3ObjectSummary object) { final String originalAuthority = object.getBucketName(); final String originalPath = object.getKey(); final String authority = originalAuthority.endsWith("/") ? originalAuthority.substring(0, originalAuthority.length() - 1) : originalAuthority; final String path = originalPath.startsWith("/") ? originalPath.substring(1) : originalPath; return URI.create(StringUtils.format("s3://%s/%s", authority, path)); }
From source file:org.apache.druid.storage.s3.S3DataSegmentFinder.java
License:Apache License
@Override public Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor) throws SegmentLoadingException { final Map<String, Pair<DataSegment, Long>> timestampedSegments = new HashMap<>(); try {// w w w. j a v a 2 s . co m final Iterator<S3ObjectSummary> objectSummaryIterator = S3Utils.objectSummaryIterator(s3Client, config.getBucket(), workingDirPath.length() == 0 ? config.getBaseKey() : workingDirPath, config.getMaxListingLength()); while (objectSummaryIterator.hasNext()) { final S3ObjectSummary objectSummary = objectSummaryIterator.next(); if ("descriptor.json".equals(S3Utils.toFilename(objectSummary.getKey()))) { final String descriptorJson = objectSummary.getKey(); String indexZip = S3Utils.indexZipForSegmentPath(descriptorJson); if (S3Utils.isObjectInBucketIgnoringPermission(s3Client, config.getBucket(), indexZip)) { try (S3Object indexObject = s3Client.getObject(config.getBucket(), descriptorJson); S3ObjectInputStream is = indexObject.getObjectContent()) { final ObjectMetadata objectMetadata = indexObject.getObjectMetadata(); final DataSegment dataSegment = jsonMapper.readValue(is, DataSegment.class); log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip); final Map<String, Object> loadSpec = dataSegment.getLoadSpec(); if (!S3StorageDruidModule.SCHEME.equals(loadSpec.get("type")) || !indexZip.equals(loadSpec.get("key")) || !config.getBucket().equals(loadSpec.get("bucket"))) { loadSpec.put("type", S3StorageDruidModule.SCHEME); loadSpec.put("key", indexZip); loadSpec.put("bucket", config.getBucket()); if (updateDescriptor) { log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", descriptorJson, indexObject); final ByteArrayInputStream bais = new ByteArrayInputStream( StringUtils.toUtf8(jsonMapper.writeValueAsString(dataSegment))); s3Client.putObject(config.getBucket(), descriptorJson, bais, objectMetadata); } } DataSegmentFinder.putInMapRetainingNewest(timestampedSegments, dataSegment, objectMetadata.getLastModified() == null ? 0 : objectMetadata.getLastModified().getTime()); } } else { throw new SegmentLoadingException( "index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip); } } } } catch (AmazonServiceException e) { throw new SegmentLoadingException(e, "Problem interacting with S3"); } catch (IOException e) { throw new SegmentLoadingException(e, "IO exception"); } catch (Exception e) { Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class); Throwables.propagate(e); } return timestampedSegments.values().stream().map(x -> x.lhs).collect(Collectors.toSet()); }
From source file:org.apache.druid.storage.s3.S3DataSegmentPuller.java
License:Apache License
private FileObject buildFileObject(final URI uri) throws AmazonServiceException { final S3Coords coords = new S3Coords(checkURI(uri)); final S3ObjectSummary objectSummary = S3Utils.getSingleObjectSummary(s3Client, coords.bucket, coords.path); final String path = uri.getPath(); return new FileObject() { S3Object s3Object = null; @Override//from ww w.j a v a 2 s .c o m public URI toUri() { return uri; } @Override public String getName() { final String ext = Files.getFileExtension(path); return Files.getNameWithoutExtension(path) + (Strings.isNullOrEmpty(ext) ? "" : ("." + ext)); } /** * Returns an input stream for a s3 object. The returned input stream is not thread-safe. */ @Override public InputStream openInputStream() throws IOException { try { if (s3Object == null) { // lazily promote to full GET s3Object = s3Client.getObject(objectSummary.getBucketName(), objectSummary.getKey()); } final InputStream in = s3Object.getObjectContent(); final Closer closer = Closer.create(); closer.register(in); closer.register(s3Object); return new FilterInputStream(in) { @Override public void close() throws IOException { closer.close(); } }; } catch (AmazonServiceException e) { throw new IOE(e, "Could not load S3 URI [%s]", uri); } } @Override public OutputStream openOutputStream() { throw new UOE("Cannot stream S3 output"); } @Override public Reader openReader(boolean ignoreEncodingErrors) { throw new UOE("Cannot open reader"); } @Override public CharSequence getCharContent(boolean ignoreEncodingErrors) { throw new UOE("Cannot open character sequence"); } @Override public Writer openWriter() { throw new UOE("Cannot open writer"); } @Override public long getLastModified() { return objectSummary.getLastModified().getTime(); } @Override public boolean delete() { throw new UOE( "Cannot delete S3 items anonymously. jetS3t doesn't support authenticated deletes easily."); } }; }
From source file:org.apache.druid.storage.s3.S3TimestampVersionedDataFinder.java
License:Apache License
/** * Gets the key with the most recently modified timestamp. * `pattern` is evaluated against the entire key AFTER the path given in `uri`. * The substring `pattern` is matched against will have a leading `/` removed. * For example `s3://some_bucket/some_prefix/some_key` with a URI of `s3://some_bucket/some_prefix` will match against `some_key`. * `s3://some_bucket/some_prefixsome_key` with a URI of `s3://some_bucket/some_prefix` will match against `some_key` * `s3://some_bucket/some_prefix//some_key` with a URI of `s3://some_bucket/some_prefix` will match against `/some_key` * * @param uri The URI of in the form of `s3://some_bucket/some_key` * @param pattern The pattern matcher to determine if a *key* is of interest, or `null` to match everything. * * @return A URI to the most recently modified object which matched the pattern. *//* w ww.ja va 2 s . c om*/ @Override public URI getLatestVersion(final URI uri, final @Nullable Pattern pattern) { try { return RetryUtils.retry(() -> { final S3Coords coords = new S3Coords(checkURI(uri)); long mostRecent = Long.MIN_VALUE; URI latest = null; final Iterator<S3ObjectSummary> objectSummaryIterator = S3Utils.objectSummaryIterator(s3Client, coords.bucket, coords.path, MAX_LISTING_KEYS); while (objectSummaryIterator.hasNext()) { final S3ObjectSummary objectSummary = objectSummaryIterator.next(); String keyString = objectSummary.getKey().substring(coords.path.length()); if (keyString.startsWith("/")) { keyString = keyString.substring(1); } if (pattern != null && !pattern.matcher(keyString).matches()) { continue; } final long latestModified = objectSummary.getLastModified().getTime(); if (latestModified >= mostRecent) { mostRecent = latestModified; latest = new URI(StringUtils.format("s3://%s/%s", objectSummary.getBucketName(), objectSummary.getKey())); } } return latest; }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT); } catch (Exception e) { throw new RuntimeException(e); } }
From source file:org.apache.druid.storage.s3.S3Utils.java
License:Apache License
/** * Gets a single {@link S3ObjectSummary} from s3. Since this method might return a wrong object if there are multiple * objects that match the given key, this method should be used only when it's guaranteed that the given key is unique * in the given bucket.//from ww w. ja va2 s . c o m * * @param s3Client s3 client * @param bucket s3 bucket * @param key unique key for the object to be retrieved */ public static S3ObjectSummary getSingleObjectSummary(ServerSideEncryptingAmazonS3 s3Client, String bucket, String key) { final ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(bucket).withPrefix(key) .withMaxKeys(1); final ListObjectsV2Result result = s3Client.listObjectsV2(request); // Using getObjectSummaries().size() instead of getKeyCount as, in some cases // it is observed that even though the getObjectSummaries returns some data // keyCount is still zero. if (result.getObjectSummaries().size() == 0) { throw new ISE("Cannot find object for bucket[%s] and key[%s]", bucket, key); } final S3ObjectSummary objectSummary = result.getObjectSummaries().get(0); if (!objectSummary.getBucketName().equals(bucket) || !objectSummary.getKey().equals(key)) { throw new ISE("Wrong object[%s] for bucket[%s] and key[%s]", objectSummary, bucket, key); } return objectSummary; }
From source file:org.apache.flink.cloudsort.io.aws.AwsInput.java
License:Apache License
@Override public List<InputSplit> list() { Preconditions.checkNotNull(bucket);//from ww w. java 2s.c o m Preconditions.checkNotNull(prefix); List<InputSplit> objectNames = new ArrayList<>(); // this will read credentials from user's home directory AmazonS3 s3client = new AmazonS3Client(new ProfileCredentialsProvider()); final ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucket).withPrefix(prefix); ListObjectsV2Result result; int index = 0; do { result = s3client.listObjectsV2(req); for (S3ObjectSummary objectSummary : result.getObjectSummaries()) { String objectName = objectSummary.getKey(); long objectSize = objectSummary.getSize(); objectNames.add(new InputSplit(index++, objectName, objectSize)); } req.setContinuationToken(result.getNextContinuationToken()); } while (result.isTruncated()); return objectNames; }
From source file:org.apache.hadoop.fs.s3a.S3AFileSystem.java
License:Apache License
/** * Renames Path src to Path dst. Can take place on local fs * or remote DFS./*from w w w. j a v a 2 s.c o m*/ * * Warning: S3 does not support renames. This method does a copy which can take S3 some time to execute with large * files and directories. Since there is no Progressable passed in, this can time out jobs. * * Note: This implementation differs with other S3 drivers. Specifically: * Fails if src is a file and dst is a directory. * Fails if src is a directory and dst is a file. * Fails if the parent of dst does not exist or is a file. * Fails if dst is a directory that is not empty. * * @param src path to be renamed * @param dst new path after rename * @throws IOException on failure * @return true if rename is successful */ public boolean rename(Path src, Path dst) throws IOException { LOG.info("Rename path " + src + " to " + dst); String srcKey = pathToKey(src); String dstKey = pathToKey(dst); if (srcKey.length() == 0 || dstKey.length() == 0) { LOG.info("rename: src or dst are empty"); return false; } if (srcKey.equals(dstKey)) { LOG.info("rename: src and dst refer to the same file"); return true; } S3AFileStatus srcStatus; try { srcStatus = getFileStatus(src); } catch (FileNotFoundException e) { LOG.info("rename: src not found " + src); return false; } S3AFileStatus dstStatus = null; try { dstStatus = getFileStatus(dst); if (srcStatus.isFile() && dstStatus.isDirectory()) { LOG.info("rename: src is a file and dst is a directory"); return false; } if (srcStatus.isDirectory() && dstStatus.isFile()) { LOG.info("rename: src is a directory and dst is a file"); return false; } if (dstStatus.isDirectory() && !dstStatus.isEmptyDirectory()) { return false; } } catch (FileNotFoundException e) { // Parent must exist Path parent = dst.getParent(); if (!pathToKey(parent).isEmpty()) { try { S3AFileStatus dstParentStatus = getFileStatus(dst.getParent()); if (!dstParentStatus.isDirectory()) { return false; } } catch (FileNotFoundException e2) { return false; } } } // Ok! Time to start if (srcStatus.isFile()) { if (LOG.isDebugEnabled()) { LOG.debug("rename: renaming file " + src + " to " + dst); } copyFile(srcKey, dstKey); delete(src, false); } else { if (LOG.isDebugEnabled()) { LOG.debug("rename: renaming directory " + src + " to " + dst); } // This is a directory to directory copy if (!dstKey.endsWith("/")) { dstKey = dstKey + "/"; } if (!srcKey.endsWith("/")) { srcKey = srcKey + "/"; } List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<DeleteObjectsRequest.KeyVersion>(); if (dstStatus != null && dstStatus.isEmptyDirectory()) { copyFile(srcKey, dstKey); statistics.incrementWriteOps(1); keysToDelete.add(new DeleteObjectsRequest.KeyVersion(srcKey)); } ListObjectsRequest request = new ListObjectsRequest(); request.setBucketName(bucket); request.setPrefix(srcKey); request.setMaxKeys(maxKeys); ObjectListing objects = s3.listObjects(request); statistics.incrementReadOps(1); while (true) { for (S3ObjectSummary summary : objects.getObjectSummaries()) { keysToDelete.add(new DeleteObjectsRequest.KeyVersion(summary.getKey())); String newDstKey = dstKey + summary.getKey().substring(srcKey.length()); copyFile(summary.getKey(), newDstKey); if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) { DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket) .withKeys(keysToDelete); s3.deleteObjects(deleteRequest); statistics.incrementWriteOps(1); keysToDelete.clear(); } } if (objects.isTruncated()) { objects = s3.listNextBatchOfObjects(objects); statistics.incrementReadOps(1); } else { break; } } if (!keysToDelete.isEmpty()) { DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket); deleteRequest.setKeys(keysToDelete); s3.deleteObjects(deleteRequest); statistics.incrementWriteOps(1); } } if (src.getParent() != dst.getParent()) { deleteUnnecessaryFakeDirectories(dst.getParent()); createFakeDirectoryIfNecessary(src.getParent()); } return true; }