Usage examples for com.amazonaws.services.s3.model.S3ObjectSummary.getSize()
public long getSize()
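Before the project-level examples, a minimal self-contained sketch of the call; the bucket name below is a placeholder, and credentials are assumed to come from the default provider chain:

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class GetSizeExample {
    public static void main(String[] args) {
        AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
        // "my-example-bucket" is a placeholder name, not a real bucket.
        ObjectListing listing = s3.listObjects("my-example-bucket");
        for (S3ObjectSummary summary : listing.getObjectSummaries()) {
            // getSize() reports the object's size in bytes, straight from the
            // listing response; no per-object request is needed.
            System.out.println(summary.getKey() + " (" + summary.getSize() + " bytes)");
        }
    }
}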
From source file:org.apache.hadoop.fs.s3a.S3AUtils.java
License:Apache License
/**
 * Create a file status instance from a listing.
 * @param keyPath path to entry
 * @param summary summary from AWS
 * @param blockSize block size to declare.
 * @return a status entry
 */
public static S3AFileStatus createFileStatus(Path keyPath, S3ObjectSummary summary, long blockSize) {
    if (objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
        return new S3AFileStatus(true, true, keyPath);
    } else {
        return new S3AFileStatus(summary.getSize(), dateToLong(summary.getLastModified()), keyPath, blockSize);
    }
}
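The objectRepresentsDirectory helper referenced above lives in the same Hadoop utility class; roughly, it treats a zero-length object whose key ends in "/" as a directory marker. A paraphrased sketch of that check:

// Paraphrase of the Hadoop S3A directory-marker convention: a key names a
// "directory" when it is non-empty, ends with '/', and the object is empty.
public static boolean objectRepresentsDirectory(final String name, final long size) {
    return !name.isEmpty()
            && name.charAt(name.length() - 1) == '/'
            && size == 0L;
}

This is why getSize() appears in the check: a zero size distinguishes a directory marker from a real object whose key happens to end in "/".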
From source file:org.apache.hadoop.fs.s3r.S3RFileSystem.java
License:Apache License
/**
 * List the statuses of the files/directories in the given path if the path is
 * a directory.
 *
 * @param f given path
 * @return the statuses of the files/directories in the given path
 * @throws FileNotFoundException when the path does not exist;
 *         IOException see specific implementation
 */
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException {
    String key = pathToKey(f);
    if (LOG.isDebugEnabled()) {
        LOG.debug("List status for path: " + f);
    }

    final List<FileStatus> result = new ArrayList<FileStatus>();
    final FileStatus fileStatus = getFileStatus(f);

    if (fileStatus.isDirectory()) {
        if (!key.isEmpty()) {
            key = key + "/";
        }

        ListObjectsRequest request = new ListObjectsRequest();
        request.setBucketName(bucket);
        request.setPrefix(key);
        request.setDelimiter("/");
        request.setMaxKeys(maxKeys);

        if (LOG.isDebugEnabled()) {
            LOG.debug("listStatus: doing listObjects for directory " + key);
        }

        ObjectListing objects = s3.listObjects(request);
        statistics.incrementReadOps(1);

        while (true) {
            for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, workingDir);
                // Skip over keys that are ourselves and old S3N _$folder$ files
                if (keyPath.equals(f) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Ignoring: " + keyPath);
                    }
                    continue;
                }

                if (objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
                    result.add(new S3RFileStatus(true, true, keyPath));
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Adding: fd: " + keyPath);
                    }
                } else {
                    result.add(new S3RFileStatus(summary.getSize(), dateToLong(summary.getLastModified()),
                            keyPath, getDefaultBlockSize(f.makeQualified(uri, workingDir))));
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Adding: fi: " + keyPath);
                    }
                }
            }

            for (String prefix : objects.getCommonPrefixes()) {
                Path keyPath = keyToPath(prefix).makeQualified(uri, workingDir);
                if (keyPath.equals(f)) {
                    continue;
                }
                result.add(new S3RFileStatus(true, false, keyPath));
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding: rd: " + keyPath);
                }
            }

            if (objects.isTruncated()) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("listStatus: list truncated - getting next batch");
                }
                objects = s3.listNextBatchOfObjects(objects);
                statistics.incrementReadOps(1);
            } else {
                break;
            }
        }
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding: rd (not a dir): " + f);
        }
        result.add(fileStatus);
    }

    return result.toArray(new FileStatus[result.size()]);
}
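The while (true) / isTruncated() loop above is the standard pagination idiom for the v1 listing API, since each response carries at most one page of keys (1,000 by default). Distilled on its own, with placeholder bucket and prefix names:

ObjectListing page = s3.listObjects(new ListObjectsRequest()
        .withBucketName("my-example-bucket").withPrefix("logs/"));
while (true) {
    for (S3ObjectSummary summary : page.getObjectSummaries()) {
        // handleObject is a hypothetical callback for each key/size pair.
        handleObject(summary.getKey(), summary.getSize());
    }
    if (!page.isTruncated()) {
        break;
    }
    page = s3.listNextBatchOfObjects(page);
}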
From source file:org.apache.hadoop.fs.s3r.S3RFileSystem.java
License:Apache License
/**
 * Return a file status object that represents the path.
 * @param f The path we want information from
 * @return a FileStatus object
 * @throws FileNotFoundException when the path does not exist;
 *         IOException see specific implementation
 */
public S3RFileStatus getFileStatus(Path f) throws IOException {
    String key = pathToKey(f);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Getting path status for " + f + " (" + key + ")");
    }

    if (!key.isEmpty()) {
        try {
            ObjectMetadata meta = s3.getObjectMetadata(bucket, key);
            statistics.incrementReadOps(1);

            if (objectRepresentsDirectory(key, meta.getContentLength())) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Found exact file: fake directory");
                }
                return new S3RFileStatus(true, true, f.makeQualified(uri, workingDir));
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Found exact file: normal file");
                }
                return new S3RFileStatus(meta.getContentLength(), dateToLong(meta.getLastModified()),
                        f.makeQualified(uri, workingDir),
                        getDefaultBlockSize(f.makeQualified(uri, workingDir)));
            }
        } catch (AmazonServiceException e) {
            if (e.getStatusCode() != 404) {
                printAmazonServiceException(e);
                throw e;
            }
        } catch (AmazonClientException e) {
            printAmazonClientException(e);
            throw e;
        }

        // Necessary?
        if (!key.endsWith("/")) {
            try {
                String newKey = key + "/";
                ObjectMetadata meta = s3.getObjectMetadata(bucket, newKey);
                statistics.incrementReadOps(1);

                if (objectRepresentsDirectory(newKey, meta.getContentLength())) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Found file (with /): fake directory");
                    }
                    return new S3RFileStatus(true, true, f.makeQualified(uri, workingDir));
                } else {
                    LOG.warn("Found file (with /): real file? should not happen: {}", key);
                    return new S3RFileStatus(meta.getContentLength(), dateToLong(meta.getLastModified()),
                            f.makeQualified(uri, workingDir),
                            getDefaultBlockSize(f.makeQualified(uri, workingDir)));
                }
            } catch (AmazonServiceException e) {
                if (e.getStatusCode() != 404) {
                    printAmazonServiceException(e);
                    throw e;
                }
            } catch (AmazonClientException e) {
                printAmazonClientException(e);
                throw e;
            }
        }
    }

    try {
        if (!key.isEmpty() && !key.endsWith("/")) {
            key = key + "/";
        }

        ListObjectsRequest request = new ListObjectsRequest();
        request.setBucketName(bucket);
        request.setPrefix(key);
        request.setDelimiter("/");
        request.setMaxKeys(1);

        ObjectListing objects = s3.listObjects(request);
        statistics.incrementReadOps(1);

        if (!objects.getCommonPrefixes().isEmpty() || objects.getObjectSummaries().size() > 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Found path as directory (with /): " + objects.getCommonPrefixes().size() + "/"
                        + objects.getObjectSummaries().size());
                for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                    LOG.debug("Summary: " + summary.getKey() + " " + summary.getSize());
                }
                for (String prefix : objects.getCommonPrefixes()) {
                    LOG.debug("Prefix: " + prefix);
                }
            }
            return new S3RFileStatus(true, false, f.makeQualified(uri, workingDir));
        }
    } catch (AmazonServiceException e) {
        if (e.getStatusCode() != 404) {
            printAmazonServiceException(e);
            throw e;
        }
    } catch (AmazonClientException e) {
        printAmazonClientException(e);
        throw e;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Not Found: " + f);
    }
    throw new FileNotFoundException("No such file or directory: " + f);
}
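Note the probe order here: a HEAD request on the exact key, then a HEAD on the key with a trailing slash (the fake-directory marker), and finally a one-key LIST under the prefix. Only when all three miss does the method throw FileNotFoundException.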
From source file:org.apache.jackrabbit.oak.blob.cloud.aws.s3.S3Backend.java
License:Apache License
public List<DataRecord> getAllMetadataRecords(String prefix) {
    List<DataRecord> metadataList = new ArrayList<DataRecord>();
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket)
                .withPrefix(addMetaKeyPrefix(prefix));
        ObjectListing prevObjectListing = s3service.listObjects(listObjectsRequest);
        for (final S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
            metadataList.add(new S3DataRecord(s3service, bucket, stripMetaKeyPrefix(s3ObjSumm.getKey()),
                    s3ObjSumm.getLastModified().getTime(), s3ObjSumm.getSize()));
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
    return metadataList;
}
From source file:org.apache.jackrabbit.oak.blob.cloud.aws.s3.S3Backend.java
License:Apache License
@Override
public Iterator<DataRecord> getAllRecords() {
    return new RecordsIterator<DataRecord>(new Function<S3ObjectSummary, DataRecord>() {
        @Override
        public DataRecord apply(S3ObjectSummary input) {
            return new S3DataRecord(s3service, bucket, getIdentifierName(input.getKey()),
                    input.getLastModified().getTime(), input.getSize());
        }
    });
}
From source file:org.apache.tajo.storage.s3.S3TableSpace.java
License:Apache License
/**
 * Calculate the total size of all objects in the indicated bucket
 *
 * @param path to use
 * @return calculated size
 * @throws IOException
 */
@Override
public long calculateSize(Path path) throws IOException {
    long totalBucketSize = 0L;

    if (s3Enabled) {
        String key = pathToKey(path);

        final FileStatus fileStatus = fs.getFileStatus(path);

        if (fileStatus.isDirectory()) {
            if (!key.isEmpty()) {
                key = key + "/";
            }

            ListObjectsRequest request = new ListObjectsRequest();
            request.setBucketName(uri.getHost());
            request.setPrefix(key);
            request.setMaxKeys(maxKeys);

            if (LOG.isDebugEnabled()) {
                LOG.debug("listStatus: doing listObjects for directory " + key);
            }

            ObjectListing objects = s3.listObjects(request);

            while (true) {
                for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                    Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, fs.getWorkingDirectory());

                    // Skip over keys that are ourselves and old S3N _$folder$ files
                    if (keyPath.equals(path) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Ignoring: " + keyPath);
                        }
                        continue;
                    }

                    if (!objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
                        totalBucketSize += summary.getSize();
                    }
                }

                if (objects.isTruncated()) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("listStatus: list truncated - getting next batch");
                    }
                    objects = s3.listNextBatchOfObjects(objects);
                } else {
                    break;
                }
            }
        } else {
            return fileStatus.getLen();
        }
    } else {
        totalBucketSize = fs.getContentSummary(path).getLength();
    }

    return totalBucketSize;
}
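On SDK releases that include the V2 listing API (1.11+), the same total can be computed with continuation tokens instead of listNextBatchOfObjects; a sketch with placeholder bucket and prefix names:

ListObjectsV2Request req = new ListObjectsV2Request()
        .withBucketName("my-example-bucket").withPrefix("data/");
long total = 0L;
ListObjectsV2Result result;
do {
    result = s3.listObjectsV2(req);
    for (S3ObjectSummary summary : result.getObjectSummaries()) {
        total += summary.getSize(); // size in bytes from the listing
    }
    req.setContinuationToken(result.getNextContinuationToken());
} while (result.isTruncated());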
From source file:org.boriken.s3fileuploader.S3SampleRefactored.java
License:Open Source License
public static void listFiles(AmazonS3 s3, String bucketName, String prefix) {
    /*
     * List objects in your bucket by prefix - There are many options for
     * listing the objects in your bucket. Keep in mind that buckets with
     * many objects might truncate their results when listing their objects,
     * so be sure to check if the returned object listing is truncated, and
     * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
     * additional results.
     */
    System.out.println("Listing objects");
    ObjectListing objectListing = s3
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix));
    for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
        System.out.println(" - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
    }
    System.out.println();
}
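Despite the warning in its own comment, this example reads only the first page of results; for prefixes with more than 1,000 matching keys, loop on isTruncated() and listNextBatchOfObjects(...) as the earlier examples do.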
From source file:org.caboclo.clients.AmazonClient.java
License:Open Source License
@Override
public List<RemoteFile> getChildren(String folderName) throws IOException {
    if (!folderName.endsWith("/")) {
        folderName = folderName + "/";
    }

    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setDelimiter("/");
    listRequest.setPrefix(folderName);

    ObjectListing listing = s3.listObjects(listRequest);

    ArrayList<RemoteFile> list = new ArrayList<>();
    for (S3ObjectSummary summ : listing.getObjectSummaries()) {
        String name = summ.getKey();
        long size = summ.getSize();
        boolean isDirectory = isFolder(name);
        RemoteFile file = new RemoteFile(name, isDirectory, size);
        list.add(file);
    }
    return list;
}
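With the "/" delimiter, keys under subfolders (including their zero-byte folder markers) are rolled up into getCommonPrefixes(), which this method ignores, so the returned children include only objects directly under folderName; isFolder(name) flags markers whose keys carry no further delimiter.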
From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java
License:Open Source License
private boolean doScan(boolean initialScan) throws IOException {
    try {
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

        ObjectListing response = s3Client.listObjects(new ListObjectsRequest()
                .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE));

        do {
            LOG.info("Response Key Count:" + response.getObjectSummaries().size());

            for (S3ObjectSummary entry : response.getObjectSummaries()) {
                Matcher matcher = crawlLogPattern.matcher(entry.getKey());
                if (matcher.matches()) {
                    ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                    if (candidate == null) {
                        LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                    } else {
                        LOG.info("Candidate is:" + candidate);
                        synchronized (this) {
                            if (_complete.contains(candidate._crawlLogName)) {
                                LOG.info("Skipping completed Candidate:" + candidate);
                            } else {
                                if (!_candidates.containsEntry(candidate._timestamp, candidate)
                                        && !_active.containsKey(candidate)) {
                                    // update candidate size here ...
                                    candidate._size = entry.getSize();
                                    LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                                    _candidates.put(candidate._timestamp, candidate);
                                } else {
                                    LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                                }
                            }
                        }
                    }
                }
            }

            if (response.isTruncated()) {
                response = s3Client.listNextBatchOfObjects(response);
            } else {
                break;
            }
        } while (!shutdownFlag.get());

        if (initialScan) {
            // search for completions
            synchronized (this) {
                scanForCompletions();
            }
        }

        return true;
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}
From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java
License:Open Source License
EC2MetadataTransferUtil(String bucketName, JsonArray pathList) throws IOException {
    _conf = new Configuration();
    _fs = FileSystem.get(_conf);

    LOG.info("Initializing Downloader");
    _downloader = new S3Downloader(bucketName, s3AccessKeyId, s3SecretKey, false);
    _downloader.setMaxParallelStreams(150);
    _downloader.initialize(this);

    LOG.info("Got JSON Array with:" + pathList.size() + " elements");
    for (int i = 0; i < pathList.size(); ++i) {
        LOG.info("Collection metadata files from path:" + pathList.get(i).toString());

        List<S3ObjectSummary> metadataFiles = getMetadataPaths(s3AccessKeyId, s3SecretKey, bucketName,
                pathList.get(i).getAsString());

        LOG.info("Got:" + metadataFiles.size() + " total files");

        for (S3ObjectSummary metadataFile : metadataFiles) {
            Matcher segmentNameMatcher = metadataInfoPattern.matcher(metadataFile.getKey());
            if (segmentNameMatcher.matches()) {
                String segmentId = segmentNameMatcher.group(1);
                String partExtension = segmentNameMatcher.group(2);

                Path finalSegmentPath = new Path(finalSegmentOutputDir, segmentId);
                Path finalPath = new Path(finalSegmentPath, "metadata-" + partExtension);

                FileStatus fileStatus = _fs.getFileStatus(finalPath);

                if (fileStatus != null && fileStatus.getLen() != metadataFile.getSize()) {
                    LOG.error("SRC-DEST SIZE MISMATCH!! SRC:" + metadataFile + " SRC-SIZE:"
                            + metadataFile.getSize() + " DEST:" + finalPath + " DEST-SIZE:"
                            + fileStatus.getLen());
                    // ok delete the destination
                    _fs.delete(finalPath, false);
                    // null file status so that the item gets requeued ...
                    fileStatus = null;
                }

                if (fileStatus == null) {
                    LOG.info("Queueing Item:" + metadataFile);
                    ++_totalQueuedItemsCount;
                    _downloader.fetchItem(metadataFile.getKey());
                } else {
                    LOG.info("Skipping Already Download Item:" + metadataFile + " Found at:" + finalPath);
                }
            }
        }
    }

    LOG.info("Waiting for shutdown event");
    _downloader.waitForCompletion();
}
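Here getSize() doubles as a cheap integrity check: when the size in the S3 summary disagrees with the length of the already-downloaded copy, the local file is deleted and the item is re-queued for download.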