Example usage for com.amazonaws.services.s3.model S3ObjectSummary getSize

List of usage examples for com.amazonaws.services.s3.model S3ObjectSummary getSize

Introduction

In this page you can find the example usage for com.amazonaws.services.s3.model S3ObjectSummary getSize.

Prototype

public long getSize() 

Source Link

Document

Gets the size of this object in bytes.

Usage

From source file:org.apache.hadoop.fs.s3a.S3AUtils.java

License:Apache License

/**
 * Build an {@code S3AFileStatus} from a single S3 listing entry.
 *
 * @param keyPath   qualified path of the listing entry
 * @param summary   object summary returned by the AWS listing call
 * @param blockSize block size to report for plain files
 * @return a directory status when the key is classified as a directory
 *         marker, otherwise a file status carrying size and mtime
 */
public static S3AFileStatus createFileStatus(Path keyPath, S3ObjectSummary summary, long blockSize) {
    final String key = summary.getKey();
    final long size = summary.getSize();
    // Directory markers (per objectRepresentsDirectory) become directory entries.
    if (objectRepresentsDirectory(key, size)) {
        return new S3AFileStatus(true, true, keyPath);
    }
    return new S3AFileStatus(size, dateToLong(summary.getLastModified()), keyPath, blockSize);
}

From source file:org.apache.hadoop.fs.s3r.S3RFileSystem.java

License:Apache License

/**
 * List the statuses of the files/directories in the given path if the path is
 * a directory.
 *
 * <p>For a directory, a delimiter ("/") listing is issued so only immediate
 * children are returned; truncated results are paged through with
 * {@code listNextBatchOfObjects}. Every S3 round trip is recorded as a read
 * operation in {@code statistics}.
 *
 * @param f given path
 * @return the statuses of the files/directories in the given path
 * @throws FileNotFoundException when the path does not exist;
 *         IOException see specific implementation
 */
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException {
    String key = pathToKey(f);
    if (LOG.isDebugEnabled()) {
        LOG.debug("List status for path: " + f);
    }

    final List<FileStatus> result = new ArrayList<FileStatus>();
    // Throws FileNotFoundException when the path does not exist at all.
    final FileStatus fileStatus = getFileStatus(f);

    if (fileStatus.isDirectory()) {
        // Non-root directories need a trailing "/" so the prefix matches children only.
        if (!key.isEmpty()) {
            key = key + "/";
        }

        ListObjectsRequest request = new ListObjectsRequest();
        request.setBucketName(bucket);
        request.setPrefix(key);
        request.setDelimiter("/");
        request.setMaxKeys(maxKeys);

        if (LOG.isDebugEnabled()) {
            LOG.debug("listStatus: doing listObjects for directory " + key);
        }

        ObjectListing objects = s3.listObjects(request);
        statistics.incrementReadOps(1);

        while (true) {
            for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, workingDir);
                // Skip over keys that are ourselves and old S3N _$folder$ files
                if (keyPath.equals(f) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Ignoring: " + keyPath);
                    }
                    continue;
                }

                // Keys classified as directory markers are listed as directories.
                if (objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
                    result.add(new S3RFileStatus(true, true, keyPath));
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Adding: fd: " + keyPath);
                    }
                } else {
                    result.add(new S3RFileStatus(summary.getSize(), dateToLong(summary.getLastModified()),
                            keyPath, getDefaultBlockSize(f.makeQualified(uri, workingDir))));
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Adding: fi: " + keyPath);
                    }
                }
            }

            // Common prefixes are the sub-"directories" implied by the delimiter.
            for (String prefix : objects.getCommonPrefixes()) {
                Path keyPath = keyToPath(prefix).makeQualified(uri, workingDir);
                if (keyPath.equals(f)) {
                    continue;
                }
                result.add(new S3RFileStatus(true, false, keyPath));
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding: rd: " + keyPath);
                }
            }

            if (objects.isTruncated()) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("listStatus: list truncated - getting next batch");
                }

                objects = s3.listNextBatchOfObjects(objects);
                statistics.incrementReadOps(1);
            } else {
                break;
            }
        }
    } else {
        // Not a directory: the status of the path itself is the whole answer.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding: rd (not a dir): " + f);
        }
        result.add(fileStatus);
    }

    return result.toArray(new FileStatus[result.size()]);
}

From source file:org.apache.hadoop.fs.s3r.S3RFileSystem.java

License:Apache License

/**
 * Return a file status object that represents the path.
 *
 * <p>Resolution is a three-step probe, each counted as a read op in
 * {@code statistics}:
 * <ol>
 *   <li>HEAD the exact key; a key classified as a directory marker by
 *       {@code objectRepresentsDirectory} yields a directory status,
 *       anything else a file status;</li>
 *   <li>HEAD the key with a trailing "/" appended;</li>
 *   <li>issue a one-key delimiter listing of the prefix -- any object or
 *       common prefix means the path exists as an implied directory.</li>
 * </ol>
 * A 404 at any step falls through to the next; other service errors and
 * client errors are logged and rethrown. If all probes miss,
 * {@link FileNotFoundException} is thrown.
 *
 * @param f The path we want information from
 * @return a FileStatus object
 * @throws FileNotFoundException when the path does not exist;
 *         IOException see specific implementation
 */
public S3RFileStatus getFileStatus(Path f) throws IOException {
    String key = pathToKey(f);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Getting path status for " + f + " (" + key + ")");
    }

    // An empty key is the bucket root; skip straight to the listing probe.
    if (!key.isEmpty()) {
        try {
            ObjectMetadata meta = s3.getObjectMetadata(bucket, key);
            statistics.incrementReadOps(1);

            if (objectRepresentsDirectory(key, meta.getContentLength())) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Found exact file: fake directory");
                }
                return new S3RFileStatus(true, true, f.makeQualified(uri, workingDir));
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Found exact file: normal file");
                }
                return new S3RFileStatus(meta.getContentLength(), dateToLong(meta.getLastModified()),
                        f.makeQualified(uri, workingDir),
                        getDefaultBlockSize(f.makeQualified(uri, workingDir)));
            }
        } catch (AmazonServiceException e) {
            // 404 just means this probe missed; anything else is a real error.
            if (e.getStatusCode() != 404) {
                printAmazonServiceException(e);
                throw e;
            }
        } catch (AmazonClientException e) {
            printAmazonClientException(e);
            throw e;
        }

        // Necessary? Second probe: the directory may be stored as "<key>/".
        if (!key.endsWith("/")) {
            try {
                String newKey = key + "/";
                ObjectMetadata meta = s3.getObjectMetadata(bucket, newKey);
                statistics.incrementReadOps(1);

                if (objectRepresentsDirectory(newKey, meta.getContentLength())) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Found file (with /): fake directory");
                    }
                    return new S3RFileStatus(true, true, f.makeQualified(uri, workingDir));
                } else {
                    // A non-marker object at "<key>/" is unexpected but still reported as a file.
                    LOG.warn("Found file (with /): real file? should not happen: {}", key);

                    return new S3RFileStatus(meta.getContentLength(), dateToLong(meta.getLastModified()),
                            f.makeQualified(uri, workingDir),
                            getDefaultBlockSize(f.makeQualified(uri, workingDir)));
                }
            } catch (AmazonServiceException e) {
                if (e.getStatusCode() != 404) {
                    printAmazonServiceException(e);
                    throw e;
                }
            } catch (AmazonClientException e) {
                printAmazonClientException(e);
                throw e;
            }
        }
    }

    // Final probe: list at most one key under the prefix; any hit means the
    // path exists as an implied directory.
    try {
        if (!key.isEmpty() && !key.endsWith("/")) {
            key = key + "/";
        }
        ListObjectsRequest request = new ListObjectsRequest();
        request.setBucketName(bucket);
        request.setPrefix(key);
        request.setDelimiter("/");
        request.setMaxKeys(1);

        ObjectListing objects = s3.listObjects(request);
        statistics.incrementReadOps(1);

        if (!objects.getCommonPrefixes().isEmpty() || objects.getObjectSummaries().size() > 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Found path as directory (with /): " + objects.getCommonPrefixes().size() + "/"
                        + objects.getObjectSummaries().size());

                for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                    LOG.debug("Summary: " + summary.getKey() + " " + summary.getSize());
                }
                for (String prefix : objects.getCommonPrefixes()) {
                    LOG.debug("Prefix: " + prefix);
                }
            }

            return new S3RFileStatus(true, false, f.makeQualified(uri, workingDir));
        }
    } catch (AmazonServiceException e) {
        if (e.getStatusCode() != 404) {
            printAmazonServiceException(e);
            throw e;
        }
    } catch (AmazonClientException e) {
        printAmazonClientException(e);
        throw e;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Not Found: " + f);
    }
    throw new FileNotFoundException("No such file or directory: " + f);
}

From source file:org.apache.jackrabbit.oak.blob.cloud.aws.s3.S3Backend.java

License:Apache License

/**
 * List every metadata record stored under the given prefix.
 *
 * <p>NOTE(review): only the first listing page is consumed -- truncated
 * listings are not followed with listNextBatchOfObjects; confirm metadata
 * object counts stay within a single page.
 *
 * @param prefix metadata key prefix to match (meta prefix is prepended)
 * @return records for all matching metadata objects, possibly empty
 */
public List<DataRecord> getAllMetadataRecords(String prefix) {
    final List<DataRecord> records = new ArrayList<DataRecord>();
    // Swap in this class's loader so the AWS SDK resolves its resources correctly.
    final ClassLoader previousLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ObjectListing listing = s3service.listObjects(
                new ListObjectsRequest().withBucketName(bucket).withPrefix(addMetaKeyPrefix(prefix)));
        for (final S3ObjectSummary summary : listing.getObjectSummaries()) {
            records.add(new S3DataRecord(s3service, bucket, stripMetaKeyPrefix(summary.getKey()),
                    summary.getLastModified().getTime(), summary.getSize()));
        }
    } finally {
        // Always restore the caller's context classloader.
        if (previousLoader != null) {
            Thread.currentThread().setContextClassLoader(previousLoader);
        }
    }
    return records;
}

From source file:org.apache.jackrabbit.oak.blob.cloud.aws.s3.S3Backend.java

License:Apache License

/**
 * Iterate over every record in the bucket, mapping each S3 object summary
 * to a {@code DataRecord} as the iterator advances.
 *
 * @return an iterator over all data records
 */
@Override
public Iterator<DataRecord> getAllRecords() {
    final Function<S3ObjectSummary, DataRecord> toRecord = new Function<S3ObjectSummary, DataRecord>() {
        @Override
        public DataRecord apply(S3ObjectSummary summary) {
            return new S3DataRecord(s3service, bucket, getIdentifierName(summary.getKey()),
                    summary.getLastModified().getTime(), summary.getSize());
        }
    };
    return new RecordsIterator<DataRecord>(toRecord);
}

From source file:org.apache.tajo.storage.s3.S3TableSpace.java

License:Apache License

/**
 * Calculate the total size of all objects in the indicated bucket
 *
 * @param path to use/*  www .  ja va2s.c  o  m*/
 * @return calculated size
 * @throws IOException
 */
@Override
public long calculateSize(Path path) throws IOException {
    long totalBucketSize = 0L;

    if (s3Enabled) {
        String key = pathToKey(path);

        final FileStatus fileStatus = fs.getFileStatus(path);

        if (fileStatus.isDirectory()) {
            if (!key.isEmpty()) {
                key = key + "/";
            }

            ListObjectsRequest request = new ListObjectsRequest();
            request.setBucketName(uri.getHost());
            request.setPrefix(key);
            request.setMaxKeys(maxKeys);

            if (LOG.isDebugEnabled()) {
                LOG.debug("listStatus: doing listObjects for directory " + key);
            }

            ObjectListing objects = s3.listObjects(request);

            while (true) {
                for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                    Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, fs.getWorkingDirectory());

                    // Skip over keys that are ourselves and old S3N _$folder$ files
                    if (keyPath.equals(path) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Ignoring: " + keyPath);
                        }
                        continue;
                    }

                    if (!objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
                        totalBucketSize += summary.getSize();
                    }
                }

                if (objects.isTruncated()) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("listStatus: list truncated - getting next batch");
                    }
                    objects = s3.listNextBatchOfObjects(objects);
                } else {
                    break;
                }
            }
        } else {
            return fileStatus.getLen();
        }
    } else {
        totalBucketSize = fs.getContentSummary(path).getLength();
    }

    return totalBucketSize;
}

From source file:org.boriken.s3fileuploader.S3SampleRefactored.java

License:Open Source License

public static void listFiles(AmazonS3 s3, String bucketName, String prefix) {
    /*/*from  ww w  .ja  v  a 2s . co m*/
      * List objects in your bucket by prefix - There are many options for
      * listing the objects in your bucket.  Keep in mind that buckets with
      * many objects might truncate their results when listing their objects,
      * so be sure to check if the returned object listing is truncated, and
      * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
      * additional results.
      */
    System.out.println("Listing objects");
    ObjectListing objectListing = s3
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix));

    for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
        System.out.println(" - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
    }
    System.out.println();

}

From source file:org.caboclo.clients.AmazonClient.java

License:Open Source License

/**
 * List the immediate children of the given folder in the bucket.
 *
 * <p>Uses a "/" delimiter listing so only direct children are returned;
 * whether each child is a folder is decided by {@code isFolder} on its key.
 *
 * @param folderName folder key; a trailing "/" is appended when missing
 * @return one RemoteFile per child object (possibly empty)
 * @throws IOException declared for interface compatibility
 */
@Override
public List<RemoteFile> getChildren(String folderName) throws IOException {
    final String prefix = folderName.endsWith("/") ? folderName : folderName + "/";

    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setDelimiter("/");
    listRequest.setPrefix(prefix);

    ObjectListing listing = s3.listObjects(listRequest);

    ArrayList<RemoteFile> children = new ArrayList<>();
    for (S3ObjectSummary summary : listing.getObjectSummaries()) {
        String key = summary.getKey();
        children.add(new RemoteFile(key, isFolder(key), summary.getSize()));
    }

    return children;
}

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Scan the public-datasets bucket for crawl-log files and queue any not yet
 * seen as parse candidates.
 *
 * <p>Pages through the listing until exhausted or {@code shutdownFlag} is
 * set. Candidate bookkeeping ({@code _complete}, {@code _candidates},
 * {@code _active}) is read and updated while synchronized on {@code this}.
 *
 * <p>NOTE(review): the catch clause only covers IOException; an
 * AmazonClientException from the listing calls would propagate unchecked --
 * confirm that is intended.
 *
 * @param initialScan when true, also scans for completions after listing
 * @return true on success; false when an IOException was caught and logged
 * @throws IOException declared, though IO failures inside the try are
 *         converted into a false return
 */
private boolean doScan(boolean initialScan) throws IOException {
    try {
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

        ObjectListing response = s3Client.listObjects(new ListObjectsRequest()
                .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE));

        do {

            LOG.info("Response Key Count:" + response.getObjectSummaries().size());

            for (S3ObjectSummary entry : response.getObjectSummaries()) {

                // Only keys matching the crawl-log naming pattern are considered.
                Matcher matcher = crawlLogPattern.matcher(entry.getKey());
                if (matcher.matches()) {
                    ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                    if (candidate == null) {
                        LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                    } else {
                        LOG.info("Candidate is:" + candidate);
                        synchronized (this) {
                            if (_complete.contains(candidate._crawlLogName)) {
                                LOG.info("Skipping completed Candidate:" + candidate);
                            } else {
                                // Queue only candidates that are neither pending nor active.
                                if (!_candidates.containsEntry(candidate._timestamp, candidate)
                                        && !_active.containsKey(candidate)) {
                                    // update candidate size here ... 
                                    candidate._size = entry.getSize();
                                    LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                                    _candidates.put(candidate._timestamp, candidate);
                                } else {
                                    LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                                }
                            }
                        }
                    }
                }
            }

            // Page through the listing; a shutdown request stops the loop early.
            if (response.isTruncated()) {
                response = s3Client.listNextBatchOfObjects(response);
            } else {
                break;
            }
        } while (!shutdownFlag.get());

        if (initialScan) {
            // search for completions 
            synchronized (this) {
                scanForCompletions();
            }
        }

        return true;
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}

From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java

License:Open Source License

/**
 * Queue every metadata file found under the given S3 paths for download,
 * skipping files already present locally with a matching size; a local file
 * whose size mismatches the source is deleted and re-queued. Blocks until
 * the downloader finishes.
 *
 * @param bucketName source S3 bucket
 * @param pathList   JSON array of S3 key prefixes to scan for metadata files
 * @throws IOException if the local FileSystem or downloader setup fails
 */
EC2MetadataTransferUtil(String bucketName, JsonArray pathList) throws IOException {
    _conf = new Configuration();
    _fs = FileSystem.get(_conf);
    LOG.info("Initializing Downloader");
    _downloader = new S3Downloader(bucketName, s3AccessKeyId, s3SecretKey, false);
    _downloader.setMaxParallelStreams(150);
    _downloader.initialize(this);

    LOG.info("Got JSON Array with:" + pathList.size() + " elements");
    for (int i = 0; i < pathList.size(); ++i) {
        LOG.info("Collection metadata files from path:" + pathList.get(i).toString());
        List<S3ObjectSummary> metadataFiles = getMetadataPaths(s3AccessKeyId, s3SecretKey, bucketName,
                pathList.get(i).getAsString());
        LOG.info("Got:" + metadataFiles.size() + " total files");
        for (S3ObjectSummary metadataFile : metadataFiles) {

            // Only keys matching "<segment>/metadata-<part>" are transferred.
            Matcher segmentNameMatcher = metadataInfoPattern.matcher(metadataFile.getKey());

            if (segmentNameMatcher.matches()) {

                String segmentId = segmentNameMatcher.group(1);
                String partExtension = segmentNameMatcher.group(2);
                Path finalSegmentPath = new Path(finalSegmentOutputDir, segmentId);
                Path finalPath = new Path(finalSegmentPath, "metadata-" + partExtension);

                // NOTE(review): most Hadoop versions throw FileNotFoundException
                // here rather than returning null for a missing path -- confirm
                // the null-check below is reachable on the Hadoop version in use.
                FileStatus fileStatus = _fs.getFileStatus(finalPath);

                if (fileStatus != null && fileStatus.getLen() != metadataFile.getSize()) {
                    LOG.error("SRC-DEST SIZE MISMATCH!! SRC:" + metadataFile + " SRC-SIZE:"
                            + metadataFile.getSize() + " DEST:" + finalPath + " DEST-SIZE:"
                            + fileStatus.getLen());

                    // ok delete the destination 
                    _fs.delete(finalPath, false);
                    // null file status so that the item gets requeued ... 
                    fileStatus = null;
                }

                if (fileStatus == null) {
                    LOG.info("Queueing Item:" + metadataFile);
                    ++_totalQueuedItemsCount;
                    _downloader.fetchItem(metadataFile.getKey());
                } else {
                    LOG.info("Skipping Already Download Item:" + metadataFile + " Found at:" + finalPath);
                }
            }
        }
    }
    LOG.info("Waiting for shutdown event");
    _downloader.waitForCompletion();
}