Example usage for com.amazonaws.services.s3.model S3ObjectSummary getKey

List of usage examples for com.amazonaws.services.s3.model S3ObjectSummary getKey

Introduction

On this page you can find example usage of com.amazonaws.services.s3.model.S3ObjectSummary.getKey.

Prototype

public String getKey() 

Source Link

Document

Gets the key under which this object is stored in Amazon S3.

Usage

From source file:org.caboclo.clients.AmazonClient.java

License:Open Source License

/**
 * Lists the keys of all objects under the given folder prefix in the default bucket.
 *
 * @param folderName prefix that returned object keys must start with
 * @return keys of every matching object, across all listing pages
 * @throws IOException declared for interface compatibility
 */
public ArrayList<String> getAllChildren(String folderName) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setPrefix(folderName);

    ArrayList<String> list = new ArrayList<String>();

    // S3 returns results in pages (max 1000 keys each); follow the truncation
    // marker so callers see every key, not just the first page.
    ObjectListing listing = s3.listObjects(listRequest);
    while (true) {
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            list.add(summ.getKey());
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3.listNextBatchOfObjects(listing);
    }

    return list;
}

From source file:org.caboclo.clients.AmazonClient.java

License:Open Source License

/**
 * Lists the keys of all objects in the given bucket, optionally restricted to
 * a folder prefix.
 *
 * @param folderName prefix to filter by; ignored when null or empty
 * @param bucket     name of the bucket to list
 * @return keys of every matching object, across all listing pages
 * @throws IOException declared for interface compatibility
 */
public List<String> getAllChildren(String folderName, String bucket) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(bucket);

    if (!(folderName == null || folderName.equals(""))) {
        listRequest.setPrefix(folderName);
    }

    ArrayList<String> list = new ArrayList<String>();

    // Page through the listing: S3 caps each response at 1000 keys.
    ObjectListing listing = s3.listObjects(listRequest);
    while (true) {
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            list.add(summ.getKey());
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3.listNextBatchOfObjects(listing);
    }

    return list;
}

From source file:org.caboclo.clients.AmazonClient.java

License:Open Source License

/**
 * Deletes every object in the given bucket.
 *
 * <p>Pages through the listing so buckets holding more than one page
 * (1000 keys) of objects are fully emptied, not just the first page.
 *
 * @param bucket name of the bucket whose contents are removed
 */
public void deleteBucketContents(String bucket) {
    ObjectListing listing = s3.listObjects(bucket);
    while (true) {
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            s3.deleteObject(bucket, summ.getKey());
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3.listNextBatchOfObjects(listing);
    }
}

From source file:org.caboclo.clients.AmazonClient.java

License:Open Source License

/**
 * Lists object keys in a bucket filtered by prefix and grouped by delimiter.
 *
 * @param bkt       name of the bucket to list
 * @param prefix    restricts results to keys beginning with this prefix
 * @param delimiter character used to roll up keys (e.g. "/" for folder-style
 *                  listings); keys past the first delimiter after the prefix
 *                  are grouped into common prefixes and not returned here
 * @return matching object keys, across all listing pages
 * @throws IOException declared for interface compatibility
 */
public List<String> listBucket(String bkt, String prefix, String delimiter) throws IOException {

    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(bkt);
    listRequest.setDelimiter(delimiter);
    listRequest.setPrefix(prefix);

    ArrayList<String> list = new ArrayList<String>();

    // Page through the listing: S3 caps each response at 1000 keys.
    ObjectListing listing = s3.listObjects(listRequest);
    while (true) {
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            list.add(summ.getKey());
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3.listNextBatchOfObjects(listing);
    }

    return list;
}

From source file:org.caboclo.clients.AmazonClient.java

License:Open Source License

/**
 * Lists the immediate children of the given folder as {@link RemoteFile}s.
 *
 * <p>Ensures the folder name ends with "/" and uses "/" as the listing
 * delimiter so only direct children are returned. Pages through the listing
 * so folders with more than 1000 entries are fully enumerated.
 *
 * @param folderName folder whose children are listed; "/" is appended if missing
 * @return one {@link RemoteFile} per child, flagged as a directory when
 *         {@code isFolder} says so
 * @throws IOException declared for interface compatibility
 */
@Override
public List<RemoteFile> getChildren(String folderName) throws IOException {
    if (!folderName.endsWith("/")) {
        folderName = folderName + "/";
    }

    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setDelimiter("/");
    listRequest.setPrefix(folderName);

    ArrayList<RemoteFile> list = new ArrayList<>();

    // Page through the listing: S3 caps each response at 1000 keys.
    ObjectListing listing = s3.listObjects(listRequest);
    while (true) {
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            String name = summ.getKey();
            long size = summ.getSize();

            boolean isDirectory = isFolder(name);

            list.add(new RemoteFile(name, isDirectory, size));
        }
        if (!listing.isTruncated()) {
            break;
        }
        listing = s3.listNextBatchOfObjects(listing);
    }

    return list;
}

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Scans the crawl-log source prefix in the public-datasets bucket and
 * registers any newly discovered crawl logs as parse candidates.
 *
 * <p>Pages through the full S3 listing (following truncation markers, and
 * stopping early on shutdown). Each key matching {@code crawlLogPattern}
 * becomes a {@link ParseCandidate} unless it is already complete, already
 * queued, or currently active. On the initial scan, completion markers are
 * also collected via {@link #scanForCompletions()}.
 *
 * @param initialScan true on the first scan after startup; additionally
 *                    triggers a completion scan once the listing is processed
 * @return true if the scan ran to completion, false if an IOException was
 *         caught and logged
 * @throws IOException declared on the signature; in practice IO failures are
 *                     caught below and reported via the false return value
 */
private boolean doScan(boolean initialScan) throws IOException {
    try {
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

        ObjectListing response = s3Client.listObjects(new ListObjectsRequest()
                .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE));

        do {

            LOG.info("Response Key Count:" + response.getObjectSummaries().size());

            for (S3ObjectSummary entry : response.getObjectSummaries()) {

                // Only keys that look like crawl logs are considered.
                Matcher matcher = crawlLogPattern.matcher(entry.getKey());
                if (matcher.matches()) {
                    ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                    if (candidate == null) {
                        LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                    } else {
                        LOG.info("Candidate is:" + candidate);
                        // Lock guards _complete, _candidates, and _active,
                        // which are shared with other scanner/worker paths.
                        synchronized (this) {
                            if (_complete.contains(candidate._crawlLogName)) {
                                LOG.info("Skipping completed Candidate:" + candidate);
                            } else {
                                if (!_candidates.containsEntry(candidate._timestamp, candidate)
                                        && !_active.containsKey(candidate)) {
                                    // update candidate size here ... 
                                    candidate._size = entry.getSize();
                                    LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                                    _candidates.put(candidate._timestamp, candidate);
                                } else {
                                    LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                                }
                            }
                        }
                    }
                }
            }

            // Follow S3 listing pagination; exit via break when the last
            // page has been processed.
            if (response.isTruncated()) {
                response = s3Client.listNextBatchOfObjects(response);
            } else {
                break;
            }
        } while (!shutdownFlag.get());

        if (initialScan) {
            // search for completions 
            synchronized (this) {
                scanForCompletions();
            }
        }

        return true;
    } catch (IOException e) {
        // NOTE(review): the AWS SDK v1 client typically throws unchecked
        // AmazonClientException/AmazonServiceException, which this catch
        // will NOT intercept — confirm whether those should be handled too.
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Scans the parser-intermediate prefix for "done" marker files and advances
 * the last-valid-position of the matching parse candidates, moving a
 * candidate to the complete set once its whole log has been processed.
 *
 * <p>Fix: the two-argument {@code Iterables.find(iterable, predicate)} throws
 * {@code NoSuchElementException} when no element matches, so the original
 * {@code existingCandidate != null} branch was unreachable and a completion
 * marker without a known candidate aborted the scan. The three-argument
 * overload with a {@code null} default restores the intended null-check path.
 *
 * @throws IOException declared for interface compatibility
 */
public void scanForCompletions() throws IOException {
    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ObjectListing response = s3Client.listObjects(new ListObjectsRequest().withBucketName("aws-publicdatasets")
            .withPrefix(CC_BUCKET_ROOT + CC_PARSER_INTERMEDIATE));

    do {

        LOG.info("Response Key Count:" + response.getObjectSummaries().size());

        for (S3ObjectSummary entry : response.getObjectSummaries()) {
            Matcher matcher = doneFilePattern.matcher(entry.getKey());
            if (matcher.matches()) {
                ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                if (candidate == null) {
                    LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                } else {
                    long partialTimestamp = Long.parseLong(matcher.group(2));
                    long position = Long.parseLong(matcher.group(3));
                    LOG.info("Found completion for Log:" + candidate._crawlLogName + " TS:" + partialTimestamp
                            + " Pos:" + position);
                    candidate._lastValidPos = position;

                    // ok lookup existing entry if present; default to null so a
                    // missing candidate is handled below instead of throwing
                    // NoSuchElementException.
                    ParseCandidate existingCandidate = Iterables.find(_candidates.get(candidate._timestamp),
                            Predicates.equalTo(candidate), null);
                    // if existing candidate found 
                    if (existingCandidate != null) {
                        LOG.info("Found existing candidate with last pos:" + existingCandidate._lastValidPos);
                        if (candidate._lastValidPos > existingCandidate._lastValidPos) {
                            existingCandidate._lastValidPos = candidate._lastValidPos;
                            // Marker position equal to the log size means the
                            // whole log has been parsed.
                            if (candidate._lastValidPos == candidate._size) {
                                LOG.info("Found last pos == size for candidate:" + candidate._crawlLogName
                                        + ".REMOVING FROM ACTIVE - MOVING TO COMPLETE");
                                _candidates.remove(candidate._timestamp, candidate);
                                _complete.add(candidate._crawlLogName);
                            }
                        }
                    } else {
                        LOG.info("Skipping Completion for CrawlLog:" + candidate._crawlLogName
                                + " because existing candidate was not found.");
                    }
                }
            }
        }
        // Follow S3 listing pagination until the last page.
        if (response.isTruncated()) {
            response = s3Client.listNextBatchOfObjects(response);
        } else {
            break;
        }
    } while (true);
}

From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java

License:Open Source License

/**
 * Downloads segment metadata files listed under the given S3 paths into HDFS,
 * skipping files already present with the correct size.
 *
 * <p>Fix: Hadoop's {@code FileSystem.getFileStatus} throws
 * {@code FileNotFoundException} for a missing path rather than returning null,
 * so the original unguarded call aborted the whole constructor on the first
 * file not yet downloaded and the {@code fileStatus == null} queueing path was
 * unreachable. The lookup is now wrapped in try/catch, consistent with the
 * sibling {@code S3BulkTransferUtil} constructor.
 *
 * @param bucketName S3 bucket to download from
 * @param pathList   JSON array of S3 path prefixes to collect metadata from
 * @throws IOException if the Hadoop FileSystem cannot be initialized or a
 *                     download cannot be queued
 */
EC2MetadataTransferUtil(String bucketName, JsonArray pathList) throws IOException {
    _conf = new Configuration();
    _fs = FileSystem.get(_conf);
    LOG.info("Initializing Downloader");
    _downloader = new S3Downloader(bucketName, s3AccessKeyId, s3SecretKey, false);
    _downloader.setMaxParallelStreams(150);
    _downloader.initialize(this);

    LOG.info("Got JSON Array with:" + pathList.size() + " elements");
    for (int i = 0; i < pathList.size(); ++i) {
        LOG.info("Collection metadata files from path:" + pathList.get(i).toString());
        List<S3ObjectSummary> metadataFiles = getMetadataPaths(s3AccessKeyId, s3SecretKey, bucketName,
                pathList.get(i).getAsString());
        LOG.info("Got:" + metadataFiles.size() + " total files");
        for (S3ObjectSummary metadataFile : metadataFiles) {

            Matcher segmentNameMatcher = metadataInfoPattern.matcher(metadataFile.getKey());

            if (segmentNameMatcher.matches()) {

                String segmentId = segmentNameMatcher.group(1);
                String partExtension = segmentNameMatcher.group(2);
                Path finalSegmentPath = new Path(finalSegmentOutputDir, segmentId);
                Path finalPath = new Path(finalSegmentPath, "metadata-" + partExtension);

                FileStatus fileStatus = null;
                try {
                    fileStatus = _fs.getFileStatus(finalPath);
                } catch (IOException e) {
                    // FileNotFoundException is the expected case here: the
                    // destination does not exist yet, so leave fileStatus
                    // null and let the item be queued below.
                }

                if (fileStatus != null && fileStatus.getLen() != metadataFile.getSize()) {
                    LOG.error("SRC-DEST SIZE MISMATCH!! SRC:" + metadataFile + " SRC-SIZE:"
                            + metadataFile.getSize() + " DEST:" + finalPath + " DEST-SIZE:"
                            + fileStatus.getLen());

                    // ok delete the destination 
                    _fs.delete(finalPath, false);
                    // null file status so that the item gets requeued ... 
                    fileStatus = null;
                }

                if (fileStatus == null) {
                    LOG.info("Queueing Item:" + metadataFile);
                    ++_totalQueuedItemsCount;
                    _downloader.fetchItem(metadataFile.getKey());
                } else {
                    LOG.info("Skipping Already Download Item:" + metadataFile + " Found at:" + finalPath);
                }
            }
        }
    }
    LOG.info("Waiting for shutdown event");
    _downloader.waitForCompletion();
}

From source file:org.commoncrawl.util.S3BulkTransferUtil.java

License:Open Source License

/**
 * Bulk-downloads the S3 objects found under the given path prefixes into a
 * single HDFS output directory, skipping files already present with the
 * correct size.
 *
 * <p>Fix: the existence probe previously swallowed every {@code Exception}
 * silently; it now catches only {@code IOException} (of which
 * FileNotFoundException — the expected "not downloaded yet" case — is a
 * subclass) so programming errors are no longer hidden.
 *
 * @param bucketName    S3 bucket to download from
 * @param s3AccessKeyId AWS access key id
 * @param s3SecretKey   AWS secret key
 * @param pathList      JSON array of S3 path prefixes to collect files from
 * @param outputPath    HDFS directory that receives the downloaded files
 * @throws IOException if the Hadoop FileSystem cannot be initialized or a
 *                     download cannot be queued
 */
S3BulkTransferUtil(String bucketName, String s3AccessKeyId, String s3SecretKey, JsonArray pathList,
        final Path outputPath) throws IOException {
    _conf = new Configuration();
    _fs = FileSystem.get(_conf);
    LOG.info("Initializing Downloader");
    _downloader = new S3Downloader(bucketName, s3AccessKeyId, s3SecretKey, false);
    _downloader.setMaxParallelStreams(150);
    _downloader.initialize(this);

    LOG.info("Got JSON Array with:" + pathList.size() + " elements");
    for (int i = 0; i < pathList.size(); ++i) {
        LOG.info("Collecting files from path:" + pathList.get(i).toString());
        List<S3ObjectSummary> metadataFiles = getPaths(s3AccessKeyId, s3SecretKey, bucketName,
                pathList.get(i).getAsString());
        LOG.info("Got:" + metadataFiles.size() + " total files");
        for (S3ObjectSummary metadataFile : metadataFiles) {

            Path s3Path = new Path("/" + metadataFile.getKey());
            Path finalPath = new Path(outputPath, s3Path.getName());

            FileStatus fileStatus = null;
            try {
                fileStatus = _fs.getFileStatus(finalPath);
            } catch (IOException e) {
                // Expected when the destination does not exist yet
                // (getFileStatus throws FileNotFoundException); leave
                // fileStatus null so the item is queued below.
            }

            if (fileStatus != null && fileStatus.getLen() != metadataFile.getSize()) {
                LOG.error("SRC-DEST SIZE MISMATCH!! SRC:" + metadataFile + " SRC-SIZE:" + metadataFile.getSize()
                        + " DEST:" + finalPath + " DEST-SIZE:" + fileStatus.getLen());

                // ok delete the destination 
                _fs.delete(finalPath, false);
                // null file status so that the item gets requeued ... 
                fileStatus = null;
            }

            if (fileStatus == null) {
                LOG.info("Queueing Item:" + metadataFile);
                ++_totalQueuedItemsCount;
                _pathMapping.put(metadataFile.getKey(), finalPath);
                _downloader.fetchItem(metadataFile.getKey());
            } else {
                LOG.info("Skipping Already Download Item:" + metadataFile + " Found at:" + finalPath);
            }
        }
    }
    LOG.info("Waiting for shutdown event");
    _downloader.waitForCompletion();
}

From source file:org.cto.VVS3Box.S3Sample.java

License:Open Source License

public static void main(String[] args) throws IOException {
    /*/*  w w w .ja  va  2s  .c o  m*/
     * This credentials provider implementation loads your AWS credentials
     * from a properties file at the root of your classpath.
     *
     * Important: Be sure to fill in your AWS access credentials in the
     *            AwsCredentials.properties file before you try to run this
     *            sample.
     * http://aws.amazon.com/security-credentials
     */
    AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "lior.test-" + UUID.randomUUID();
    String key = "MyObjectKey";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possibly since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket.  Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + "  " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        System.out.println("Deleting an object\n");
        s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        System.out.println("Deleting bucket " + bucketName + "\n");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}