Example usage for com.amazonaws.services.s3 AmazonS3Client listObjects

List of usage examples for com.amazonaws.services.s3 AmazonS3Client listObjects

Introduction

In this page you can find the example usage for com.amazonaws.services.s3 AmazonS3Client listObjects.

Prototype

@Override
    public ObjectListing listObjects(ListObjectsRequest listObjectsRequest)
            throws SdkClientException, AmazonServiceException 

Source Link

Usage

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Scans the "aws-publicdatasets" bucket for CommonCrawl crawl-log keys and queues
 * any previously unseen logs as parse candidates.
 *
 * @param initialScan when true, also runs {@link #scanForCompletions()} after the
 *                    listing pass to pick up work finished by earlier runs
 * @return true if the scan completed, false if an IOException occurred
 * @throws IOException declared for callers; in the current body IO failures are
 *                     caught, logged and turned into a {@code false} return instead
 */
private boolean doScan(boolean initialScan) throws IOException {
    try { // best-effort: failures are logged and surfaced via the return value
        LOG.info("Scanner Thread Starting");
        AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

        ObjectListing response = s3Client.listObjects(new ListObjectsRequest()
                .withBucketName("aws-publicdatasets").withPrefix(CC_BUCKET_ROOT + CC_CRAWLLOG_SOURCE));

        // Page through the listing; each iteration handles one batch of keys.
        do {

            LOG.info("Response Key Count:" + response.getObjectSummaries().size());

            for (S3ObjectSummary entry : response.getObjectSummaries()) {

                Matcher matcher = crawlLogPattern.matcher(entry.getKey());
                if (matcher.matches()) {
                    ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                    if (candidate == null) {
                        LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                    } else {
                        LOG.info("Candidate is:" + candidate);
                        // guard the shared candidate/active/complete state
                        synchronized (this) {
                            if (_complete.contains(candidate._crawlLogName)) {
                                LOG.info("Skipping completed Candidate:" + candidate);
                            } else {
                                // only queue logs that are neither pending nor actively parsing
                                if (!_candidates.containsEntry(candidate._timestamp, candidate)
                                        && !_active.containsKey(candidate)) {
                                    // update candidate size here ... 
                                    candidate._size = entry.getSize();
                                    LOG.info("New Candidate:" + candidate._crawlLogName + " Found");
                                    _candidates.put(candidate._timestamp, candidate);
                                } else {
                                    LOG.info("Skipping Existing Candidate:" + candidate._crawlLogName);
                                }
                            }
                        }
                    }
                }
            }

            // fetch the next page while results remain; the loop condition also
            // stops paging once a shutdown has been requested
            if (response.isTruncated()) {
                response = s3Client.listNextBatchOfObjects(response);
            } else {
                break;
            }
        } while (!shutdownFlag.get());

        if (initialScan) {
            // search for completions 
            synchronized (this) {
                scanForCompletions();
            }
        }

        return true;
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        return false;
    }
}

From source file:org.commoncrawl.service.parser.ec2.EC2ParserMaster.java

License:Open Source License

/**
 * Scans S3 for parser "done" markers and reconciles them with the in-memory
 * candidate set: advances each matching candidate's last valid position, and
 * moves fully-consumed logs from the candidate map to the completed set.
 *
 * @throws IOException declared for callers; S3 client errors propagate unchecked
 */
public void scanForCompletions() throws IOException {
    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ObjectListing response = s3Client.listObjects(new ListObjectsRequest().withBucketName("aws-publicdatasets")
            .withPrefix(CC_BUCKET_ROOT + CC_PARSER_INTERMEDIATE));

    do { // walk every page of the intermediate-output listing

        LOG.info("Response Key Count:" + response.getObjectSummaries().size());

        for (S3ObjectSummary entry : response.getObjectSummaries()) {
            Matcher matcher = doneFilePattern.matcher(entry.getKey());
            if (matcher.matches()) {
                ParseCandidate candidate = ParseCandidate.candidateFromBucketEntry(entry.getKey());
                if (candidate == null) {
                    LOG.error("Failed to Parse Candidate for:" + entry.getKey());
                } else {
                    long partialTimestamp = Long.parseLong(matcher.group(2));
                    long position = Long.parseLong(matcher.group(3));
                    LOG.info("Found completion for Log:" + candidate._crawlLogName + " TS:" + partialTimestamp
                            + " Pos:" + position);
                    candidate._lastValidPos = position;

                    // BUG FIX: the two-argument Iterables.find throws
                    // NoSuchElementException when nothing matches, so the null
                    // check below was unreachable and a missing candidate crashed
                    // the scan. Use the defaultValue overload to get null instead.
                    ParseCandidate existingCandidate = Iterables.find(_candidates.get(candidate._timestamp),
                            Predicates.equalTo(candidate), null);
                    // if existing candidate found 
                    if (existingCandidate != null) {
                        LOG.info("Found existing candidate with last pos:" + existingCandidate._lastValidPos);
                        if (candidate._lastValidPos > existingCandidate._lastValidPos) {
                            existingCandidate._lastValidPos = candidate._lastValidPos;
                            // the parser has consumed the whole log: retire it
                            if (candidate._lastValidPos == candidate._size) {
                                LOG.info("Found last pos == size for candidate:" + candidate._crawlLogName
                                        + ".REMOVING FROM ACTIVE - MOVING TO COMPLETE");
                                _candidates.remove(candidate._timestamp, candidate);
                                _complete.add(candidate._crawlLogName);
                            }
                        }
                    } else {
                        LOG.info("Skipping Completion for CrawlLog:" + candidate._crawlLogName
                                + " because existing candidate was not found.");
                    }
                }
            }
        }
        if (response.isTruncated()) {
            response = s3Client.listNextBatchOfObjects(response);
        } else {
            break;
        }
    } while (true);
}

From source file:org.commoncrawl.util.EC2MetadataTransferUtil.java

License:Open Source License

/**
 * Lists every S3 object under the given segment path whose key begins with
 * "metadata-", following all truncated listing pages.
 *
 * @param s3AccessKeyId AWS access key id
 * @param s3SecretKey   AWS secret key
 * @param bucketName    bucket to search
 * @param segmentPath   key prefix of the segment whose metadata files are wanted
 * @return an immutable list of the matching object summaries
 * @throws IOException declared for callers; S3 errors propagate unchecked
 */
public static List<S3ObjectSummary> getMetadataPaths(String s3AccessKeyId, String s3SecretKey,
        String bucketName, String segmentPath) throws IOException {

    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ImmutableList.Builder<S3ObjectSummary> results = new ImmutableList.Builder<S3ObjectSummary>();

    String metadataFilterKey = segmentPath + "metadata-";
    LOG.info("Prefix Search Key is:" + metadataFilterKey);

    ObjectListing page = s3Client
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix(metadataFilterKey));

    // accumulate each page of results until S3 reports the listing is complete
    while (true) {
        LOG.info("Response Key Count:" + page.getObjectSummaries().size());

        results.addAll(page.getObjectSummaries());

        if (!page.isTruncated()) {
            break;
        }
        page = s3Client.listNextBatchOfObjects(page);
    }

    return results.build();
}

From source file:org.commoncrawl.util.S3BulkTransferUtil.java

License:Open Source License

/**
 * Lists every S3 object under {@code segmentPath} in the given bucket, walking
 * all truncated listing pages.
 *
 * @param s3AccessKeyId AWS access key id
 * @param s3SecretKey   AWS secret key
 * @param bucketName    bucket to search
 * @param segmentPath   key prefix to list under
 * @return an immutable list of the matching object summaries
 * @throws IOException declared for callers; S3 errors propagate unchecked
 */
public static List<S3ObjectSummary> getPaths(String s3AccessKeyId, String s3SecretKey, String bucketName,
        String segmentPath) throws IOException {

    AmazonS3Client s3Client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretKey));

    ImmutableList.Builder<S3ObjectSummary> results = new ImmutableList.Builder<S3ObjectSummary>();

    ObjectListing page = s3Client
            .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix(segmentPath));

    // accumulate each page of results until S3 reports the listing is complete
    while (true) {
        LOG.info("Response Key Count:" + page.getObjectSummaries().size());

        results.addAll(page.getObjectSummaries());

        if (!page.isTruncated()) {
            break;
        }
        page = s3Client.listNextBatchOfObjects(page);
    }

    return results.build();
}

From source file:org.finra.dm.dao.impl.S3OperationsImpl.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Delegates directly to {@link AmazonS3Client#listObjects(ListObjectsRequest)}.
 * NOTE(review): this pass-through presumably exists so S3 access can be stubbed
 * in tests — confirm against the S3Operations interface this implements.
 */
@Override
public ObjectListing listObjects(ListObjectsRequest listObjectsRequest, AmazonS3Client s3Client) {
    return s3Client.listObjects(listObjectsRequest);
}

From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java

License:Open Source License

/**
 * Deletes every object in the named bucket and then the bucket itself, printing
 * a progress dot per deleted key. Failures are logged, not thrown.
 *
 * @param bucketName   bucket to remove
 * @param awsAccessKey AWS access key id
 * @param awsSecretKey AWS secret key
 */
public static void deleteBucket(String bucketName, String awsAccessKey, String awsSecretKey) {
    try {
        System.out.println("");
        System.out.print("Deleting Bucket [" + bucketName + "]");
        AWSCredentials bawsCredentials = new BasicAWSCredentials(awsAccessKey, awsSecretKey);
        AmazonS3Client bs3Service = new AmazonS3Client(bawsCredentials);
        // BUG FIX: listObjects returns at most one page (~1000 keys). The original
        // deleted only that first page, so deleteBucket would fail with
        // BucketNotEmpty on larger buckets. Page through the whole listing.
        ObjectListing ls = bs3Service.listObjects(bucketName);
        while (true) {
            for (S3ObjectSummary objectSummary : ls.getObjectSummaries()) {
                bs3Service.deleteObject(bucketName, objectSummary.getKey());
                System.out.print(".");
            }
            if (!ls.isTruncated()) {
                break;
            }
            ls = bs3Service.listNextBatchOfObjects(ls);
        }
        // NOTE(review): versioned buckets also require deleting all object
        // versions before deleteBucket succeeds — confirm versioning is not used.
        bs3Service.deleteBucket(bucketName);
        SDFSLogger.getLog().info("Bucket [" + bucketName + "] deleted");
        System.out.println("Bucket [" + bucketName + "] deleted");
    } catch (Exception e) {
        SDFSLogger.getLog().warn("Unable to delete bucket " + bucketName, e);
    }
}

From source file:org.openflamingo.fs.s3.S3Utils.java

License:Apache License

/**
 * Builds a metadata map describing the S3 object at {@code objectKey}: bucket
 * name, key, redirect location, and (via {@code setValue}, which skips nulls)
 * version, content headers, ETag, length and last-modified time.
 *
 * @param client     Amazon S3 client to read with
 * @param bucketName bucket containing the object
 * @param objectKey  full key of the object to describe
 * @return a mutable map of metadata attribute name to value
 */
public static Map<String, String> getDirectory(AmazonS3Client client, String bucketName, String objectKey) {
    // NOTE(review): S3Object holds an open HTTP connection; only its metadata is
    // used here, so consider getObjectMetadata(bucket, key) or closing the object.
    S3Object object = client.getObject(bucketName, objectKey);
    ObjectMetadata objectMetadata = object.getObjectMetadata();

    // Removed dead code from the original: an unused List<FileInfo> and a paginated
    // listObjects loop whose common prefixes and summaries were never read.

    Map<String, String> map = new HashMap<String, String>();

    map.put("bucketName", object.getBucketName());
    map.put("name", object.getKey());
    map.put("redirectionLocation", object.getRedirectLocation());

    setValue("version", objectMetadata.getVersionId(), map);
    setValue("contentDisposition", objectMetadata.getContentDisposition(), map);
    setValue("contentType", objectMetadata.getContentType(), map);
    setValue("etag", objectMetadata.getETag(), map);
    setValue("contentEncoding", objectMetadata.getContentEncoding(), map);
    setValue("contentLength", objectMetadata.getContentLength(), map);
    setValue("lastModified", objectMetadata.getLastModified(), map);
    // BUG FIX: the original built the map and then returned null, discarding
    // everything; return the populated map instead.
    return map;
}

From source file:org.zalando.stups.fullstop.controller.S3Controller.java

License:Apache License

/**
 * Downloads the S3 objects under {@code location} in {@code bucket} into the
 * local fullstop logging directory, one file per object (named bucket name +
 * ETag + JSON_GZ). All listing pages are collected before downloading.
 *
 * @param bucket   S3 bucket to read from
 * @param location key prefix to list under
 * @param page     maximum number of keys requested per listing page
 */
@RequestMapping(method = RequestMethod.GET, value = "/download")
public void downloadFiles(@RequestParam(value = "bucket") final String bucket,
        @RequestParam(value = "location") final String location, @RequestParam(value = "page") final int page) {

    try {
        log.info("Creating fullstop directory here: {}", fullstopLoggingDir);

        boolean mkdirs = new File(fullstopLoggingDir).mkdirs();
        if (!mkdirs) {
            // directory may already exist; a real permission problem surfaces on write
            log.debug("Did not create directory {} (it may already exist)", fullstopLoggingDir);
        }
    } catch (SecurityException e) {
        // deliberately ignored: lack of permission will surface when files are written
    }

    AmazonS3Client amazonS3Client = new AmazonS3Client();
    amazonS3Client.setRegion(Region.getRegion(Regions
            .fromName((String) cloudTrailProcessingLibraryProperties.getAsProperties().get(S3_REGION_KEY))));

    ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket) //
            .withPrefix(location) //
            .withMaxKeys(page);

    ObjectListing objectListing = amazonS3Client.listObjects(listObjectsRequest);

    final List<S3ObjectSummary> s3ObjectSummaries = objectListing.getObjectSummaries();

    // collect the summaries from every remaining listing page
    while (objectListing.isTruncated()) {

        objectListing = amazonS3Client.listNextBatchOfObjects(objectListing);
        s3ObjectSummaries.addAll(objectListing.getObjectSummaries());

    }

    for (S3ObjectSummary s3ObjectSummary : s3ObjectSummaries) {
        String bucketName = s3ObjectSummary.getBucketName();
        String key = s3ObjectSummary.getKey();

        // RESOURCE FIX: S3Object wraps an open HTTP connection; the original never
        // closed it, leaking a connection per downloaded object. try-with-resources
        // closes the object (and its content stream) after the copy.
        try (S3Object object = amazonS3Client.getObject(new GetObjectRequest(bucketName, key))) {
            InputStream inputStream = object.getObjectContent();

            File file = new File(fullstopLoggingDir,
                    object.getBucketName() + object.getObjectMetadata().getETag() + JSON_GZ);

            copyInputStreamToFile(inputStream, file);
            log.info("File saved here: {}", file.getAbsolutePath());

        } catch (IOException e) {
            log.warn("Could not download s3://{}/{}", bucketName, key, e);
        }
    }
}

From source file:org.zalando.stups.fullstop.plugin.SaveSecurityGroupsPlugin.java

License:Apache License

/**
 * Lists the common prefixes directly under {@code prefix} in the given bucket
 * (delimiter "/"), printing each object key and size as pages are fetched.
 * AWS failures are reported to stdout and whatever was collected is returned.
 *
 * @param bucketName bucket to list
 * @param prefix     key prefix to list under
 * @return the common prefixes gathered across all listing pages
 */
private List<String> listS3Objects(String bucketName, String prefix) {
    final List<String> prefixes = Lists.newArrayList();

    AmazonS3Client client = new AmazonS3Client();

    try {
        System.out.println("Listing objects");

        ListObjectsRequest request = new ListObjectsRequest().withDelimiter("/")
                .withBucketName(bucketName).withPrefix(prefix);

        // fetch pages until S3 reports the listing is no longer truncated
        while (true) {
            ObjectListing listing = client.listObjects(request);
            prefixes.addAll(listing.getCommonPrefixes());
            for (S3ObjectSummary summary : listing.getObjectSummaries()) {
                System.out.println(
                        " - " + summary.getKey() + "  " + "(size = " + summary.getSize() + ")");
            }
            if (!listing.isTruncated()) {
                break;
            }
            request.setMarker(listing.getNextMarker());
        }

    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, " + "which means your request made it "
                + "to Amazon S3, but was rejected with an error response " + "for some reason.");
        System.out.println("Error Message:    " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code:   " + ase.getErrorCode());
        System.out.println("Error Type:       " + ase.getErrorType());
        System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, " + "which means the client encountered "
                + "an internal error while trying to communicate" + " with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }

    return prefixes;
}