Example usage for com.amazonaws.services.s3.model S3ObjectSummary getLastModified

List of usage examples for com.amazonaws.services.s3.model S3ObjectSummary getLastModified

Introduction

On this page you can find an example usage for com.amazonaws.services.s3.model S3ObjectSummary getLastModified.

Prototype

public Date getLastModified() 

Source Link

Document

Gets the date when, according to Amazon S3, this object was last modified.

Usage

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

/**
 * Fetches the next eligible object from the spooler and, if it is a new object (later than the
 * one recorded in the offset), resets the offset to the beginning of that object.
 *
 * @param s3Offset the current offset (key / position / eTag / timestamp)
 * @param batchContext batch context handed through to the spooler
 * @return the (possibly updated) offset; unchanged if no new object was found
 * @throws StageException if the S3 client fails while polling
 */
private S3Offset fetchNextObjectFromSpooler(S3Offset s3Offset, BatchContext batchContext)
        throws StageException {
    setCurrentObject(null);
    try {
        // The next object found in the queue is usually eligible since objects are processed in
        // chronological order. However, after processing a few files, if the configuration is
        // changed (say, the prefix is relaxed) and an older file gets selected for processing,
        // it must be ignored.
        S3ObjectSummary nextAvailObj = null;
        do {
            if (nextAvailObj != null) {
                LOG.warn("Ignoring object '{}' in spool directory as is lesser than offset object '{}'",
                        nextAvailObj.getKey(), s3Offset.getKey());
            }
            nextAvailObj = spooler.poolForObject(amazonS3Source, s3ConfigBean.basicConfig.maxWaitTime,
                    TimeUnit.MILLISECONDS, batchContext);
        } while (!isEligible(nextAvailObj, s3Offset));

        if (nextAvailObj == null) {
            // No object to process; the caller will produce an empty batch.
            LOG.debug("No new object available in spool queue after '{}' secs, producing empty batch",
                    s3ConfigBean.basicConfig.maxWaitTime / 1000);
        } else {
            setCurrentObject(nextAvailObj);

            // If the current offset key is null/empty, or the object returned by the spooler is
            // later than the offset object, take the spooler's object as the new one and set the
            // offset to zero. Otherwise the spooler returned the current object and we keep
            // processing it from the last tracked offset.
            if (s3Offset.getKey() == null || s3Offset.getKey().equals(S3Constants.EMPTY)
                    || isLaterThan(nextAvailObj.getKey(), nextAvailObj.getLastModified().getTime(),
                            s3Offset.getKey(), Long.parseLong(s3Offset.getTimestamp()))) {
                s3Offset = new S3Offset(getCurrentObject().getKey(), S3Constants.ZERO,
                        getCurrentObject().getETag(),
                        String.valueOf(getCurrentObject().getLastModified().getTime()));
            }
        }
    } catch (InterruptedException ex) {
        // The spooler was interrupted while waiting for an object. Restore the interrupt status
        // so upstream code can observe it; the pipeline agent will invoke us again to wait for an
        // object once more.
        Thread.currentThread().interrupt();
        LOG.warn("Pooling interrupted");
    } catch (AmazonClientException e) {
        throw new StageException(Errors.S3_SPOOLDIR_23, e.toString(), e);
    }
    return s3Offset;
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.java

License:Apache License

/**
 * Decides whether the spooled object should be processed, based on the configured ordering.
 *
 * @param nextAvailObj candidate object from the spooler (may be null)
 * @param s3Offset current offset (may be null)
 * @return true if the object is eligible for processing under the configured ordering
 * @throws IllegalArgumentException for an unrecognized ordering value
 */
private boolean isEligible(S3ObjectSummary nextAvailObj, S3Offset s3Offset) {

    ObjectOrdering ordering = s3ConfigBean.s3FileConfig.objectOrdering;
    if (ordering == ObjectOrdering.TIMESTAMP) {
        // Eligible when either side is missing, or the object is at least as new as the offset.
        return nextAvailObj == null || s3Offset == null
                || nextAvailObj.getLastModified().getTime() >= Long.parseLong(s3Offset.getTimestamp());
    }
    if (ordering == ObjectOrdering.LEXICOGRAPHICAL) {
        // Eligible when there is no usable offset key, or the key sorts strictly after it.
        return nextAvailObj == null || s3Offset == null || s3Offset.getKey() == null
                || s3Offset.getKey().equals(S3Constants.EMPTY)
                || nextAvailObj.getKey().compareTo(s3Offset.getKey()) > 0;
    }
    throw new IllegalArgumentException("Unknown ordering: " + ordering.getLabel());
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Source.java

License:Apache License

/**
 * Fetches the next eligible object from the spooler and, if it is a new object (later than the
 * one recorded in the offset), resets the offset to the beginning of that object.
 *
 * @param s3Offset the current offset (key / position / eTag / timestamp)
 * @return the (possibly updated) offset; unchanged if no new object was found
 * @throws StageException if the S3 client fails while polling
 */
private S3Offset fetchNextObjectFromSpooler(S3Offset s3Offset) throws StageException {
    setCurrentObject(null);
    try {
        // The next object found in the queue is usually eligible since objects are processed in
        // chronological order. However, after processing a few files, if the configuration is
        // changed (say, the prefix is relaxed) and an older file gets selected for processing,
        // it must be ignored.
        S3ObjectSummary nextAvailObj = null;
        do {
            if (nextAvailObj != null) {
                LOG.warn("Ignoring object '{}' in spool directory as is lesser than offset object '{}'",
                        nextAvailObj.getKey(), s3Offset.getKey());
            }
            nextAvailObj = spooler.poolForObject(s3Offset, s3ConfigBean.basicConfig.maxWaitTime,
                    TimeUnit.MILLISECONDS);
        } while (!isEligible(nextAvailObj, s3Offset));

        if (nextAvailObj == null) {
            // No object to process; the caller will produce an empty batch.
            LOG.debug("No new object available in spool directory after '{}' secs, producing empty batch",
                    s3ConfigBean.basicConfig.maxWaitTime / 1000);
        } else {
            setCurrentObject(nextAvailObj);

            // If the current offset key is null, or the object returned by the spooler is later
            // than the offset object, take the spooler's object as the new one and set the offset
            // to zero. Otherwise the spooler returned the current object and we keep processing
            // it from the last tracked offset.
            if (s3Offset.getKey() == null
                    || isLaterThan(nextAvailObj.getKey(), nextAvailObj.getLastModified().getTime(),
                            s3Offset.getKey(), Long.parseLong(s3Offset.getTimestamp()))) {
                s3Offset = new S3Offset(getCurrentObject().getKey(), ZERO, getCurrentObject().getETag(),
                        String.valueOf(getCurrentObject().getLastModified().getTime()));
            }
        }
    } catch (InterruptedException ex) {
        // The spooler was interrupted while waiting for an object. Restore the interrupt status
        // so upstream code can observe it; the pipeline agent will invoke us again to wait for an
        // object once more.
        Thread.currentThread().interrupt();
        LOG.warn("Pooling interrupted");
    } catch (AmazonClientException e) {
        // Preserve the original exception as the cause (was previously dropped, losing the
        // S3 client's stack trace).
        throw new StageException(Errors.S3_SPOOLDIR_23, e.toString(), e);
    }
    return s3Offset;
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java

License:Apache License

/**
 * Lists objects from AmazonS3 in chronological order [lexicographical order if 2 files have same timestamp] which are
 * later than or equal to the timestamp of the previous offset object
 *
 * @param s3Client client used to enumerate the bucket
 * @param s3ConfigBean provides the bucket and folder to scan
 * @param pathMatcher glob patterns to match file name against
 * @param s3Offset current offset which provides the timestamp of the previous object
 * @param fetchSize number of objects to fetch in one go
 * @return up to {@code fetchSize} eligible object summaries, oldest first
 * @throws AmazonClientException if the S3 listing fails
 */
static List<S3ObjectSummary> listObjectsChronologically(AmazonS3Client s3Client, S3ConfigBean s3ConfigBean,
        PathMatcher pathMatcher, AmazonS3Source.S3Offset s3Offset, int fetchSize) throws AmazonClientException {

    //Algorithm:
    // - Full scan all objects that match the file name pattern and which are later than the file in the offset
    // - Select the oldest "fetchSize" number of files and return them.
    TreeSet<S3ObjectSummary> treeSet = new TreeSet<>(new Comparator<S3ObjectSummary>() {
        @Override
        public int compare(S3ObjectSummary o1, S3ObjectSummary o2) {
            int result = o1.getLastModified().compareTo(o2.getLastModified());
            if (result != 0) {
                //different modified times: the timestamp ordering decides
                return result;
            }
            //same modified time: fall back to the key (name) to break the tie
            return o1.getKey().compareTo(o2.getKey());
        }
    });

    S3Objects s3ObjectSummaries = S3Objects
            .withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.folder)
            .withBatchSize(BATCH_SIZE);
    for (S3ObjectSummary s : s3ObjectSummaries) {
        // Strip the folder prefix to get the file name relative to the scanned folder.
        String fileName = s.getKey().substring(s3ConfigBean.s3Config.folder.length(), s.getKey().length());
        if (!fileName.isEmpty()) {
            //fileName can be empty.
            //If the user manually creates a folder "myFolder/mySubFolder" in bucket "myBucket" and uploads "myObject",
            // then the first objects returned here are:
            // myFolder/mySubFolder
            // myFolder/mySubFolder/myObject
            //
            // All is good when pipeline is run but preview returns with no data. So we should ignore the empty file as it
            // has no data
            if (pathMatcher.matches(Paths.get(fileName)) && isEligible(s, s3Offset)) {
                treeSet.add(s);
            }
            // Keep only the oldest fetchSize entries; drop the newest when the cap is exceeded.
            if (treeSet.size() > fetchSize) {
                treeSet.pollLast();
            }
        }
    }

    return new ArrayList<>(treeSet);
}

From source file:com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.java

License:Apache License

/**
 * Decides whether an object should be picked up relative to the current offset.
 *
 * <p>The object is eligible if:
 * <ol>
 *   <li>its timestamp is greater than that of the current object in the offset, or</li>
 *   <li>the timestamp is the same but the name is lexicographically greater than the current
 *       object [can happen when multiple objects are uploaded in one go], or</li>
 *   <li>same timestamp, same name [same as the current object in the offset] — eligible only if
 *       it was not completely processed (offset != -1).</li>
 * </ol>
 *
 * @param s candidate object summary
 * @param s3Offset current offset (key / position / timestamp)
 * @return true if the object should be processed
 */
private static boolean isEligible(S3ObjectSummary s, AmazonS3Source.S3Offset s3Offset) {

    // Hoist the comparisons that the original computed twice: compare epoch millis directly
    // instead of constructing two transient Date instances, and compare keys only once.
    long objectTime = s.getLastModified().getTime();
    long offsetTime = Long.parseLong(s3Offset.getTimestamp());

    if (objectTime > offsetTime) {
        return true;
    }
    if (objectTime == offsetTime) {
        // Same timestamp: compare names.
        int keyComparison = s.getKey().compareTo(s3Offset.getKey());
        if (keyComparison > 0) {
            return true;
        }
        if (keyComparison == 0) {
            // Same timestamp, same name. If the current offset is not -1, return the file:
            // the previous file was only partially processed.
            return Long.parseLong(s3Offset.getOffset()) != -1;
        }
    }
    return false;
}

From source file:com.streamsets.pipeline.stage.origin.s3.S3Spooler.java

License:Apache License

/**
 * Queues a newly discovered object, skipping duplicates and (optionally) verifying that the
 * object is strictly newer than the one currently being processed.
 *
 * @param objectSummary the object to enqueue; must not be null
 * @param checkCurrent when true, assert the object is newer than {@code currentObject}
 */
void addObjectToQueue(S3ObjectSummary objectSummary, boolean checkCurrent) {
    Preconditions.checkNotNull(objectSummary, "file cannot be null");
    if (checkCurrent) {
        // The spooler must only discover objects newer than the one in flight.
        boolean newerThanCurrent = currentObject == null
                || currentObject.getLastModified().compareTo(objectSummary.getLastModified()) < 0;
        Preconditions.checkState(newerThanCurrent);
    }
    if (objectQueue.contains(objectSummary)) {
        LOG.warn("Object '{}' already in queue, ignoring", objectSummary.getKey());
    } else {
        // Warn (but still enqueue) when the spool size limit is reached.
        if (objectQueue.size() >= MAX_SPOOL_SIZE) {
            LOG.warn("Exceeded '{}' of queued files", objectQueue.size());
        }
        objectQueue.add(objectSummary);
        spoolQueueMeter.mark(objectQueue.size());
    }
}

From source file:com.streamsets.pipeline.stage.origin.s3.S3Spooler.java

License:Apache License

/**
 * If sdc was shut down after fully reading an object but before post-processing it, handle the
 * post-processing now. Always clears {@code currentObject} when done.
 *
 * <p>The scenario is detected as follows:
 * <ol>
 *   <li>the current key must not be null</li>
 *   <li>the offset must be -1 (the object was completely read)</li>
 *   <li>an object with the same key must still exist in S3</li>
 *   <li>the timestamp of the object in S3 must match the timestamp in the offset — someone may
 *       have uploaded a new object with the same name, and comparing timestamps avoids
 *       post-processing that one without producing records</li>
 * </ol>
 *
 * @param s3Offset the offset recorded before shutdown
 */
public void postProcessOlderObjectIfNeeded(AmazonS3Source.S3Offset s3Offset) {
    boolean fullyReadObject = s3Offset.getKey() != null && "-1".equals(s3Offset.getOffset());
    if (fullyReadObject) {
        // Conditions 1 and 2 are met; now check 3 and 4.
        S3ObjectSummary objectSummary = AmazonS3Util.getObjectSummary(s3Client, s3ConfigBean.s3Config.bucket,
                s3Offset.getKey());
        boolean sameObjectStillInS3 = objectSummary != null && objectSummary.getLastModified()
                .compareTo(new Date(Long.parseLong(s3Offset.getTimestamp()))) == 0;
        if (sameObjectStillInS3) {
            postProcessOrErrorHandle(s3Offset.getKey(), s3ConfigBean.postProcessingConfig.postProcessing,
                    s3ConfigBean.postProcessingConfig.postProcessBucket,
                    s3ConfigBean.postProcessingConfig.postProcessFolder,
                    s3ConfigBean.postProcessingConfig.archivingOption);
        }
    }
    currentObject = null;
}

From source file:com.upplication.s3fs.S3FileSystemProvider.java

License:Open Source License

@Override
public <A extends BasicFileAttributes> A readAttributes(Path path, Class<A> type, LinkOption... options)
        throws IOException {
    // Reads file attributes for an S3-backed path. Only BasicFileAttributes is supported; any
    // other attribute class throws UnsupportedOperationException.
    Preconditions.checkArgument(path instanceof S3Path, "path must be an instance of %s",
            S3Path.class.getName());
    S3Path s3Path = (S3Path) path;

    if (type == BasicFileAttributes.class) {

        S3ObjectSummary objectSummary = s3ObjectSummaryLookup.lookup(s3Path);

        // Parse the S3 object summary into BasicFileAttributes.
        // Last-modified may be absent on the summary; leave it null in that case.
        FileTime lastModifiedTime = null;
        if (objectSummary.getLastModified() != null) {
            lastModifiedTime = FileTime.from(objectSummary.getLastModified().getTime(), TimeUnit.MILLISECONDS);
        }

        long size = objectSummary.getSize();
        boolean directory = false;
        boolean regularFile = false;
        String key = objectSummary.getKey();
        // Case 1: the summary key is exactly "<path>/" — a directory marker object exists in S3.
        if (objectSummary.getKey().equals(s3Path.getKey() + "/") && objectSummary.getKey().endsWith("/")) {
            directory = true;
        }
        // Case 2: the summary is for some object UNDER this path (prefix match but not an exact
        // key match) — the path is a directory that has no marker object in S3, so fake the
        // metadata: zero size and a key normalized to end with "/".
        else if ((!objectSummary.getKey().equals(s3Path.getKey()) || "".equals(s3Path.getKey()))
                && objectSummary.getKey().startsWith(s3Path.getKey())) {
            directory = true;
            // no metadata, we fake one
            size = 0;
            // delete extra part
            key = s3Path.getKey() + "/";
        }
        // Case 3: exact key match that is not a directory marker — a regular file.
        else {
            regularFile = true;
        }

        return type.cast(new S3FileAttributes(key, lastModifiedTime, size, directory, regularFile));
    }

    // Unsupported attribute class.
    throw new UnsupportedOperationException(format("only %s supported", BasicFileAttributes.class));
}

From source file:com.yahoo.athenz.zts.store.impl.S3ChangeLogStore.java

License:Apache License

/**
 * list the objects in the zts bucket. If the mod time is specified as 0
 * then we want to list all objects otherwise, we only list objects
 * that are newer than the specified timestamp
 * @param s3 AWS S3 client object/*from w w w . j  a  v a 2  s.co m*/
 * @param domains collection to be updated to include domain names
 * @param modTime only include domains newer than this timestamp
 */
void listObjects(AmazonS3 s3, Collection<String> domains, long modTime) {

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("listObjects: Retrieving domains from {} with mod time > {}", s3BucketName, modTime);
    }

    ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(s3BucketName));

    String objectName;
    while (objectListing != null) {

        // process each entry in our result set and add the domain
        // name to our return list

        final List<S3ObjectSummary> objectSummaries = objectListing.getObjectSummaries();
        boolean listTruncated = objectListing.isTruncated();

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("listObjects: retrieved {} objects, more objects available - {}",
                    objectSummaries.size(), listTruncated);
        }

        for (S3ObjectSummary objectSummary : objectSummaries) {

            // if mod time is specified then make sure we automatically skip
            // any domains older than the specified value

            if (modTime > 0 && objectSummary.getLastModified().getTime() <= modTime) {
                continue;
            }

            // for now skip any folders/objects that start with '.'

            objectName = objectSummary.getKey();
            if (objectName.charAt(0) == '.') {
                continue;
            }
            domains.add(objectName);
        }

        // check if the object listing is truncated or not (break out in this case)
        // technically we can skip this call and just call listNextBatchOfResults
        // since that returns null if the object listing is not truncated but 
        // this direct check here makes the logic easier to follow

        if (!listTruncated) {
            break;
        }

        objectListing = s3.listNextBatchOfObjects(objectListing);
    }
}

From source file:com.yahoo.athenz.zts.store.s3.S3ChangeLogStore.java

License:Apache License

/**
 * list the objects in the zts bucket. If te mod time is specified as 0
 * then we want to list all objects otherwise, we only list objects
 * that are newer than the specified timestamp
 * @param s3 AWS S3 client object/*from w  ww .  j  ava2  s. c  om*/
 * @param domains collection to be updated to include domain names
 * @param modTime only include domains newer than this timestamp
 */
void listObjects(AmazonS3 s3, Collection<String> domains, long modTime) {

    ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(s3BucketName));

    String objectName = null;
    while (objectListing != null) {

        // process each entry in our result set and add the domain
        // name to our return list

        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {

            // if mod time is specified then make sure we automatically skip
            // any domains older than the specified value

            if (modTime > 0 && objectSummary.getLastModified().getTime() <= modTime) {
                continue;
            }

            // for now skip any folders/objects that start with '.'

            objectName = objectSummary.getKey();
            if (objectName.charAt(0) == '.') {
                continue;
            }
            domains.add(objectName);
        }

        // check if the object listing is truncated or not (break out in this case)
        // technically we can skip this call and just call listNextBatchOfResults
        // since that returns null if the object listing is not truncated but 
        // this direct check here makes the logic easier to follow

        if (!objectListing.isTruncated()) {
            break;
        }

        objectListing = s3.listNextBatchOfObjects(objectListing);
    }
}