com.epam.dlab.module.aws.S3FileList.java Source code

Java tutorial

Introduction

Here is the source code for com.epam.dlab.module.aws.S3FileList.java

Source

/***************************************************************************
    
 Copyright (c) 2016, EPAM SYSTEMS INC
    
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    
 http://www.apache.org/licenses/LICENSE-2.0
    
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
    
 ****************************************************************************/

package com.epam.dlab.module.aws;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.epam.dlab.core.ModuleData;
import com.epam.dlab.exceptions.AdapterException;
import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toList;

/**
 * Creates a listing of report files found in an AWS S3 bucket.
 * See details in
 * <a href="http://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports.html#detailed-report-with-resources-tags">
 * Detailed billing report with resources and tags</a>.
 */
public class S3FileList {

    /**
     * Report suffix without date.
     */
    private static final String REPORT_SUFFIX = ".csv.zip";
    /**
     * Date regex for YYYYMMDD
     */
    private static final String DATE_REGEX = "\\d{4}(0?[1-9]|1[012])(0?[1-9]|[12][0-9]|3[01])";
    /**
     * Key regex used when the AWS job is enabled. Capture group 1 is the part of the key up to and
     * including the {@code <date>-<date>} range; it is used as the grouping key for a billing period.
     * <p>
     * NOTE(review): only the first dot of the suffix is escaped here, so the dot in front of "zip"
     * matches any character - confirm whether that is intended before tightening the expression.
     */
    private static final String REGEX = String.format("(^.*/.*/%s-%s)/.*/*.\\%s", DATE_REGEX, DATE_REGEX,
            REPORT_SUFFIX);

    /**
     * Bucket name.
     */
    private final String bucket;

    /**
     * Module data; queried to find out whether a report file has already been processed.
     */
    private final ModuleData moduleData;
    /**
     * Compiled pattern that an object key has to match to be treated as a report file.
     */
    private final Pattern reportPattern;
    /**
     * Selects the key pattern and whether only the latest report per billing period is returned.
     */
    private final boolean awsJobEnabled;

    /**
     * Instantiate file find class.
     *
     * @param awsJobEnabled if <b>true</b> keys are matched against the billing period regex and only the
     *                      files of the most recently modified report per period are returned; otherwise
     *                      every key matching the report suffix pattern is returned.
     * @param bucket        the name of bucket.
     * @param moduleData    data for working module
     */
    public S3FileList(boolean awsJobEnabled, String bucket, ModuleData moduleData) {
        this.bucket = bucket;
        this.moduleData = moduleData;
        this.awsJobEnabled = awsJobEnabled;
        this.reportPattern = this.awsJobEnabled ? Pattern.compile(REGEX)
                : Pattern.compile(".*" + REPORT_SUFFIX + "$");
    }

    /**
     * Return the list of files for new reports.
     *
     * @param s3Client the S3 client.
     * @return the sorted list of object keys.
     * @throws AdapterException if the bucket listing cannot be obtained.
     */
    public List<String> getFiles(AmazonS3 s3Client) throws AdapterException {
        final List<S3ObjectSummary> objectSummaries = reportFilesInBillingBucket(s3Client);
        return awsJobEnabled ? lastFilesPerBillingPeriod(objectSummaries)
                : objectSummaries.stream().map(S3ObjectSummary::getKey).sorted().collect(toList());
    }

    /**
     * Lists the bucket page by page and collects the objects which match the report pattern and
     * have not been processed yet.
     *
     * @param s3Client the S3 client.
     * @return the matching, not yet processed objects of all pages.
     * @throws AdapterException if any error occurs while listing the bucket.
     */
    private List<S3ObjectSummary> reportFilesInBillingBucket(AmazonS3 s3Client) throws AdapterException {
        ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(bucket);
        ListObjectsV2Result result;
        List<S3ObjectSummary> objectSummaries = new ArrayList<>();
        try {
            do {
                result = s3Client.listObjectsV2(request);
                objectSummaries.addAll(notProcessedFiles(result));
                // Advance to the next page. Without the continuation token a truncated listing
                // would return the same first page again and this loop would never terminate.
                request.setContinuationToken(result.getNextContinuationToken());
            } while (result.isTruncated());
        } catch (Exception e) {
            throw new AdapterException(
                    "Cannot get the file listing of bucket \"" + bucket + "*\". " + e.getLocalizedMessage(), e);
        }
        return objectSummaries;
    }

    /**
     * Filters one page of the listing down to the report files which were not processed.
     *
     * @param result one page of the bucket listing.
     * @return the objects of the page that passed the filter.
     */
    private List<S3ObjectSummary> notProcessedFiles(ListObjectsV2Result result) {
        return result.getObjectSummaries().stream()
                .filter(this::matchBillingRegexAndWasNotProcessed)
                .collect(toList());
    }

    /**
     * Checks that the whole object key matches the report pattern and that the module data does
     * not report the object as processed.
     *
     * @param o amazon s3 object
     * @return <b>true</b> if the object has to be loaded.
     */
    private boolean matchBillingRegexAndWasNotProcessed(S3ObjectSummary o) {
        return reportPattern.matcher(o.getKey()).matches()
                && !moduleData.wasProcessed(o.getKey(), o.getLastModified(), extractDatePrefix(reportPattern, o));
    }

    /**
     * Returns list of files per billing period.
     * For particular billing period the files located next to the file with the biggest
     * modification date will be returned.
     *
     * @param objectSummaries amazon s3 objects
     * @return sorted list of file names
     */
    protected List<String> lastFilesPerBillingPeriod(List<S3ObjectSummary> objectSummaries) {
        final Map<String, List<S3ObjectSummary>> months = objectSummaries.stream().collect(
                Collectors.groupingBy(o -> extractDatePrefix(reportPattern, o), mapping(o -> o, toList())));

        return months.entrySet().stream()
                .flatMap(this::lastFileForBillingPeriod)
                .sorted()
                .collect(Collectors.toList());
    }

    /**
     * Finds the most recently modified object of one billing period and returns the keys of all
     * objects of that period which start with the same path (the key up to its last "/").
     *
     * @param entry billing period prefix mapped to the objects found for it.
     * @return the keys of the objects which belong to the latest report of the period.
     */
    private Stream<String> lastFileForBillingPeriod(Map.Entry<String, List<S3ObjectSummary>> entry) {
        final List<S3ObjectSummary> assemblyIds = entry.getValue();
        final S3ObjectSummary lastBillingFile = assemblyIds.stream()
                .max(Comparator.comparing(S3ObjectSummary::getLastModified))
                .orElseThrow(() -> new IllegalStateException("AssemblyId does not contains any file"));
        // Computed once: it does not depend on the element being filtered.
        final String lastReportPath = StringUtils.substringBeforeLast(lastBillingFile.getKey(), "/");
        return assemblyIds.stream()
                .map(S3ObjectSummary::getKey)
                .filter(key -> key.startsWith(lastReportPath));
    }

    /**
     * Returns the first capture group of the pattern for the object key when the AWS job is
     * enabled and the pattern is found in the key; otherwise returns the key itself.
     *
     * @param pattern the pattern to search in the key.
     * @param o       amazon s3 object
     * @return the billing period prefix or the whole key.
     */
    private String extractDatePrefix(Pattern pattern, S3ObjectSummary o) {
        final String key = o.getKey();
        if (awsJobEnabled) {
            final Matcher matcher = pattern.matcher(key);
            if (matcher.find()) {
                return matcher.group(1);
            }
        }
        return key;
    }
}