org.apache.streams.s3.S3PersistWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.streams.s3.S3PersistWriter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.s3;

import org.apache.streams.config.ComponentConfigurator;
import org.apache.streams.config.StreamsConfigurator;
import org.apache.streams.converter.LineReadWriteUtil;
import org.apache.streams.core.DatumStatus;
import org.apache.streams.core.DatumStatusCountable;
import org.apache.streams.core.DatumStatusCounter;
import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsPersistWriter;
import org.apache.streams.jackson.StreamsJacksonMapper;

import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

/**
 * S3PersistWriter writes documents to s3.
 */
/**
 * S3PersistWriter writes documents to s3.
 *
 * <p>Each datum is converted to a single line by {@link LineReadWriteUtil} and appended to the
 * current ".tsv" file. A new file is started when no file is open yet, or when the bytes written
 * to the current file reach the configured maximum file size (interpreted in megabytes).
 *
 * <p>{@link #write(StreamsDatum)}, {@link #resetFile()} and the private close helpers synchronize
 * on this instance. {@link #prepare(Object)} initializes the line converter and, when no client
 * was supplied through the constructor, the S3 client.
 */
public class S3PersistWriter implements StreamsPersistWriter, DatumStatusCountable {

    public static final String STREAMS_ID = "S3PersistWriter";

    private static final Logger LOGGER = LoggerFactory.getLogger(S3PersistWriter.class);

    private static final char DELIMITER = '\t';

    /** Bytes per megabyte, kept as a long so the rotation threshold is computed in long arithmetic. */
    private static final long BYTES_PER_MEGABYTE = 1024L * 1024L;

    private ObjectMapper objectMapper;
    private AmazonS3Client amazonS3Client;
    private S3WriterConfiguration s3WriterConfiguration;
    private final List<String> writtenFiles = new ArrayList<>();
    protected LineReadWriteUtil lineWriterUtil;

    private final AtomicLong totalBytesWritten = new AtomicLong();
    private final AtomicLong bytesWrittenThisFile = new AtomicLong();

    private final AtomicInteger totalRecordsWritten = new AtomicInteger();
    private final AtomicInteger fileLineCounter = new AtomicInteger();

    // NOTE: static, so this map is shared by every S3PersistWriter instance in the JVM.
    private static Map<String, String> objectMetaData = new HashMap<>();

    static {
        objectMetaData.put("line[0]", "id");
        objectMetaData.put("line[1]", "timeStamp");
        objectMetaData.put("line[2]", "metaData");
        objectMetaData.put("line[3]", "document");
    }

    private OutputStreamWriter currentWriter = null;

    public AmazonS3Client getAmazonS3Client() {
        return this.amazonS3Client;
    }

    public S3WriterConfiguration getS3WriterConfiguration() {
        return this.s3WriterConfiguration;
    }

    /**
     * Returns the live list of files opened so far (writer path + file name), oldest first.
     * @return the internal, mutable list; it is not a copy
     */
    public List<String> getWrittenFiles() {
        return this.writtenFiles;
    }

    public Map<String, String> getObjectMetaData() {
        return objectMetaData;
    }

    public ObjectMapper getObjectMapper() {
        return this.objectMapper;
    }

    public void setObjectMapper(ObjectMapper mapper) {
        this.objectMapper = mapper;
    }

    /**
     * Replaces the object metadata handed to every newly created output stream.
     * The backing field is static, so the change is visible to all writer instances.
     * @param val the new metadata map
     */
    public void setObjectMetaData(Map<String, String> val) {
        objectMetaData = val;
    }

    /**
     * Creates a writer configured from the "s3" section of the detected streams configuration.
     */
    public S3PersistWriter() {
        this(new ComponentConfigurator<>(S3WriterConfiguration.class)
                .detectConfiguration(StreamsConfigurator.getConfig().getConfig("s3")));
    }

    /**
     * Creates a writer from an explicit configuration; the S3 client is built in
     * {@link #prepare(Object)}.
     * @param s3WriterConfiguration configuration of the write paths and credentials
     */
    public S3PersistWriter(S3WriterConfiguration s3WriterConfiguration) {
        this.s3WriterConfiguration = s3WriterConfiguration;
    }

    /**
     * Instantiator with a pre-existing amazonS3Client, this is used to help with re-use.
     * @param amazonS3Client
     *     If you have an existing amazonS3Client, it won't bother to create another one
     * @param s3WriterConfiguration
     *     Configuration of the write paths and instructions are still required.
     */
    public S3PersistWriter(AmazonS3Client amazonS3Client, S3WriterConfiguration s3WriterConfiguration) {
        this.amazonS3Client = amazonS3Client;
        this.s3WriterConfiguration = s3WriterConfiguration;
    }

    @Override
    public String getId() {
        return STREAMS_ID;
    }

    /**
     * Converts the datum to a line and appends it to the current file, starting a new file first
     * when none is open or the current one has reached the configured size.
     *
     * <p>If no file can be opened, or the write itself fails, the failure is logged and the datum
     * is dropped; in that case none of the byte or record counters are advanced.
     *
     * @param streamsDatum the datum to persist
     */
    @Override
    public void write(StreamsDatum streamsDatum) {

        synchronized (this) {
            // Check to see if we need to reset the file that we are currently working with
            if (needsNewFile()) {
                try {
                    LOGGER.info("Resetting the file");
                    this.currentWriter = resetFile();
                } catch (Exception ex) {
                    // resetFile() leaves currentWriter null on failure; bail out rather than
                    // dereferencing it below.
                    LOGGER.error("No S3 file is available for writing, dropping datum: {}", ex.toString());
                    return;
                }
            }

            String line = lineWriterUtil.convertResultToString(streamsDatum);

            try {
                this.currentWriter.write(line);
            } catch (IOException ex) {
                LOGGER.error("Failed to write datum to the current S3 file", ex);
                // Do not count a record that was never written.
                return;
            }

            // add the bytes we've written (same default charset as the OutputStreamWriter uses)
            int recordSize = line.getBytes().length;
            this.totalBytesWritten.addAndGet(recordSize);
            this.bytesWrittenThisFile.addAndGet(recordSize);

            // increment the record count
            this.totalRecordsWritten.incrementAndGet();
            this.fileLineCounter.incrementAndGet();
        }

    }

    /**
     * Tells whether the next write has to go to a freshly opened file.
     * @return true when no writer is open or the current file reached the size limit
     */
    private boolean needsNewFile() {
        if (this.currentWriter == null) {
            return true;
        }
        // Multiplying by a long constant keeps the threshold from overflowing int range.
        long maxBytes = this.s3WriterConfiguration.getMaxFileSize() * BYTES_PER_MEGABYTE;
        return this.bytesWrittenThisFile.get() >= maxBytes;
    }

    /**
     * Reset File when it's time to create a new file.
     *
     * <p>If a writer is already open and nothing has been written to it yet, that writer is
     * returned unchanged. Otherwise the current writer (if any) is closed, a new output stream
     * is created under a timestamped ".tsv" name, the per-file counters are zeroed and the new
     * file is appended to the list of written files.
     *
     * @return OutputStreamWriter for the file to write to
     * @throws Exception if the new output stream cannot be created
     */
    public synchronized OutputStreamWriter resetFile() throws Exception {
        // this will keep it thread safe, so we don't create too many files
        if (this.fileLineCounter.get() == 0 && this.currentWriter != null) {
            return this.currentWriter;
        }

        closeAndDestroyWriter();

        // Create the path for where the file is going to live.
        try {
            // generate a file name
            String fileName = this.s3WriterConfiguration.getWriterFilePrefix()
                    + (this.s3WriterConfiguration.getChunk() ? "/" : "-") + new Date().getTime() + ".tsv";

            // create the output stream
            OutputStream outputStream = new S3OutputStreamWrapper(this.amazonS3Client,
                    this.s3WriterConfiguration.getBucket(), this.s3WriterConfiguration.getWriterPath(), fileName,
                    objectMetaData);

            // reset the per-file counters
            this.fileLineCounter.set(0);
            this.bytesWrittenThisFile.set(0L);

            // add this to the list of written files
            String fullPath = this.s3WriterConfiguration.getWriterPath() + fileName;
            writtenFiles.add(fullPath);

            // Log that we are creating this file
            LOGGER.info("File Created: Bucket[{}] - {}", this.s3WriterConfiguration.getBucket(), fullPath);

            // return the output stream
            return new OutputStreamWriter(outputStream);
        } catch (Exception ex) {
            // Pass the throwable itself so the stack trace is not lost.
            LOGGER.error("Unable to create a new S3 file", ex);
            throw ex;
        }
    }

    /**
     * Flushes and closes the current writer, if there is one, and clears the reference to it.
     */
    private synchronized void closeAndDestroyWriter() {
        // if there is a current writer, we must close it first.
        if (this.currentWriter == null) {
            return;
        }

        this.safeFlush(this.currentWriter);
        this.closeSafely(this.currentWriter);
        this.currentWriter = null;

        // getWrittenFiles() exposes the live list, so do not assume it still has an entry.
        String lastFile = this.writtenFiles.isEmpty()
                ? "(unknown)"
                : this.writtenFiles.get(this.writtenFiles.size() - 1);

        // Logging of information to alert the user to the activities of this class
        LOGGER.debug("File Closed: Records[{}] Bytes[{}] {} ", this.fileLineCounter.get(),
                this.bytesWrittenThisFile.get(), lastFile);
    }

    /**
     * Flushes and closes the writer without propagating failures.
     * The close is attempted even when the flush fails.
     * @param writer the writer to close; ignored when null
     */
    private synchronized void closeSafely(Writer writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.flush();
        } catch (Exception ex) {
            LOGGER.warn("closeSafely: flush failed", ex);
        }
        try {
            writer.close();
            LOGGER.debug("File Closed");
        } catch (Exception ex) {
            LOGGER.warn("closeSafely: close failed", ex);
        }
    }

    /**
     * Flushes the given target without propagating an IOException.
     * @param flushable the target to flush; ignored when null
     */
    private void safeFlush(Flushable flushable) {
        // Original author's note: this is wrapped with a ByteArrayOutputStream, so this is really safe.
        if (flushable != null) {
            try {
                flushable.flush();
            } catch (IOException ex) {
                LOGGER.warn("safeFlush", ex);
            }
        }
    }

    /**
     * Initializes the line converter, a default object mapper when none was set, and, when no
     * client was supplied, an S3 client built from the configured key, secret, protocol and
     * optional region.
     *
     * @param configurationObject not used by this implementation
     * @throws IllegalArgumentException if no S3 client is available after preparation
     */
    @Override
    public void prepare(Object configurationObject) {

        lineWriterUtil = LineReadWriteUtil.getInstance(s3WriterConfiguration);

        // Connect to S3
        synchronized (this) {

            try {
                // if the user has chosen to not set the object mapper, then set a default object mapper for them.
                if (this.objectMapper == null) {
                    this.objectMapper = StreamsJacksonMapper.getInstance();
                }

                // Create the credentials Object
                if (this.amazonS3Client == null) {
                    AWSCredentials credentials = new BasicAWSCredentials(s3WriterConfiguration.getKey(),
                            s3WriterConfiguration.getSecretKey());

                    ClientConfiguration clientConfig = new ClientConfiguration();
                    clientConfig.setProtocol(Protocol.valueOf(s3WriterConfiguration.getProtocol().toString()));

                    // We do not want path style access
                    S3ClientOptions clientOptions = new S3ClientOptions();
                    clientOptions.setPathStyleAccess(false);

                    this.amazonS3Client = new AmazonS3Client(credentials, clientConfig);
                    if (StringUtils.isNotEmpty(s3WriterConfiguration.getRegion())) {
                        this.amazonS3Client
                                .setRegion(Region.getRegion(Regions.fromName(s3WriterConfiguration.getRegion())));
                    }
                    this.amazonS3Client.setS3ClientOptions(clientOptions);
                }
            } catch (Exception ex) {
                // A trailing throwable is logged with its stack trace; no placeholder is needed.
                LOGGER.error("Exception while preparing the S3 client", ex);
            }

            Preconditions.checkArgument(this.amazonS3Client != null,
                    "S3 client could not be created; see the preceding error log");
        }
    }

    /**
     * Closes the file currently being written, if any.
     */
    public void cleanUp() {
        closeAndDestroyWriter();
    }

    /**
     * Reports every record written so far as both attempted and successful.
     * @return a new counter holding the totals at the time of the call
     */
    @Override
    public DatumStatusCounter getDatumStatusCounter() {
        // Read once so the attempt and success figures always agree.
        int written = this.totalRecordsWritten.get();
        DatumStatusCounter counters = new DatumStatusCounter();
        counters.incrementAttempt(written);
        counters.incrementStatus(DatumStatus.SUCCESS, written);
        return counters;
    }
}