com.amazonaws.hbase.kinesis.BatchedStreamSource.java Source code

Java tutorial

Introduction

Here is the source code for com.amazonaws.hbase.kinesis.BatchedStreamSource.java

Source

/*
 * Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Amazon Software License (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 * http://aws.amazon.com/asl/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.hbase.kinesis;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration;
import com.amazonaws.services.kinesis.model.PutRecordRequest;

/**
 * This class is a data source for supplying input to the Amazon Kinesis stream. It reads lines from the
 * input file specified in the constructor and batches up records before emitting them.
 *
 * <p>Every input line is parsed into a {@link KinesisMessageModel} and appended to an in-memory buffer.
 * The buffer is Java-serialized and sent as one Kinesis record as soon as appending another message would
 * push the serialized size above {@code NUM_BYTES_PER_PUT_REQUEST}, and once more when the input is
 * exhausted.
 */
public class BatchedStreamSource extends StreamSource {
    private static final Log LOG = LogFactory.getLog(BatchedStreamSource.class);

    /** Upper bound, in bytes, on the serialized batch that is sent in a single put request. */
    private static final int NUM_BYTES_PER_PUT_REQUEST = 50000;

    /** Messages accumulated since the last flush; serialized as a whole when a batch is emitted. */
    List<KinesisMessageModel> buffer;

    /**
     * Creates a source that does not loop over its input.
     *
     * @param config
     *        connector configuration, forwarded to the superclass
     * @param inputFile
     *        input file, forwarded to the superclass
     */
    public BatchedStreamSource(KinesisConnectorConfiguration config, String inputFile) {
        this(config, inputFile, false);
    }

    /**
     * Creates a source with an empty batch buffer.
     *
     * @param config
     *        connector configuration, forwarded to the superclass
     * @param inputFile
     *        input file, forwarded to the superclass
     * @param loopOverStreamSource
     *        forwarded to the superclass
     */
    public BatchedStreamSource(KinesisConnectorConfiguration config, String inputFile,
            boolean loopOverStreamSource) {
        super(config, inputFile, loopOverStreamSource);
        buffer = new ArrayList<KinesisMessageModel>();
    }

    /**
     * Reads the stream line by line, parses each line into a {@link KinesisMessageModel} and emits the
     * messages in batches whose serialized size stays within {@code NUM_BYTES_PER_PUT_REQUEST} whenever
     * the individual messages allow it. The stream is closed when this method returns.
     *
     * @param inputStream
     *        stream to read the messages from, one per line
     * @param iteration
     *        not used by this implementation
     * @throws IOException
     *         if reading, parsing or serializing fails
     */
    @Override
    protected void processInputStream(InputStream inputStream, int iteration) throws IOException {
        // NOTE(review): the reader decodes with the platform default charset; confirm that this matches
        // the encoding of the input files.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) {
            String line;
            int lines = 0;

            while ((line = br.readLine()) != null) {
                KinesisMessageModel kinesisMessageModel = objectMapper.readValue(line, KinesisMessageModel.class);
                buffer.add(kinesisMessageModel);
                // The size check re-serializes the whole buffer on every line.
                if (numBytesInBuffer() > NUM_BYTES_PER_PUT_REQUEST) {
                    /*
                     * The newest record pushed the batch over the size limit, so take it out again and
                     * send what was collected before it.
                     */
                    KinesisMessageModel lastRecord = buffer.remove(buffer.size() - 1);
                    if (buffer.isEmpty()) {
                        /*
                         * The record is over the limit all by itself. There is nothing in front of it to
                         * send, and flushing now would put an empty batch on the stream.
                         */
                        LOG.warn("A single record serializes to more than " + NUM_BYTES_PER_PUT_REQUEST
                                + " bytes; it will be sent in a batch of its own.");
                    } else {
                        flushBuffer();
                    }
                    /*
                     * We add it back so it will be part of the next batch.
                     */
                    buffer.add(lastRecord);
                }
                lines++;
            }
            // Send whatever is left over once the input is exhausted.
            if (!buffer.isEmpty()) {
                flushBuffer();
            }

            LOG.info("Added " + lines + " records to stream source.");
        }
    }

    /**
     * Serializes the current buffer with Java object serialization.
     *
     * @return the serialized form of the buffer
     * @throws IOException
     *         if serialization fails
     */
    private byte[] bufferToBytes() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        // Closing the object stream flushes everything it still holds into bos before the bytes are read.
        try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
            oos.writeObject(buffer);
        }
        return bos.toByteArray();
    }

    /**
     * @return the size in bytes of the serialized buffer
     * @throws IOException
     *         if serialization fails
     */
    private int numBytesInBuffer() throws IOException {
        return bufferToBytes().length;
    }

    /**
     * Sends the serialized buffer to the configured input stream as a single record under a random
     * partition key, then empties the buffer. The buffer is left untouched if the put fails.
     *
     * @throws IOException
     *         if serialization fails
     */
    private void flushBuffer() throws IOException {
        PutRecordRequest putRecordRequest = new PutRecordRequest();
        putRecordRequest.setStreamName(config.KINESIS_INPUT_STREAM);
        putRecordRequest.setData(ByteBuffer.wrap(bufferToBytes()));
        putRecordRequest.setPartitionKey(String.valueOf(UUID.randomUUID()));
        kinesisClient.putRecord(putRecordRequest);
        buffer.clear();
    }
}