Java tutorial
/* * The MIT License (MIT) * * Copyright (C) 2008-2017 VoltDB Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.voltdb.exportclient; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.LinkedList; import java.util.List; import java.util.Properties; import java.util.Queue; import java.util.TimeZone; import java.util.concurrent.TimeUnit; import org.voltcore.utils.CoreUtils; import org.voltdb.VoltDB; import org.voltdb.common.Constants; import org.voltdb.export.AdvertisedDataSource; import org.voltdb.exportclient.decode.CSVStringDecoder; import com.amazonaws.AmazonServiceException; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.regions.Region; import com.amazonaws.regions.RegionUtils; import com.amazonaws.services.kinesisfirehose.AmazonKinesisFirehoseClient; import com.amazonaws.services.kinesisfirehose.model.DescribeDeliveryStreamRequest; import com.amazonaws.services.kinesisfirehose.model.DescribeDeliveryStreamResult; import com.amazonaws.services.kinesisfirehose.model.InvalidArgumentException; import com.amazonaws.services.kinesisfirehose.model.Record; import com.amazonaws.services.kinesisfirehose.model.ResourceNotFoundException; import com.amazonaws.services.kinesisfirehose.model.ServiceUnavailableException; import com.google_voltpatches.common.base.Throwables; import com.google_voltpatches.common.util.concurrent.ListeningExecutorService; public class KinesisFirehoseExportClient extends ExportClientBase { private static final FirehoseExportLogger LOG = new FirehoseExportLogger(); private Region m_region; private String m_streamName; private String m_accessKey; private String m_secretKey; private TimeZone m_timeZone; private AmazonKinesisFirehoseClient m_firehoseClient; private FirehoseSink m_sink; private String m_recordSeparator; private int m_backOffCap; private int m_backOffBase; private int m_streamLimit; private int m_concurrentWriter; private String m_backOffStrategy; private BackOff m_backOff; private boolean m_batchMode; private int m_batchSize; public static final String ROW_LENGTH_LIMIT = "row.length.limit"; public static final String RECORD_SEPARATOR = "record.separator"; public static final String BACKOFF_CAP = "backoff.cap"; public static final String STREAM_LIMIT = "stream.limit"; public static final String BACKOFF_TYPE = "backoff.type"; public static final String CONCURRENT_WRITER = "concurrent.writers"; public static final String BATCH_MODE = "batch.mode"; public static final String BATCH_SIZE = "batch.size"; public static final int BATCH_NUMBER_LIMIT = 500; public static final int BATCH_SIZE_LIMIT = 4 * 1024 * 1024; @Override public void configure(Properties config) throws Exception { String regionName = config.getProperty("region", "").trim(); if (regionName.isEmpty()) { throw new IllegalArgumentException("KinesisFirehoseExportClient: must provide a region"); } m_region = RegionUtils.getRegion(regionName); m_streamName = config.getProperty("stream.name", "").trim(); if (m_streamName.isEmpty()) { throw new IllegalArgumentException("KinesisFirehoseExportClient: must provide a stream.name"); } m_accessKey = config.getProperty("access.key", "").trim(); if (m_accessKey.isEmpty()) { throw new IllegalArgumentException("KinesisFirehoseExportClient: must provide an access.key"); } m_secretKey = config.getProperty("secret.key", "").trim(); if (m_secretKey.isEmpty()) { throw new IllegalArgumentException("KinesisFirehoseExportClient: must provide a secret.key"); } m_timeZone = TimeZone.getTimeZone(config.getProperty("timezone", VoltDB.REAL_DEFAULT_TIMEZONE.getID())); m_recordSeparator = config.getProperty(RECORD_SEPARATOR, "\n"); config.setProperty(ROW_LENGTH_LIMIT, config.getProperty(ROW_LENGTH_LIMIT, Integer.toString(1024000 - m_recordSeparator.length()))); m_backOffCap = Integer.parseInt(config.getProperty(BACKOFF_CAP, "1000")); // minimal interval between each putRecordsBatch api call; // for small records (row length < 1KB): records/s is the bottleneck // for large records (row length > 1KB): data throughput is the bottleneck // for orignal limit, (5000 records/s divie by 500 records per call = 10 calls) // interval is 1000 ms / 10 = 100 ms m_streamLimit = Integer.parseInt(config.getProperty(STREAM_LIMIT, "5000")); m_backOffBase = Math.max(2, 1000 / (m_streamLimit / BATCH_NUMBER_LIMIT)); // concurrent aws client = number of export table to this stream * number of voltdb partition m_concurrentWriter = Integer.parseInt(config.getProperty(CONCURRENT_WRITER, "0")); m_backOffStrategy = config.getProperty(BACKOFF_TYPE, "equal"); m_firehoseClient = new AmazonKinesisFirehoseClient(new BasicAWSCredentials(m_accessKey, m_secretKey)); m_firehoseClient.setRegion(m_region); m_backOff = BackOffFactory.getBackOff(m_backOffStrategy, m_backOffBase, m_backOffCap); m_sink = new FirehoseSink(m_streamName, m_firehoseClient, m_concurrentWriter, m_backOff); m_batchMode = Boolean.parseBoolean(config.getProperty(BATCH_MODE, "true")); m_batchSize = Math.min(BATCH_NUMBER_LIMIT, Integer.parseInt(config.getProperty(BATCH_SIZE, "200"))); } @Override public ExportDecoderBase constructExportDecoder(AdvertisedDataSource source) { return new KinesisFirehoseExportDecoder(source); } class KinesisFirehoseExportDecoder extends ExportDecoderBase { private final ListeningExecutorService m_es; private final CSVStringDecoder m_decoder; private boolean m_primed = false; private Queue<List<Record>> m_records; private List<Record> currentBatch; private int m_currentBatchSize; @Override public ListeningExecutorService getExecutor() { return m_es; } public KinesisFirehoseExportDecoder(AdvertisedDataSource source) { super(source); CSVStringDecoder.Builder builder = CSVStringDecoder.builder(); builder.dateFormatter(Constants.ODBC_DATE_FORMAT_STRING).timeZone(m_timeZone) .columnNames(source.columnNames).columnTypes(source.columnTypes); m_es = CoreUtils .getListeningSingleThreadExecutor( "Kinesis Firehose Export decoder for partition " + source.partitionId + " table " + source.tableName + " generation " + source.m_generation, CoreUtils.MEDIUM_STACK_SIZE); m_decoder = builder.build(); } private void validateStream() throws RestartBlockException, InterruptedException { DescribeDeliveryStreamRequest describeHoseRequest = new DescribeDeliveryStreamRequest() .withDeliveryStreamName(m_streamName); DescribeDeliveryStreamResult describeHoseResult = null; String status = "UNDEFINED"; describeHoseResult = m_firehoseClient.describeDeliveryStream(describeHoseRequest); status = describeHoseResult.getDeliveryStreamDescription().getDeliveryStreamStatus(); if ("ACTIVE".equalsIgnoreCase(status)) { return; } else if ("CREATING".equalsIgnoreCase(status)) { Thread.sleep(5000); validateStream(); } else { LOG.error("Cannot use stream %s, responded with %s", m_streamName, status); throw new RestartBlockException(true); } } final void checkOnFirstRow() throws RestartBlockException { if (!m_primed) try { validateStream(); } catch (AmazonServiceException | InterruptedException e) { LOG.error("Unable to instantiate a Amazon Kinesis Firehose client", e); throw new RestartBlockException("Unable to instantiate a Amazon Kinesis Firehose client", e, true); } m_primed = true; } @Override public boolean processRow(int rowSize, byte[] rowData) throws RestartBlockException { if (!m_primed) checkOnFirstRow(); Record record = new Record(); try { final ExportRowData rd = decodeRow(rowData); String decoded = m_decoder.decode(null, rd.values) + m_recordSeparator; // add a record separator ; record.withData(ByteBuffer.wrap(decoded.getBytes(StandardCharsets.UTF_8))); } catch (IOException e) { LOG.error("Failed to build record", e); throw new RestartBlockException("Failed to build record", e, true); } if (m_batchMode) { // PutRecordBatchRequest can not contain more than 500 records // And up to a limit of 4 MB for the entire request if ((m_currentBatchSize + rowSize) > BATCH_SIZE_LIMIT || currentBatch.size() >= m_batchSize) { // roll to next batch m_records.add(currentBatch); m_currentBatchSize = 0; currentBatch = new LinkedList<Record>(); } currentBatch.add(record); m_currentBatchSize += rowSize; } else { try { m_sink.writeRow(record); } catch (FirehoseExportException e) { throw new RestartBlockException("firehose write fault", e, true); } catch (ResourceNotFoundException | InvalidArgumentException | ServiceUnavailableException e) { LOG.error("Failed to send record batch", e); throw new RestartBlockException("Failed to send record batch", e, true); } } return true; } @Override public void sourceNoLongerAdvertised(AdvertisedDataSource source) { if (m_sink != null) { m_sink.shutDown(); } if (m_firehoseClient != null) m_firehoseClient.shutdown(); m_es.shutdown(); try { m_es.awaitTermination(365, TimeUnit.DAYS); } catch (InterruptedException e) { Throwables.propagate(e); } } @Override public void onBlockStart() throws RestartBlockException { if (!m_primed) checkOnFirstRow(); m_records = new LinkedList<List<Record>>(); m_currentBatchSize = 0; currentBatch = new LinkedList<Record>(); } @Override public void onBlockCompletion() throws RestartBlockException { if (m_batchMode) { // add last batch if (!currentBatch.isEmpty()) { // roll to next batch m_records.add(currentBatch); m_currentBatchSize = 0; currentBatch = new LinkedList<Record>(); } try { if (m_concurrentWriter > 0) { m_sink.write(m_records); } else { m_sink.syncWrite(m_records); } } catch (FirehoseExportException e) { throw new RestartBlockException("firehose write fault", e, true); } catch (ResourceNotFoundException | InvalidArgumentException | ServiceUnavailableException e) { LOG.error("Failed to send record batch", e); throw new RestartBlockException("Failed to send record batch", e, true); } } } } }