Java tutorial
/* * Copyright 2013 Daniel Selman * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.selman.tweetamo; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import twitter4j.Status; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.ScanResult; import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException; import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException; import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException; import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor; import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer; import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason; import com.amazonaws.services.kinesis.model.Record; /** * */ public class TweetamoRecordProcessor implements IRecordProcessor { private static final Log LOG = LogFactory.getLog(TweetamoRecordProcessor.class); private String kinesisShardId; // Backoff and retry settings private static final long BACKOFF_TIME_IN_MILLIS = 3000L; private static final int NUM_RETRIES = 10; // Checkpoint about once a minute private static final long CHECKPOINT_INTERVAL_MILLIS = 60000L; private long nextCheckpointTimeInMillis; private PersistentStore persistentStore; /** * Constructor. * @param persistentStore */ public TweetamoRecordProcessor() { super(); this.persistentStore = PersistentStore.getInstance(); } /** * {@inheritDoc} */ @Override public void initialize(String shardId) { LOG.info("Initializing record processor for shard: " + shardId); this.kinesisShardId = shardId; } /** * {@inheritDoc} */ @Override public void processRecords(List<Record> records, IRecordProcessorCheckpointer checkpointer) { LOG.info("Processing " + records.size() + " records for kinesisShardId " + kinesisShardId); // Process records and perform all exception handling. processRecordsWithRetries(records); // Checkpoint once every checkpoint interval. if (System.currentTimeMillis() > nextCheckpointTimeInMillis) { checkpoint(checkpointer); nextCheckpointTimeInMillis = System.currentTimeMillis() + CHECKPOINT_INTERVAL_MILLIS; // get the last minutes tweets, max of 30 long start = System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(30); try { if (persistentStore != null) { ScanResult scanResult = persistentStore.getSince(start, 30); if (scanResult != null) { System.out .println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); System.out.println("Last 30 mins of Tweets, max of 30"); System.out .println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); for (Map<String, AttributeValue> item : scanResult.getItems()) { printItem(item); } System.out .println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); } } } catch (Exception e) { LOG.error("Error retrieving tweets.", e); } } } /** Process records performing retries as needed. Skip "poison pill" records. * @param records */ private void processRecordsWithRetries(List<Record> records) { for (Record record : records) { boolean processedSuccessfully = false; for (int i = 0; i < NUM_RETRIES; i++) { try { Status status = TweetSerializer.fromBytes(record.getData()); if (persistentStore != null) { persistentStore.add(status); } processedSuccessfully = true; break; } catch (Throwable t) { LOG.warn("Caught throwable while processing record " + record, t); } // backoff if we encounter an exception. try { Thread.sleep(BACKOFF_TIME_IN_MILLIS); } catch (InterruptedException e) { LOG.debug("Interrupted sleep", e); } } if (!processedSuccessfully) { LOG.error("Couldn't process record " + record + ". Skipping the record."); } } } private static void printItem(Map<String, AttributeValue> attributeList) { for (Map.Entry<String, AttributeValue> item : attributeList.entrySet()) { String attributeName = item.getKey(); AttributeValue value = item.getValue(); System.out.println(attributeName + " " + (value.getS() == null ? "" : "S=[" + value.getS() + "]") + (value.getN() == null ? "" : "N=[" + value.getN() + "]") + (value.getB() == null ? "" : "B=[" + value.getB() + "]") + (value.getSS() == null ? "" : "SS=[" + value.getSS() + "]") + (value.getNS() == null ? "" : "NS=[" + value.getNS() + "]") + (value.getBS() == null ? "" : "BS=[" + value.getBS() + "] \n")); } } /** * {@inheritDoc} */ @Override public void shutdown(IRecordProcessorCheckpointer checkpointer, ShutdownReason reason) { LOG.info("Shutting down record processor for shard: " + kinesisShardId); // Important to checkpoint after reaching end of shard, so we can start processing data from child shards. if (reason == ShutdownReason.TERMINATE) { checkpoint(checkpointer); } } /** Checkpoint with retries. * @param checkpointer */ private void checkpoint(IRecordProcessorCheckpointer checkpointer) { LOG.info("Checkpointing shard " + kinesisShardId); for (int i = 0; i < NUM_RETRIES; i++) { try { checkpointer.checkpoint(); break; } catch (ShutdownException se) { // Ignore checkpoint if the processor instance has been shutdown (fail over). LOG.info("Caught shutdown exception, skipping checkpoint.", se); break; } catch (ThrottlingException e) { // Backoff and re-attempt checkpoint upon transient failures if (i >= (NUM_RETRIES - 1)) { LOG.error("Checkpoint failed after " + (i + 1) + "attempts.", e); break; } else { LOG.info("Transient issue when checkpointing - attempt " + (i + 1) + " of " + NUM_RETRIES, e); } } catch (InvalidStateException e) { // This indicates an issue with the DynamoDB table (check for table, provisioned IOPS). LOG.error("Cannot save checkpoint to the DynamoDB table used by the KinesisClientLibrary.", e); break; } try { Thread.sleep(BACKOFF_TIME_IN_MILLIS); } catch (InterruptedException e) { LOG.debug("Interrupted sleep", e); } } } }