Java tutorial: a transactional queue consumer (AbstractQueueConsumer from Cask Tigon)

This tutorial walks through AbstractQueueConsumer, the queue consumer that Cask Tigon shares between its HBase and LevelDB persistence engines. The full source follows.
/*
 * Copyright 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.tigon.data.transaction.queue;

import co.cask.tephra.Transaction;
import co.cask.tephra.TransactionAware;
import co.cask.tigon.data.queue.ConsumerConfig;
import co.cask.tigon.data.queue.DequeueResult;
import co.cask.tigon.data.queue.DequeueStrategy;
import co.cask.tigon.data.queue.QueueConsumer;
import co.cask.tigon.data.queue.QueueName;
import co.cask.tigon.utils.ImmutablePair;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import com.google.common.collect.Maps;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.SortedMap;

/**
 * Common queue consumer for persisting engines such as HBase and LevelDB.
 */
public abstract class AbstractQueueConsumer implements QueueConsumer, TransactionAware, Closeable {

  private static final DequeueResult<byte[]> EMPTY_RESULT = DequeueResult.Empty.result();

  // TODO: Make these configurable.
  // Minimum number of rows to fetch per scan.
  private static final int MIN_FETCH_ROWS = 100;
  // Multiple of batches to fetch per scan.
  // Number of rows to scan = max(MIN_FETCH_ROWS, dequeueBatchSize * groupSize * PREFETCH_BATCHES)
  private static final int PREFETCH_BATCHES = 10;

  private static final Function<SimpleQueueEntry, byte[]> ENTRY_TO_BYTE_ARRAY =
    new Function<SimpleQueueEntry, byte[]>() {
      @Override
      public byte[] apply(SimpleQueueEntry input) {
        return input.getData();
      }
    };

  private final ConsumerConfig consumerConfig;
  private final QueueName queueName;
  private final SortedMap<byte[], SimpleQueueEntry> entryCache;
  private final NavigableMap<byte[], SimpleQueueEntry> consumingEntries;
  protected final byte[] stateColumnName;
  private final byte[] queueRowPrefix;
  protected byte[] startRow;
  private byte[] scanStartRow;
  protected Transaction transaction;
  private boolean committed;
  protected int commitCount;

  protected abstract boolean claimEntry(byte[] rowKey, byte[] stateContent) throws IOException;

  protected abstract void updateState(Set<byte[]> rowKeys, byte[] stateColumnName,
                                      byte[] stateContent) throws IOException;

  protected abstract void undoState(Set<byte[]> rowKeys,
                                    byte[] stateColumnName) throws IOException, InterruptedException;

  protected abstract QueueScanner getScanner(byte[] startRow, byte[] stopRow, int numRows) throws IOException;

  protected AbstractQueueConsumer(ConsumerConfig consumerConfig, QueueName queueName) {
    this.consumerConfig = consumerConfig;
    this.queueName = queueName;
    this.entryCache = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    this.consumingEntries = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    this.queueRowPrefix = QueueEntryRow.getQueueRowPrefix(queueName);
    this.startRow = getRowKey(0L, 0);
    this.stateColumnName = Bytes.add(QueueEntryRow.STATE_COLUMN_PREFIX,
                                     Bytes.toBytes(consumerConfig.getGroupId()));
  }

  @Override
  public QueueName getQueueName() {
    return queueName;
  }

  @Override
  public ConsumerConfig getConfig() {
    return consumerConfig;
  }

  @Override
  public DequeueResult<byte[]> dequeue() throws IOException {
    return dequeue(1);
  }

  @Override
  public DequeueResult<byte[]> dequeue(int maxBatchSize) throws IOException {
    Preconditions.checkArgument(maxBatchSize > 0, "Batch size must be > 0.");

    // Pre-compute the "claimed" state content in case of FIFO.
    byte[] claimedStateValue = null;
    if (consumerConfig.getDequeueStrategy() == DequeueStrategy.FIFO && consumerConfig.getGroupSize() > 1) {
      claimedStateValue = encodeStateColumn(ConsumerEntryState.CLAIMED);
    }
    while (consumingEntries.size() < maxBatchSize && getEntries(consumingEntries, maxBatchSize)) {
      // ANDREAS: this while loop should stop once getEntries/populateCache reaches the end of the queue. Currently, it
      // will retry as long as it gets at least one entry in every round, even if that is an entry that must be ignored
      // because it cannot be claimed.
      // ANDREAS: It could be a problem that we always read to the end of the queue. This way one flowlet instance may
      // always consume all entries, while others are idle.

      // For FIFO, need to try claiming the entry if group size > 1.
      if (consumerConfig.getDequeueStrategy() == DequeueStrategy.FIFO && consumerConfig.getGroupSize() > 1) {
        Iterator<Map.Entry<byte[], SimpleQueueEntry>> iterator = consumingEntries.entrySet().iterator();
        while (iterator.hasNext()) {
          SimpleQueueEntry entry = iterator.next().getValue();

          if (entry.getState() == null ||
              QueueEntryRow.getStateInstanceId(entry.getState()) >= consumerConfig.getGroupSize()) {
            // If not able to claim it, remove it and move on to the next one.
            if (!claimEntry(entry.getRowKey(), claimedStateValue)) {
              iterator.remove();
            }
          }
        }
      }
    }

    // If nothing got dequeued, return the empty result.
    if (consumingEntries.isEmpty()) {
      return EMPTY_RESULT;
    }

    return new SimpleDequeueResult(consumingEntries.values());
  }

  @Override
  public void startTx(Transaction tx) {
    consumingEntries.clear();
    this.transaction = tx;
    this.committed = false;
  }

  @Override
  public Collection<byte[]> getTxChanges() {
    // No conflicts guaranteed in dequeue logic.
    return ImmutableList.of();
  }

  @Override
  public boolean commitTx() throws Exception {
    if (consumingEntries.isEmpty()) {
      return true;
    }

    byte[] stateContent = encodeStateColumn(ConsumerEntryState.PROCESSED);
    updateState(consumingEntries.keySet(), stateColumnName, stateContent);
    commitCount += consumingEntries.size();
    committed = true;
    return true;
  }

  @Override
  public void postTxCommit() {
    if (scanStartRow != null) {
      if (!consumingEntries.isEmpty()) {
        // The start row can be updated to the largest row key in consumingEntries (now consumed)
        // that is smaller than or equal to scanStartRow.
        byte[] floorKey = consumingEntries.floorKey(scanStartRow);
        if (floorKey != null) {
          startRow = floorKey;
        }
      } else {
        // If the dequeue had an empty result, startRow can advance to scanStartRow.
        startRow = Arrays.copyOf(scanStartRow, scanStartRow.length);
      }
    }
  }

  @Override
  public boolean rollbackTx() throws Exception {
    if (consumingEntries.isEmpty()) {
      return true;
    }

    // Put the consuming entries back into the cache.
    entryCache.putAll(consumingEntries);

    // If not committed, no need to update HBase.
    if (!committed) {
      return true;
    }
    commitCount -= consumingEntries.size();

    // Revert changes in HBase rows.
    // If it is FIFO, restore to the CLAIMED state. This instance will retry it on the next dequeue.
    if (consumerConfig.getDequeueStrategy() == DequeueStrategy.FIFO && consumerConfig.getGroupSize() > 1) {
      byte[] stateContent = encodeStateColumn(ConsumerEntryState.CLAIMED);
      updateState(consumingEntries.keySet(), stateColumnName, stateContent);
    } else {
      undoState(consumingEntries.keySet(), stateColumnName);
    }
    return true;
  }

  /**
   * Tries to dequeue (claim) entries up to a maximum size.
   * @param entries For claimed entries to fill in.
   * @param maxBatchSize Maximum number of entries to claim.
   * @return {@code true} if at least one entry was fetched.
   * @throws java.io.IOException
   */
  private boolean getEntries(SortedMap<byte[], SimpleQueueEntry> entries, int maxBatchSize) throws IOException {
    boolean hasEntry = fetchFromCache(entries, maxBatchSize);

    // If there are not enough entries from the cache, try to get more.
    // ANDREAS: I think this is wrong. If the batch=10, and the cache has 5 entries, but populateCache cannot
    // fetch more entries, then we have 5 and should return true. But this code will return false.
    // TERENCE: If there are 5 entries in the cache, the first call to fetchFromCache will return true,
    // the second call to fetchFromCache from the call to populateCache will return false, but
    // hasEntry = false || true => true, hence returning true.
    if (entries.size() < maxBatchSize) {
      populateRowCache(entries.keySet(), maxBatchSize);
      hasEntry = fetchFromCache(entries, maxBatchSize) || hasEntry;
    }

    return hasEntry;
  }

  private boolean fetchFromCache(SortedMap<byte[], SimpleQueueEntry> entries, int maxBatchSize) {
    if (entryCache.isEmpty()) {
      return false;
    }

    Iterator<Map.Entry<byte[], SimpleQueueEntry>> iterator = entryCache.entrySet().iterator();
    while (entries.size() < maxBatchSize && iterator.hasNext()) {
      Map.Entry<byte[], SimpleQueueEntry> entry = iterator.next();
      entries.put(entry.getKey(), entry.getValue());
      iterator.remove();
    }
    return true;
  }

  private void populateRowCache(Set<byte[]> excludeRows, int maxBatchSize) throws IOException {
    long readPointer = transaction.getReadPointer();

    // Scan the table for queue entries.
    int numRows = Math.max(MIN_FETCH_ROWS, maxBatchSize * PREFETCH_BATCHES);
    if (scanStartRow == null) {
      scanStartRow = Arrays.copyOf(startRow, startRow.length);
    }
    QueueScanner scanner = getScanner(scanStartRow,
                                      QueueEntryRow.getStopRowForTransaction(queueRowPrefix, transaction),
                                      numRows);
    try {
      // Try to fill up the cache.
      boolean firstScannedRow = true;

      while (entryCache.size() < numRows) {
        ImmutablePair<byte[], Map<byte[], byte[]>> entry = scanner.next();
        if (entry == null) {
          // No more results; break out.
          break;
        }

        byte[] rowKey = entry.getFirst();
        if (excludeRows.contains(rowKey)) {
          continue;
        }

        // Row key is queue_name + writePointer + counter.
        long writePointer = Bytes.toLong(rowKey, queueRowPrefix.length, Longs.BYTES);

        // If it is the first row returned by the scanner and it was written before the earliest in-progress
        // transaction, it's safe to advance scanStartRow to the current row because nothing can be written
        // before this row.
        if (firstScannedRow && writePointer < transaction.getFirstInProgress()) {
          firstScannedRow = false;
          scanStartRow = Arrays.copyOf(rowKey, rowKey.length);
        }

        // If it was written after the read pointer, abort the loop, as entries that come later are all
        // uncommitted. This is probably not needed because the scan is limited to the stop row, but to be safe...
        if (writePointer > readPointer) {
          break;
        }

        // If the write is in the excluded list, ignore it.
        if (transaction.isExcluded(writePointer)) {
          continue;
        }

        // Based on the strategy, determine whether to include the given entry.
        byte[] dataBytes = entry.getSecond().get(QueueEntryRow.DATA_COLUMN);
        byte[] metaBytes = entry.getSecond().get(QueueEntryRow.META_COLUMN);

        if (dataBytes == null || metaBytes == null) {
          continue;
        }

        byte[] stateBytes = entry.getSecond().get(stateColumnName);

        int counter = Bytes.toInt(rowKey, rowKey.length - 4, Ints.BYTES);
        if (!shouldInclude(writePointer, counter, metaBytes, stateBytes)) {
          continue;
        }

        entryCache.put(rowKey, new SimpleQueueEntry(rowKey, dataBytes, stateBytes));
      }
    } finally {
      scanner.close();
    }
  }

  private byte[] encodeStateColumn(ConsumerEntryState state) {
    // State column content is encoded as (writePointer) + (instanceId) + (state).
    byte[] stateContent = new byte[Longs.BYTES + Ints.BYTES + 1];
    Bytes.putLong(stateContent, 0, transaction.getWritePointer());
    Bytes.putInt(stateContent, Longs.BYTES, consumerConfig.getInstanceId());
    Bytes.putByte(stateContent, Longs.BYTES + Ints.BYTES, state.getState());
    return stateContent;
  }

  private boolean shouldInclude(long enqueueWritePointer, int counter,
                                byte[] metaValue, byte[] stateValue) throws IOException {
    QueueEntryRow.CanConsume canConsume =
      QueueEntryRow.canConsume(consumerConfig, transaction, enqueueWritePointer, counter, metaValue, stateValue);

    if (QueueEntryRow.CanConsume.NO_INCLUDING_ALL_OLDER == canConsume) {
      scanStartRow = getNextRow(scanStartRow, enqueueWritePointer, counter);
      return false;
    }

    return QueueEntryRow.CanConsume.YES == canConsume;
  }

  /**
   * Creates a new byte[] that gives the entry row key for the given enqueue transaction and counter.
   */
  private byte[] getRowKey(long writePointer, int count) {
    byte[] row = Arrays.copyOf(queueRowPrefix, queueRowPrefix.length + Longs.BYTES + Ints.BYTES);
    Bytes.putLong(row, queueRowPrefix.length, writePointer);
    Bytes.putInt(row, queueRowPrefix.length + Longs.BYTES, count);
    return row;
  }

  /**
   * Gets the next row based on the given write pointer and counter. It modifies the given row byte[] in place
   * and returns it.
   */
  private byte[] getNextRow(byte[] row, long writePointer, int count) {
    Bytes.putLong(row, queueRowPrefix.length, writePointer);
    Bytes.putInt(row, queueRowPrefix.length + Longs.BYTES, count + 1);
    return row;
  }

  @Override
  public String getTransactionAwareName() {
    return getClass().getSimpleName() + "(queue = " + queueName + ")";
  }

  /**
   * Implementation of dequeue result.
   */
  private final class SimpleDequeueResult implements DequeueResult<byte[]> {

    private final List<SimpleQueueEntry> entries;

    private SimpleDequeueResult(Iterable<SimpleQueueEntry> entries) {
      this.entries = ImmutableList.copyOf(entries);
    }

    @Override
    public boolean isEmpty() {
      return entries.isEmpty();
    }

    @Override
    public void reclaim() {
      // Simply put all entries into consumingEntries and clear them from the entry cache as well.
      for (SimpleQueueEntry entry : entries) {
        consumingEntries.put(entry.getRowKey(), entry);
        entryCache.remove(entry.getRowKey());
      }
    }

    @Override
    public int size() {
      return entries.size();
    }

    @Override
    public Iterator<byte[]> iterator() {
      if (isEmpty()) {
        return Iterators.emptyIterator();
      }
      return Iterators.transform(entries.iterator(), ENTRY_TO_BYTE_ARRAY);
    }

    @Override
    public String toString() {
      return Objects.toStringHelper(this)
        .add("size", entries.size())
        .add("queue", queueName)
        .add("config", consumerConfig)
        .toString();
    }
  }
}
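Because the class is TransactionAware, every dequeue must be bracketed by the lifecycle calls shown above. Here is a minimal sketch of one consume attempt, assuming a concrete AbstractQueueConsumer subclass and a Tephra Transaction are supplied by the caller (how they are obtained is elided), and using a hypothetical process() handler; committing the transaction with the transaction system itself is also elided:

package co.cask.tigon.data.transaction.queue;  // same package for brevity

import co.cask.tephra.Transaction;
import co.cask.tigon.data.queue.DequeueResult;

import java.util.Iterator;

public final class ConsumeOnceExample {

  // Drives one transactional dequeue. Both arguments are assumed to be supplied
  // by the caller: a concrete AbstractQueueConsumer subclass and a Transaction
  // already started through the Tephra transaction system.
  public static void consumeOnce(AbstractQueueConsumer consumer, Transaction tx) throws Exception {
    consumer.startTx(tx);                                    // resets per-transaction state
    try {
      DequeueResult<byte[]> result = consumer.dequeue(10);   // claims up to 10 entries
      Iterator<byte[]> payloads = result.iterator();
      while (payloads.hasNext()) {
        process(payloads.next());                            // hypothetical application logic
      }
      consumer.commitTx();       // marks the claimed entries PROCESSED
      // ... commit the transaction with the transaction system here, then:
      consumer.postTxCommit();   // advances startRow past the consumed entries
    } catch (Exception e) {
      consumer.rollbackTx();     // restores entry state so the entries can be claimed again
      throw e;
    }
  }

  private static void process(byte[] payload) {
    // Placeholder for real message handling.
  }

  private ConsumeOnceExample() { }
}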
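The fixed-width byte layouts used by getRowKey and encodeStateColumn are easy to get wrong when writing a storage-engine implementation. The following self-contained sketch reproduces the same layouts with only the standard library; the prefix bytes and all concrete values are made up for illustration:

import java.nio.ByteBuffer;
import java.util.Arrays;

public final class QueueLayoutSketch {

  public static void main(String[] args) {
    byte[] queueRowPrefix = new byte[] { 0x01, 0x02 };  // made-up prefix bytes

    // Row key layout: queueRowPrefix + 8-byte writePointer + 4-byte counter,
    // mirroring getRowKey above.
    long writePointer = 42L;
    int counter = 7;
    byte[] rowKey = ByteBuffer.allocate(queueRowPrefix.length + Long.BYTES + Integer.BYTES)
      .put(queueRowPrefix)
      .putLong(writePointer)
      .putInt(counter)
      .array();

    // Decoding mirrors populateRowCache: the write pointer follows the prefix,
    // and the counter occupies the last four bytes.
    ByteBuffer buf = ByteBuffer.wrap(rowKey);
    buf.position(queueRowPrefix.length);
    System.out.println("writePointer = " + buf.getLong());
    System.out.println("counter      = "
        + ByteBuffer.wrap(rowKey, rowKey.length - Integer.BYTES, Integer.BYTES).getInt());

    // State column layout: 8-byte transaction write pointer + 4-byte instanceId
    // + 1-byte state code, mirroring encodeStateColumn above.
    byte[] stateContent = ByteBuffer.allocate(Long.BYTES + Integer.BYTES + 1)
      .putLong(100L)    // claiming transaction's write pointer (made up)
      .putInt(3)        // consumer instance id (made up)
      .put((byte) 1)    // state code, e.g. CLAIMED (made up)
      .array();
    System.out.println("state column = " + Arrays.toString(stateContent));
  }
}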