/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.common;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.Set;

import javax.inject.Named;

import org.apache.drill.exec.expr.ClassGenerator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.shaded.guava.com.google.common.collect.Sets;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.compile.sig.RuntimeOverridden;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.AllocationManager;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.physical.impl.join.HashJoinMemoryCalculator;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.RecordBatchSizer;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.BigIntVector;
import org.apache.drill.exec.vector.FixedWidthVector;
import org.apache.drill.exec.vector.IntVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.common.exceptions.RetryAfterSpillException;
import org.apache.drill.exec.vector.VariableWidthVector;

public abstract class HashTableTemplate implements HashTable {

  public static final int MAX_VARCHAR_SIZE = 8; // This is a bad heuristic which will be eliminated when the keys are removed from the HashTable.

  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HashTable.class);
  private static final boolean EXTRA_DEBUG = false;

  private static final int EMPTY_SLOT = -1;

  // A hash 'bucket' consists of the start index to indicate the start of a hash chain.
  // Array of start indexes; a start index is a global index across all batch holders.
  // This is the "classic hash table", where Hash-Value % size-of-table yields
  // the offset/position (in the startIndices) of the beginning of the hash chain.
  private IntVector startIndices;

  // Array of batch holders..each batch holder can hold up to BATCH_SIZE entries
  private ArrayList<BatchHolder> batchHolders;

  private int totalIndexSize;   // index size of all batchHolders including the current batch
  private int prevIndexSize;    // index size of all batchHolders not including the current batch
  private int currentIndexSize; // prevIndexSize + current batch count
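  // Illustrative note (added commentary, not in the original source): this is a classic
  // chained hash table. For example, with 8 buckets a key whose hash is 27 lands in
  // bucket 27 & (8 - 1) = 3; startIndices[3] then holds the global index of the first
  // entry in that bucket's chain, and each entry's "links" slot holds the global index
  // of the next entry, or EMPTY_SLOT at the end of the chain.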
  // Current size of the hash table in terms of number of buckets
  private int tableSize = 0;

  // Original size of the hash table (needed when re-initializing)
  private int originalTableSize;

  // Threshold after which we rehash; it must be tableSize * loadFactor
  private int threshold;

  // Actual number of entries in the hash table
  private int numEntries = 0;

  // Current available (free) slot globally across all batch holders
  private int freeIndex = 0;

  // Placeholder for the current index while probing the hash table
  private IndexPointer currentIdxHolder;

  private BufferAllocator allocator;

  // The incoming build side record batch
  private VectorContainer incomingBuild;

  // The incoming probe side record batch (may be null)
  private RecordBatch incomingProbe;

  // The outgoing record batch
  private RecordBatch outgoing;

  // Hash table configuration parameters
  private HashTableConfig htConfig;

  // Allocation tracker
  private HashTableAllocationTracker allocationTracker;

  // The original container from which others may be cloned
  private VectorContainer htContainerOrig;

  private MaterializedField dummyIntField;

  protected FragmentContext context;
  protected ClassGenerator<?> cg;

  private int numResizing = 0;
  private int resizingTime = 0;
  private Iterator<BatchHolder> htIter = null;

  // This class encapsulates the links, keys and values for up to BATCH_SIZE
  // *unique* records. Thus, if there are N incoming record batches, each of size
  // BATCH_SIZE, but with M unique keys altogether, the number of BatchHolders
  // will be (M / BATCH_SIZE) + 1.
  public class BatchHolder {

    // Container of vectors to hold type-specific keys
    private VectorContainer htContainer;

    // Array of 'link' values
    private IntVector links;

    // Array of hash values - this is useful when resizing the hash table
    private IntVector hashValues;

    private int maxOccupiedIdx = -1;
    private int targetBatchRowCount;
    private int batchIndex = 0;

    public void setTargetBatchRowCount(int targetBatchRowCount) {
      this.targetBatchRowCount = targetBatchRowCount;
    }

    public int getTargetBatchRowCount() {
      return targetBatchRowCount;
    }
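    // Illustrative note (added commentary, not in the original source): per the class
    // comment above, the number of BatchHolders depends only on the number of unique
    // keys M. For example, assuming BATCH_SIZE = 65536 (as implied by the 16-bit shifts
    // used for batch indexes below), 1,000,000 unique keys would need
    // ceil(1,000,000 / 65536) = 16 BatchHolders, no matter how many incoming record
    // batches delivered those keys.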
    public BatchHolder(int idx, int newBatchHolderSize) {
      this.batchIndex = idx;
      this.targetBatchRowCount = newBatchHolderSize;

      htContainer = new VectorContainer();
      boolean success = false;
      try {
        for (VectorWrapper<?> w : htContainerOrig) {
          ValueVector vv = TypeHelper.getNewVector(w.getField(), allocator);
          htContainer.add(vv); // add to container before actual allocation (to allow clearing in case of an OOM)

          // Capacity for the "hashValues" and "links" vectors is newBatchHolderSize records. It is better to
          // allocate space for the "key" vectors to store as close as possible to newBatchHolderSize records.
          // A new BatchHolder is created when either newBatchHolderSize records are inserted or the "key"
          // vectors run out of space. Allocating too little space for the "key" vectors would result in unused
          // space in the "hashValues" and "links" vectors of the BatchHolder. Also, for each new BatchHolder
          // we create an SV4 vector of newBatchHolderSize in HashJoinHelper.
          if (vv instanceof FixedWidthVector) {
            ((FixedWidthVector) vv).allocateNew(newBatchHolderSize);
          } else if (vv instanceof VariableWidthVector) {
            long beforeMem = allocator.getAllocatedMemory();
            ((VariableWidthVector) vv).allocateNew(MAX_VARCHAR_SIZE * newBatchHolderSize, newBatchHolderSize);
            logger.trace("HT allocated {} for varchar of max width {}",
                allocator.getAllocatedMemory() - beforeMem, MAX_VARCHAR_SIZE);
          } else {
            vv.allocateNew();
          }
        }

        links = allocMetadataVector(newBatchHolderSize, EMPTY_SLOT);
        hashValues = allocMetadataVector(newBatchHolderSize, 0);
        success = true;
      } finally {
        if (!success) {
          htContainer.clear();
          if (links != null) {
            links.clear();
          }
        }
      }
    }

    private void init(IntVector links, IntVector hashValues, int size) {
      for (int i = 0; i < size; i++) {
        links.getMutator().set(i, EMPTY_SLOT);
      }
      for (int i = 0; i < size; i++) {
        hashValues.getMutator().set(i, 0);
      }
      links.getMutator().setValueCount(size);
      hashValues.getMutator().setValueCount(size);
    }

    protected void setup() throws SchemaChangeException {
      setupInterior(incomingBuild, incomingProbe, outgoing, htContainer);
    }

    // Check if the key at the currentIdx position in the hash table matches the key
    // at the incomingRowIdx. If the key does not match, update currentIdxHolder
    // with the index of the next link.
    private boolean isKeyMatch(int incomingRowIdx, IndexPointer currentIdxHolder, boolean isProbe)
        throws SchemaChangeException {
      int currentIdxWithinBatch = currentIdxHolder.value & BATCH_MASK;
      boolean match;

      if (currentIdxWithinBatch >= batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK)
          .getTargetBatchRowCount()) {
        logger.debug("Batch size = {}, incomingRowIdx = {}, currentIdxWithinBatch = {}.",
            batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK).getTargetBatchRowCount(),
            incomingRowIdx, currentIdxWithinBatch);
      }
      assert (currentIdxWithinBatch < batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK)
          .getTargetBatchRowCount());
      assert (incomingRowIdx < HashTable.BATCH_SIZE);

      if (isProbe) {
        match = isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch);
      } else {
        match = isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch);
      }

      if (!match) {
        currentIdxHolder.value = links.getAccessor().get(currentIdxWithinBatch);
      }
      return match;
    }

    // Insert a new <key1, key2...keyN> entry coming from the incoming batch into the hash table
    // container at the specified index
    private void insertEntry(int incomingRowIdx, int currentIdx, int hashValue,
        BatchHolder lastEntryBatch, int lastEntryIdxWithinBatch) throws SchemaChangeException {
      int currentIdxWithinBatch = currentIdx & BATCH_MASK;

      setValue(incomingRowIdx, currentIdxWithinBatch);
      // setValue may OOM when doubling one of the VarChar key value vectors;
      // this would be caught and retried later (setValue() is idempotent)

      // the previous entry in this hash chain should now point to the entry at currentIdx
      if (lastEntryBatch != null) {
        lastEntryBatch.updateLinks(lastEntryIdxWithinBatch, currentIdx);
      }

      // since this is the last entry in the hash chain, the links array at position currentIdx
      // will point to a null (empty) slot
      links.getMutator().set(currentIdxWithinBatch, EMPTY_SLOT);
      hashValues.getMutator().set(currentIdxWithinBatch, hashValue);

      maxOccupiedIdx = Math.max(maxOccupiedIdx, currentIdxWithinBatch);

      if (EXTRA_DEBUG) {
        logger.debug("BatchHolder: inserted key at incomingRowIdx = {}, currentIdx = {}, hash value = {}.",
            incomingRowIdx, currentIdx, hashValue);
      }
    }
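    // Illustrative note (added commentary, not in the original source): appending to a
    // hash chain touches only two "links" slots. If a bucket's chain is currently
    // 5 -> 9 -> EMPTY_SLOT and a new entry gets global index 12, insertEntry() sets
    // links[9 & BATCH_MASK] = 12 (via updateLinks() on the last entry's BatchHolder) and
    // links[12 & BATCH_MASK] = EMPTY_SLOT, producing the chain 5 -> 9 -> 12 -> EMPTY_SLOT.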
    private void updateLinks(int lastEntryIdxWithinBatch, int currentIdx) {
      links.getMutator().set(lastEntryIdxWithinBatch, currentIdx);
    }

    private void rehash(int numbuckets, IntVector newStartIndices, int batchStartIdx) {
      logger.debug("Rehashing entries within the batch: {}; batchStartIdx = {}, total numBuckets in hash table = {}.",
          batchIndex, batchStartIdx, numbuckets);

      int size = links.getAccessor().getValueCount();
      IntVector newLinks = allocMetadataVector(size, EMPTY_SLOT);
      IntVector newHashValues = allocMetadataVector(size, 0);

      for (int i = 0; i <= maxOccupiedIdx; i++) {
        int entryIdxWithinBatch = i;
        int entryIdx = entryIdxWithinBatch + batchStartIdx;
        int hash = hashValues.getAccessor().get(entryIdxWithinBatch); // get the already saved hash value
        int bucketIdx = getBucketIndex(hash, numbuckets);
        int newStartIdx = newStartIndices.getAccessor().get(bucketIdx);

        if (newStartIdx == EMPTY_SLOT) { // new bucket was empty
          newStartIndices.getMutator().set(bucketIdx, entryIdx); // update the start index to point to entry
          newLinks.getMutator().set(entryIdxWithinBatch, EMPTY_SLOT);
          newHashValues.getMutator().set(entryIdxWithinBatch, hash);

          if (EXTRA_DEBUG) {
            logger.debug("New bucket was empty. bucketIdx = {}, newStartIndices[ {} ] = {}, newLinks[ {} ] = {}, " +
                "hash value = {}.", bucketIdx, bucketIdx, newStartIndices.getAccessor().get(bucketIdx),
                entryIdxWithinBatch, newLinks.getAccessor().get(entryIdxWithinBatch),
                newHashValues.getAccessor().get(entryIdxWithinBatch));
          }
        } else {
          // follow the new table's hash chain until we encounter an empty slot. Note that the hash chain could
          // traverse multiple batch holders, so make sure we are accessing the right batch holder.
          int idx = newStartIdx;
          int idxWithinBatch = 0;
          BatchHolder bh = this;
          while (true) {
            if (idx != EMPTY_SLOT) {
              idxWithinBatch = idx & BATCH_MASK;
              bh = batchHolders.get((idx >>> 16) & BATCH_MASK);
            }

            if (bh == this && newLinks.getAccessor().get(idxWithinBatch) == EMPTY_SLOT) {
              newLinks.getMutator().set(idxWithinBatch, entryIdx);
              newLinks.getMutator().set(entryIdxWithinBatch, EMPTY_SLOT);
              newHashValues.getMutator().set(entryIdxWithinBatch, hash);

              if (EXTRA_DEBUG) {
                logger.debug("Followed hash chain in new bucket. bucketIdx = {}, newLinks[ {} ] = {}, " +
                    "newLinks[ {} ] = {}, hash value = {}.", bucketIdx, idxWithinBatch,
                    newLinks.getAccessor().get(idxWithinBatch), entryIdxWithinBatch,
                    newLinks.getAccessor().get(entryIdxWithinBatch),
                    newHashValues.getAccessor().get(entryIdxWithinBatch));
              }
              break;
            } else if (bh != this && bh.links.getAccessor().get(idxWithinBatch) == EMPTY_SLOT) {
              bh.links.getMutator().set(idxWithinBatch, entryIdx); // update the link in the other batch
              newLinks.getMutator().set(entryIdxWithinBatch, EMPTY_SLOT); // update the newLink entry in this
              // batch to mark the end of the hash chain
              newHashValues.getMutator().set(entryIdxWithinBatch, hash);

              if (EXTRA_DEBUG) {
                logger.debug("Followed hash chain in new bucket. bucketIdx = {}, newLinks[ {} ] = {}, " +
                    "newLinks[ {} ] = {}, hash value = {}.", bucketIdx, idxWithinBatch,
                    newLinks.getAccessor().get(idxWithinBatch), entryIdxWithinBatch,
                    newLinks.getAccessor().get(entryIdxWithinBatch),
                    newHashValues.getAccessor().get(entryIdxWithinBatch));
              }
              break;
            }

            if (bh == this) {
              idx = newLinks.getAccessor().get(idxWithinBatch);
            } else {
              idx = bh.links.getAccessor().get(idxWithinBatch);
            }
          }
        }
      }

      links.clear();
      hashValues.clear();
      links = newLinks;
      hashValues = newHashValues;
    }
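    // Illustrative note (added commentary, not in the original source): rehashing never
    // copies key data. Because the table size is always a power of two, an entry with
    // saved hash h sitting in bucket h & (n - 1) either stays in place or moves to
    // bucket (h & (n - 1)) + n after doubling; only the startIndices, links and
    // hashValues metadata vectors are rebuilt.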
    private boolean outputKeys(VectorContainer outContainer, int numRecords) {
      // set the value count for htContainer's value vectors before the transfer ..
      setValueCount();

      Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();

      for (VectorWrapper<?> sourceWrapper : htContainer) {
        @SuppressWarnings("resource")
        ValueVector sourceVV = sourceWrapper.getValueVector();
        @SuppressWarnings("resource")
        ValueVector targetVV = outgoingIter.next().getValueVector();
        TransferPair tp = sourceVV.makeTransferPair(targetVV);
        // The normal case: the whole key column(s) are transferred as is
        tp.transfer();
      }
      return true;
    }

    private void setValueCount() {
      for (VectorWrapper<?> vw : htContainer) {
        @SuppressWarnings("resource")
        ValueVector vv = vw.getValueVector();
        vv.getMutator().setValueCount(maxOccupiedIdx + 1);
      }
      htContainer.setRecordCount(maxOccupiedIdx + 1);
    }

    private void dump(int idx) {
      while (true) {
        int idxWithinBatch = idx & BATCH_MASK;
        if (idxWithinBatch == EMPTY_SLOT) {
          break;
        } else {
          logger.debug("links[ {} ] = {}, hashValues[ {} ] = {}.", idxWithinBatch,
              links.getAccessor().get(idxWithinBatch), idxWithinBatch,
              hashValues.getAccessor().get(idxWithinBatch));
          idx = links.getAccessor().get(idxWithinBatch);
        }
      }
    }

    private void clear() {
      htContainer.clear();
      if (links != null) {
        links.clear();
      }
      if (hashValues != null) {
        hashValues.clear();
      }
    }

    // Only used for internal debugging. Get the value vector at a particular index from the htContainer.
    // By default this assumes the VV is a BigIntVector.
    private ValueVector getValueVector(int index) {
      Object tmp = (htContainer).getValueAccessorById(BigIntVector.class, index).getValueVector();
      if (tmp != null) {
        BigIntVector vv0 = ((BigIntVector) tmp);
        return vv0;
      }
      return null;
    }

    // These methods will be code-generated

    @RuntimeOverridden
    protected void setupInterior(@Named("incomingBuild") VectorContainer incomingBuild,
        @Named("incomingProbe") RecordBatch incomingProbe, @Named("outgoing") RecordBatch outgoing,
        @Named("htContainer") VectorContainer htContainer) throws SchemaChangeException {
    }

    @RuntimeOverridden
    protected boolean isKeyMatchInternalBuild(@Named("incomingRowIdx") int incomingRowIdx,
        @Named("htRowIdx") int htRowIdx) throws SchemaChangeException {
      return false;
    }

    @RuntimeOverridden
    protected boolean isKeyMatchInternalProbe(@Named("incomingRowIdx") int incomingRowIdx,
        @Named("htRowIdx") int htRowIdx) throws SchemaChangeException {
      return false;
    }

    @RuntimeOverridden
    protected void setValue(@Named("incomingRowIdx") int incomingRowIdx,
        @Named("htRowIdx") int htRowIdx) throws SchemaChangeException {
    }

    @RuntimeOverridden
    protected void outputRecordKeys(@Named("htRowIdx") int htRowIdx,
        @Named("outRowIdx") int outRowIdx) throws SchemaChangeException {
    }

    public long getActualSize() {
      Set<AllocationManager.BufferLedger> ledgers = Sets.newHashSet();
      links.collectLedgers(ledgers);
      hashValues.collectLedgers(ledgers);

      long size = 0L;
      for (AllocationManager.BufferLedger ledger : ledgers) {
        size += ledger.getAccountedSize();
      }

      size += new RecordBatchSizer(htContainer).getActualSize();
      return size;
    }
  }
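  // Illustrative note (added commentary, not in the original source): collecting
  // BufferLedgers into a Set before summing deduplicates ledgers of buffers shared
  // between the metadata vectors, so getActualSize() does not double-count memory
  // that more than one vector accounts against the same ledger.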
  @Override
  public void setup(HashTableConfig htConfig, BufferAllocator allocator, VectorContainer incomingBuild,
      RecordBatch incomingProbe, RecordBatch outgoing, VectorContainer htContainerOrig,
      FragmentContext context, ClassGenerator<?> cg) {
    float loadf = htConfig.getLoadFactor();
    int initialCap = htConfig.getInitialCapacity();

    if (loadf <= 0 || Float.isNaN(loadf)) {
      throw new IllegalArgumentException("Load factor must be a valid number greater than 0");
    }
    if (initialCap <= 0) {
      throw new IllegalArgumentException("The initial capacity must be greater than 0");
    }
    if (initialCap > MAXIMUM_CAPACITY) {
      throw new IllegalArgumentException("The initial capacity must be less than maximum capacity allowed");
    }
    if (htConfig.getKeyExprsBuild() == null || htConfig.getKeyExprsBuild().size() == 0) {
      throw new IllegalArgumentException("Hash table must have at least 1 key expression");
    }

    this.htConfig = htConfig;
    this.allocator = allocator;
    this.incomingBuild = incomingBuild;
    this.incomingProbe = incomingProbe;
    this.outgoing = outgoing;
    this.htContainerOrig = htContainerOrig;
    this.context = context;
    this.cg = cg;
    this.allocationTracker = new HashTableAllocationTracker(htConfig);

    // round up the initial capacity to the nearest highest power of 2
    tableSize = roundUpToPowerOf2(initialCap);
    if (tableSize > MAXIMUM_CAPACITY) {
      tableSize = MAXIMUM_CAPACITY;
    }
    originalTableSize = tableSize; // retain original size

    threshold = (int) Math.ceil(tableSize * loadf);

    dummyIntField = MaterializedField.create("dummy", Types.required(MinorType.INT));

    startIndices = allocMetadataVector(tableSize, EMPTY_SLOT);

    // Create the first batch holder
    batchHolders = new ArrayList<BatchHolder>();
    // First BatchHolder is created when the first put request is received.

    prevIndexSize = 0;
    currentIndexSize = 0;
    totalIndexSize = 0;

    try {
      doSetup(incomingBuild, incomingProbe);
    } catch (SchemaChangeException e) {
      throw new IllegalStateException("Unexpected schema change", e);
    }

    currentIdxHolder = new IndexPointer();
  }

  @Override
  public void updateInitialCapacity(int initialCapacity) {
    htConfig = htConfig.withInitialCapacity(initialCapacity);
    allocationTracker = new HashTableAllocationTracker(htConfig);
    enlargeEmptyHashTableIfNeeded(initialCapacity);
  }

  @Override
  public void updateBatches() throws SchemaChangeException {
    doSetup(incomingBuild, incomingProbe);
    for (BatchHolder batchHolder : batchHolders) {
      batchHolder.setup();
    }
  }

  public int numBuckets() {
    return startIndices.getAccessor().getValueCount();
  }

  public int numResizing() {
    return numResizing;
  }

  @Override
  public int size() {
    return numEntries;
  }

  @Override
  public void getStats(HashTableStats stats) {
    assert stats != null;
    stats.numBuckets = numBuckets();
    stats.numEntries = numEntries;
    stats.numResizing = numResizing;
    stats.resizingTime = resizingTime;
  }

  @Override
  public boolean isEmpty() {
    return numEntries == 0;
  }

  @Override
  public void clear() {
    clear(true);
  }

  private void clear(boolean close) {
    if (close) {
      // If we are closing, we need to clear the htContainerOrig as well.
      htContainerOrig.clear();
    }

    if (batchHolders != null) {
      for (BatchHolder bh : batchHolders) {
        bh.clear();
      }
      batchHolders.clear();
      batchHolders = null;
      prevIndexSize = 0;
      currentIndexSize = 0;
      totalIndexSize = 0;
    }
    startIndices.clear();
    // currentIdxHolder = null; // keep IndexPointer in case HT is reused
    numEntries = 0;
  }

  private int getBucketIndex(int hash, int numBuckets) {
    return hash & (numBuckets - 1);
  }

  private static int roundUpToPowerOf2(int number) {
    int rounded = number >= MAXIMUM_CAPACITY
        ? MAXIMUM_CAPACITY
        : (rounded = Integer.highestOneBit(number)) != 0
            ? (Integer.bitCount(number) > 1) ? rounded << 1 : rounded
            : 1;
    return rounded;
  }
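  // Illustrative note (added commentary, not in the original source): because the number
  // of buckets is always a power of two, getBucketIndex() can use a bit mask instead of
  // the modulo operator. For example, roundUpToPowerOf2(1000) = 1024, and a hash of 27
  // then maps to bucket 27 & (1024 - 1) = 27; roundUpToPowerOf2(1024) stays 1024.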
  private void retryAfterOOM(boolean batchAdded) throws RetryAfterSpillException {
    // If a batch was added then undo; otherwise when retrying this put() we'd miss a NEW_BATCH_ADDED
    if (batchAdded) {
      logger.trace("OOM - Removing index {} from the batch holders list", batchHolders.size() - 1);
      BatchHolder bh = batchHolders.remove(batchHolders.size() - 1);
      prevIndexSize = batchHolders.size() > 1 ? (batchHolders.size() - 1) * BATCH_SIZE : 0;
      currentIndexSize = prevIndexSize +
          (batchHolders.size() > 0 ? batchHolders.get(batchHolders.size() - 1).getTargetBatchRowCount() : 0);
      totalIndexSize = batchHolders.size() * BATCH_SIZE;
      // update freeIndex to point to the end of the last batch + 1
      freeIndex = totalIndexSize + 1;
      bh.clear();
    } else {
      freeIndex--;
    }
    throw new RetryAfterSpillException();
  }

  /**
   * Return the hash value for the row in the Build incoming batch at the given index.
   * (For Hash Aggregate there is no "Build" side -- only one batch: this one.)
   *
   * @param incomingRowIdx position of the row in the incoming (build side) batch
   * @return the hash value computed over the key(s) of that row
   * @throws SchemaChangeException
   */
  @Override
  public int getBuildHashCode(int incomingRowIdx) throws SchemaChangeException {
    return getHashBuild(incomingRowIdx, 0);
  }

  /**
   * Return the hash value for the row in the Probe incoming batch at the given index.
   *
   * @param incomingRowIdx position of the row in the incoming (probe side) batch
   * @return the hash value computed over the key(s) of that row
   * @throws SchemaChangeException
   */
  @Override
  public int getProbeHashCode(int incomingRowIdx) throws SchemaChangeException {
    return getHashProbe(incomingRowIdx, 0);
  }
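  // Illustrative note (added commentary, not in the original source): both getters call
  // the generated hash functions with seedValue = 0, so equal keys on the build and
  // probe sides hash identically; this is what lets probeForKey() start its search in
  // the same bucket that put() used for the matching build-side key.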
  /**
   * put() uses the hash code (from getBuildHashCode() above) to insert the key(s) from the incoming
   * row into the hash table. The code selects the bucket in the startIndices, then the keys are
   * placed into the chained list - by storing the key values into a batch, and updating its
   * "links" member. Last, it modifies the index holder to the batch offset so that the caller
   * can store the remaining parts of the row into a matching batch (outside the hash table).
   *
   * @param incomingRowIdx - position of the incoming row
   * @param htIdxHolder - to return batch + batch-offset (for the caller to manage a matching batch)
   * @param hashCode - computed over the key(s) by calling getBuildHashCode()
   * @return Status - the key(s) was ADDED or was already PRESENT
   */
  @Override
  public PutStatus put(int incomingRowIdx, IndexPointer htIdxHolder, int hashCode, int targetBatchRowCount)
      throws SchemaChangeException, RetryAfterSpillException {

    int bucketIndex = getBucketIndex(hashCode, numBuckets());
    int startIdx = startIndices.getAccessor().get(bucketIndex);
    int currentIdx;
    BatchHolder lastEntryBatch = null;
    int lastEntryIdxWithinBatch = EMPTY_SLOT;

    // if startIdx is non-empty, follow the hash chain links until we find a matching
    // key or reach the end of the chain (and remember the last link there)
    for (currentIdxHolder.value = startIdx;
         currentIdxHolder.value != EMPTY_SLOT;
         /* isKeyMatch() below also advances the currentIdxHolder to the next link */) {
      // remember the current link, which would be the last when the next link is empty
      lastEntryBatch = batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK);
      lastEntryIdxWithinBatch = currentIdxHolder.value & BATCH_MASK;

      if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIdxHolder, false)) {
        htIdxHolder.value = currentIdxHolder.value;
        return PutStatus.KEY_PRESENT;
      }
    }

    // no match was found, so insert a new entry
    currentIdx = freeIndex++;
    boolean addedBatch = false;
    try { // ADD A BATCH
      addedBatch = addBatchIfNeeded(currentIdx, targetBatchRowCount);
      if (addedBatch) {
        // If we just added the batch, update the current index to point to the beginning of the new batch.
        currentIdx = (batchHolders.size() - 1) * BATCH_SIZE;
        freeIndex = currentIdx + 1;
      }
    } catch (OutOfMemoryException OOME) {
      retryAfterOOM(currentIdx < totalIndexSize);
    }

    try { // INSERT ENTRY
      BatchHolder bh = batchHolders.get((currentIdx >>> 16) & BATCH_MASK);
      bh.insertEntry(incomingRowIdx, currentIdx, hashCode, lastEntryBatch, lastEntryIdxWithinBatch);
      numEntries++;
    } catch (OutOfMemoryException OOME) {
      retryAfterOOM(addedBatch);
    }

    try { // RESIZE HT
      /* Resize the hash table if needed and transfer the metadata.
       * Resize only after inserting the current entry into the hash table;
       * otherwise our calculated lastEntryBatch and lastEntryIdx
       * become invalid after the resize.
       */
      resizeAndRehashIfNeeded();
    } catch (OutOfMemoryException OOME) {
      numEntries--; // undo - insert entry
      if (lastEntryBatch != null) { // undo the last added link in the chain (if any)
        lastEntryBatch.updateLinks(lastEntryIdxWithinBatch, EMPTY_SLOT);
      }
      retryAfterOOM(addedBatch);
    }

    if (EXTRA_DEBUG) {
      logger.debug("No match was found for incomingRowIdx = {}; inserting new entry at currentIdx = {}.",
          incomingRowIdx, currentIdx);
    }

    // if there was no hash chain at this bucket, need to update the start index array
    if (startIdx == EMPTY_SLOT) {
      startIndices.getMutator().set(getBucketIndex(hashCode, numBuckets()), currentIdx);
    }
    htIdxHolder.value = currentIdx;
    return addedBatch ? PutStatus.NEW_BATCH_ADDED :
        (freeIndex + 1 > currentIndexSize) ?
            PutStatus.KEY_ADDED_LAST : // the last key in the batch
            PutStatus.KEY_ADDED;       // otherwise
  }

  /**
   * Return -1 if the Probe-side key is not found in the (build-side) hash table.
   * Otherwise, return the global index of the key.
   *
   * @param incomingRowIdx position of the row in the incoming (probe side) batch
   * @param hashCode - the hash code for the Probe-side key
   * @return -1 if the key is not found, else the global index of the key
   * @throws SchemaChangeException
   */
  @Override
  public int probeForKey(int incomingRowIdx, int hashCode) throws SchemaChangeException {
    int bucketIndex = getBucketIndex(hashCode, numBuckets());

    for (currentIdxHolder.value = startIndices.getAccessor().get(bucketIndex);
         currentIdxHolder.value != EMPTY_SLOT; ) {
      BatchHolder bh = batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK);
      if (bh.isKeyMatch(incomingRowIdx, currentIdxHolder, true /* isProbe */)) {
        return currentIdxHolder.value;
      }
    }
    return -1;
  }
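  // Illustrative note (added commentary, not in the original source): the global index
  // returned by probeForKey() packs the BatchHolder number into the upper bits and the
  // offset within that batch into the lower 16 bits; a caller can decode it the same
  // way this class does:
  //   BatchHolder bh = batchHolders.get((globalIdx >>> 16) & BATCH_MASK);
  //   int offset = globalIdx & BATCH_MASK;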
  // Add a new BatchHolder to the list of batch holders if needed. This is based on the supplied
  // currentIdx; since each BatchHolder can hold up to BATCH_SIZE entries, if the currentIdx exceeds
  // the capacity, we will add a new BatchHolder. Return true if a new batch was added.
  private boolean addBatchIfNeeded(int currentIdx, int batchRowCount) throws SchemaChangeException {
    // Add a new batch if this is the first batch, or if the index is greater than the current batch
    // target count, i.e. we reached the limit of the current batch.
    if (batchHolders.size() == 0 || (currentIdx >= currentIndexSize)) {
      final int allocationSize = allocationTracker.getNextBatchHolderSize(batchRowCount);
      final BatchHolder bh = newBatchHolder(batchHolders.size(), allocationSize);
      batchHolders.add(bh);
      prevIndexSize = batchHolders.size() > 1 ? (batchHolders.size() - 1) * BATCH_SIZE : 0;
      currentIndexSize = prevIndexSize + batchHolders.get(batchHolders.size() - 1).getTargetBatchRowCount();
      totalIndexSize = batchHolders.size() * BATCH_SIZE;

      bh.setup();
      if (EXTRA_DEBUG) {
        logger.debug("HashTable: Added new batch. Num batches = {}.", batchHolders.size());
      }

      allocationTracker.commit(allocationSize);
      return true;
    }
    return false;
  }

  protected BatchHolder newBatchHolder(int index, int newBatchHolderSize) { // special method to allow debugging of gen code
    return this.injectMembers(new BatchHolder(index, newBatchHolderSize));
  }

  protected BatchHolder injectMembers(BatchHolder batchHolder) {
    CodeGenMemberInjector.injectMembers(cg, batchHolder, context);
    return batchHolder;
  }

  // Resize the hash table if needed by creating a new one with double the number of buckets.
  // For each entry in the old hash table, re-hash it to the new table and update the metadata
  // in the new table.. the metadata consists of the startIndices, links and hashValues.
  // Note that the keys stored in the BatchHolders are not moved around.
  private void resizeAndRehashIfNeeded() {
    if (numEntries < threshold) {
      return;
    }

    if (EXTRA_DEBUG) {
      logger.debug("Hash table numEntries = {}, threshold = {}; resizing the table...", numEntries, threshold);
    }

    // If the table size is already MAXIMUM_CAPACITY, don't resize
    // the table, but set the threshold to Integer.MAX_VALUE such that
    // future attempts to resize will return immediately.
    if (tableSize == MAXIMUM_CAPACITY) {
      threshold = Integer.MAX_VALUE;
      return;
    }

    int newTableSize = 2 * tableSize;
    newTableSize = roundUpToPowerOf2(newTableSize);

    // if not enough memory is available to allocate the new hash table, plus the new links and
    // the new hash values (to replace the existing ones - inside rehash()), then OOM
    if (4 /* sizeof(int) */ * (newTableSize + 2 * HashTable.BATCH_SIZE /* links + hashValues */)
        >= allocator.getLimit() - allocator.getAllocatedMemory()) {
      throw new OutOfMemoryException("Resize Hash Table");
    }

    tableSize = newTableSize;
    if (tableSize > MAXIMUM_CAPACITY) {
      tableSize = MAXIMUM_CAPACITY;
    }

    long t0 = System.currentTimeMillis();

    // set the new threshold based on the new table size and load factor
    threshold = (int) Math.ceil(tableSize * htConfig.getLoadFactor());

    IntVector newStartIndices = allocMetadataVector(tableSize, EMPTY_SLOT);

    for (int i = 0; i < batchHolders.size(); i++) {
      BatchHolder bh = batchHolders.get(i);
      int batchStartIdx = i * BATCH_SIZE;
      bh.rehash(tableSize, newStartIndices, batchStartIdx);
    }

    startIndices.clear();
    startIndices = newStartIndices;

    if (EXTRA_DEBUG) {
      logger.debug("After resizing and rehashing, dumping the hash table...");
      logger.debug("Number of buckets = {}.", startIndices.getAccessor().getValueCount());
      for (int i = 0; i < startIndices.getAccessor().getValueCount(); i++) {
        logger.debug("Bucket: {}, startIdx[ {} ] = {}.", i, i, startIndices.getAccessor().get(i));
        int startIdx = startIndices.getAccessor().get(i);
        BatchHolder bh = batchHolders.get((startIdx >>> 16) & BATCH_MASK);
        bh.dump(startIdx);
      }
    }
    resizingTime += System.currentTimeMillis() - t0;
    numResizing++;
  }

  /**
   * Resize up the hash table if needed (to hold newNum entries)
   */
  public void enlargeEmptyHashTableIfNeeded(int newNum) {
    assert numEntries == 0;
    if (newNum < threshold) {
      return; // no need to resize
    }

    while (tableSize * 2 < MAXIMUM_CAPACITY && newNum > threshold) {
      tableSize *= 2;
      threshold = (int) Math.ceil(tableSize * htConfig.getLoadFactor());
    }
    startIndices.clear();
    startIndices = allocMetadataVector(tableSize, EMPTY_SLOT);
  }
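  // Illustrative note (added commentary, not in the original source): with a load factor
  // of, say, 0.75, a table of 64 buckets rehashes once numEntries reaches
  // ceil(64 * 0.75) = 48; after doubling to 128 buckets the new threshold is
  // ceil(128 * 0.75) = 96.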
  /**
   * Reinitialize the hash table to its original size, and clear up all its prior batch holders.
   */
  public void reset() {
    this.clear(false); // Clear all current batch holders and the hash table (i.e. free their memory)

    freeIndex = 0; // all batch holders are gone
    // reallocate the batch holders, and the hash table to the original size
    batchHolders = new ArrayList<BatchHolder>();
    prevIndexSize = 0;
    currentIndexSize = 0;
    totalIndexSize = 0;
    startIndices = allocMetadataVector(originalTableSize, EMPTY_SLOT);
  }

  public void updateIncoming(VectorContainer newIncoming, RecordBatch newIncomingProbe) {
    incomingBuild = newIncoming;
    incomingProbe = newIncomingProbe;
    // reset();
    try {
      updateBatches(); // Needed to update the value vectors in the generated code with the new incoming
    } catch (SchemaChangeException e) {
      throw new IllegalStateException("Unexpected schema change", e);
    }
  }

  @Override
  public boolean outputKeys(int batchIdx, VectorContainer outContainer, int numRecords) {
    assert batchIdx < batchHolders.size();
    return batchHolders.get(batchIdx).outputKeys(outContainer, numRecords);
  }

  private IntVector allocMetadataVector(int size, int initialValue) {
    IntVector vector = (IntVector) TypeHelper.getNewVector(dummyIntField, allocator);
    vector.allocateNew(size);
    for (int i = 0; i < size; i++) {
      vector.getMutator().set(i, initialValue);
    }
    vector.getMutator().setValueCount(size);
    return vector;
  }

  public Pair<VectorContainer, Integer> nextBatch() {
    if (batchHolders == null || batchHolders.size() == 0) {
      return null;
    }
    if (htIter == null) {
      htIter = batchHolders.iterator();
    }
    if (htIter.hasNext()) {
      BatchHolder bh = htIter.next();
      // set the value count for the vectors in the batch
      // TODO: investigate why the value count is not already set in the
      // batch.. it seems even outputKeys() sets the value count explicitly
      if (bh != null) {
        bh.setValueCount();
        return Pair.of(bh.htContainer, bh.maxOccupiedIdx);
      }
    }
    return null;
  }

  // These methods will be code-generated in the context of the outer class

  protected abstract void doSetup(@Named("incomingBuild") VectorContainer incomingBuild,
      @Named("incomingProbe") RecordBatch incomingProbe) throws SchemaChangeException;

  protected abstract int getHashBuild(@Named("incomingRowIdx") int incomingRowIdx,
      @Named("seedValue") int seedValue) throws SchemaChangeException;

  protected abstract int getHashProbe(@Named("incomingRowIdx") int incomingRowIdx,
      @Named("seedValue") int seedValue) throws SchemaChangeException;

  @Override
  public long getActualSize() {
    Set<AllocationManager.BufferLedger> ledgers = Sets.newHashSet();
    startIndices.collectLedgers(ledgers);

    long size = 0L;
    for (AllocationManager.BufferLedger ledger : ledgers) {
      size += ledger.getAccountedSize();
    }

    for (BatchHolder batchHolder : batchHolders) {
      size += batchHolder.getActualSize();
    }
    return size;
  }

  @Override
  public String makeDebugString() {
    return String.format("[numBuckets = %d, numEntries = %d, numBatchHolders = %d, actualSize = %s]",
        numBuckets(), numEntries, batchHolders.size(),
        HashJoinMemoryCalculator.PartitionStatSet.prettyPrintBytes(getActualSize()));
  }

  @Override
  public void setTargetBatchRowCount(int batchRowCount) {
    batchHolders.get(batchHolders.size() - 1).targetBatchRowCount = batchRowCount;
  }

  @Override
  public int getTargetBatchRowCount() {
    return batchHolders.get(batchHolders.size() - 1).targetBatchRowCount;
  }
}
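// Illustrative usage sketch (added commentary, not in the original source; the caller
// names here are hypothetical). A build/probe driver would use the table roughly as:
//
//   IndexPointer htIndex = new IndexPointer();
//   int hash = hashTable.getBuildHashCode(buildRowIdx);
//   HashTable.PutStatus status = hashTable.put(buildRowIdx, htIndex, hash, targetRowCount);
//   // status: KEY_PRESENT, KEY_ADDED, KEY_ADDED_LAST or NEW_BATCH_ADDED;
//   // htIndex.value now holds the key's global index (batch << 16 | offset);
//   // put() may also throw RetryAfterSpillException, signaling the caller to spill and retry.
//
//   int matchIdx = hashTable.probeForKey(probeRowIdx, hashTable.getProbeHashCode(probeRowIdx));
//   if (matchIdx != -1) { /* probe key matched the build-side entry at matchIdx */ }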