co.cask.cdap.data2.increment.hbase11.IncrementSummingScanner.java Source code

Introduction

Here is the source code for co.cask.cdap.data2.increment.hbase11.IncrementSummingScanner.java, a RegionScanner wrapper that transforms reads of stored delta increments into calculated sums for each column.

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.data2.increment.hbase11;

import co.cask.cdap.data2.increment.hbase.IncrementHandlerState;
import com.google.common.base.Preconditions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.regionserver.ScanType;
import org.apache.hadoop.hbase.regionserver.ScannerContext;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;

/**
 * Transforms reads of the stored delta increments into calculated sums for each column.
 */
class IncrementSummingScanner implements RegionScanner {
    private static final Log LOG = LogFactory.getLog(IncrementSummingScanner.class);
    private static final Field SCANNER_CONTEXT_LIMITS_FIELD;
    private static final Method GET_BATCH_METHOD;

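    // ScannerContext keeps its limits (including the batch limit) in a non-public LimitFields class,
    // so the accessors are resolved once here via reflection and reused in getBatchLimit()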
    static {
        try {
            Field limitsField = ScannerContext.class.getDeclaredField("limits");
            limitsField.setAccessible(true);
            SCANNER_CONTEXT_LIMITS_FIELD = limitsField;

            Class<?> limitFieldsClass = limitsField.getType();
            Method getBatchMethod = limitFieldsClass.getDeclaredMethod("getBatch");
            getBatchMethod.setAccessible(true);
            GET_BATCH_METHOD = getBatchMethod;
        } catch (Exception e) {
            LOG.error("Failed to get ScannerContext.LimitFields.getBatch method through reflection.");
            throw new IllegalStateException(e);
        }
    }

    private final Region region;
    private final WrappedScanner baseScanner;
    private RegionScanner baseRegionScanner;
    private final int batchSize;
    private final ScanType scanType;
    // Highest timestamp, beyond which we cannot aggregate increments during flush and compaction.
    // Increments newer than this may still be visible to running transactions
    private final long compactionUpperBound;
    // oldest timestamp still within the configured TTL; older non-increment cells are dropped
    private final long oldestTsByTTL;

    IncrementSummingScanner(Region region, int batchSize, InternalScanner internalScanner, ScanType scanType) {
        this(region, batchSize, internalScanner, scanType, Long.MAX_VALUE, -1);
    }

    IncrementSummingScanner(Region region, int batchSize, InternalScanner internalScanner, ScanType scanType,
            long compactionUpperBound, long oldestTsByTTL) {
        this.region = region;
        this.batchSize = batchSize;
        this.baseScanner = new WrappedScanner(internalScanner);
        if (internalScanner instanceof RegionScanner) {
            this.baseRegionScanner = (RegionScanner) internalScanner;
        }
        this.scanType = scanType;
        this.compactionUpperBound = compactionUpperBound;
        this.oldestTsByTTL = oldestTsByTTL;
    }

    @Override
    public HRegionInfo getRegionInfo() {
        return region.getRegionInfo();
    }

    @Override
    public boolean isFilterDone() throws IOException {
        if (baseRegionScanner != null) {
            return baseRegionScanner.isFilterDone();
        }
        throw new IllegalStateException(
                "RegionScanner.isFilterDone() called when the wrapped scanner is not a RegionScanner");
    }

    @Override
    public boolean reseek(byte[] bytes) throws IOException {
        if (baseRegionScanner != null) {
            return baseRegionScanner.reseek(bytes);
        }
        throw new IllegalStateException(
                "RegionScanner.reseek() called when the wrapped scanner is not a RegionScanner");
    }

    @Override
    public long getMaxResultSize() {
        if (baseRegionScanner != null) {
            return baseRegionScanner.getMaxResultSize();
        }
        throw new IllegalStateException(
                "RegionScanner.getMaxResultSize() called when the wrapped scanner is not a RegionScanner");
    }

    @Override
    public long getMvccReadPoint() {
        if (baseRegionScanner != null) {
            return baseRegionScanner.getMvccReadPoint();
        }
        throw new IllegalStateException(
                "RegionScanner.getMvccReadPoint() called when the wrapped scanner is not a RegionScanner");
    }

    /**
     * @return The limit on the number of cells to retrieve on each call to next(). See
     * {@link Scan#setBatch(int)}
     */
    @Override
    public int getBatch() {
        return batchSize;
    }

    @Override
    public boolean nextRaw(List<Cell> cells) throws IOException {
        return nextRaw(cells, ScannerContext.newBuilder().setBatchLimit(batchSize).build());
    }

    @Override
    public boolean nextRaw(List<Cell> cells, ScannerContext scannerContext) throws IOException {
        return nextInternal(cells, scannerContext);
    }

    @Override
    public boolean next(List<Cell> cells) throws IOException {
        return next(cells, ScannerContext.newBuilder().setBatchLimit(batchSize).build());
    }

    @Override
    public boolean next(List<Cell> cells, ScannerContext scannerContext) throws IOException {
        return nextInternal(cells, scannerContext);
    }

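    /**
     * Merges consecutive delta cells for the same column into a single cell carrying their sum, while passing
     * through unchanged any cells still visible to in-progress transactions and honoring the batch limit carried
     * by the {@link ScannerContext}.
     */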
    private boolean nextInternal(List<Cell> cells, ScannerContext scannerContext) throws IOException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("nextInternal called with limit=" + scannerContext);
        }
        Cell previousIncrement = null;
        long runningSum = 0;
        int addedCnt = 0;
        baseScanner.startNext();
        Cell cell;
        int limit = getBatchLimit(scannerContext);

        while ((cell = baseScanner.peekNextCell(scannerContext)) != null && (limit <= 0 || addedCnt < limit)) {
            // we use the "peek" semantics so that only one cell is ever emitted per iteration;
            // this makes it clearer and easier to enforce that the returned results are <= limit
            if (LOG.isTraceEnabled()) {
                LOG.trace("Checking cell " + cell);
            }
            // any cells visible to in-progress transactions must be kept unchanged
            if (cell.getTimestamp() > compactionUpperBound) {
                if (previousIncrement != null) {
                    // if previous increment is present, only emit the previous increment, reset it, and continue.
                    // the current cell will be consumed on the next iteration, if we have not yet reached the limit
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Including increment: sum=" + runningSum + ", cell=" + previousIncrement);
                    }
                    cells.add(newCell(previousIncrement, runningSum));
                    addedCnt++;
                    previousIncrement = null;
                    runningSum = 0;

                    continue;
                }
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Including cell visible to in-progress, cell=" + cell);
                }
                cells.add(cell);
                addedCnt++;
                baseScanner.nextCell(scannerContext);
                continue;
            }

            // compact any delta writes
            if (IncrementHandler.isIncrement(cell)) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Found increment for row=" + Bytes.toStringBinary(CellUtil.cloneRow(cell)) + ", "
                            + "column=" + Bytes.toStringBinary(CellUtil.cloneQualifier(cell)));
                }
                if (!sameCell(previousIncrement, cell)) {
                    if (previousIncrement != null) {
                        // if different qualifier, and prev qualifier non-null
                        // emit the previous sum
                        if (LOG.isTraceEnabled()) {
                            LOG.trace("Including increment: sum=" + runningSum + ", cell=" + previousIncrement);
                        }
                        cells.add(newCell(previousIncrement, runningSum));
                        previousIncrement = null;
                        addedCnt++;
                        // continue without advancing, current cell will be consumed on the next iteration
                        continue;
                    }
                    previousIncrement = cell;
                    runningSum = 0;
                }
                // add this increment to the tally
                runningSum += Bytes.toLong(cell.getValueArray(),
                        cell.getValueOffset() + IncrementHandlerState.DELTA_MAGIC_PREFIX.length);
            } else {
                // otherwise (not an increment)
                if (previousIncrement != null) {
                    if (sameCell(previousIncrement, cell) && !CellUtil.isDelete(cell)) {
                        // if qualifier matches previous and this is a long, add to running sum, emit
                        runningSum += Bytes.toLong(cell.getValueArray(), cell.getValueOffset());
                        // this cell already processed as part of the previous increment's sum, so consume it
                        baseScanner.nextCell(scannerContext);
                    }
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Including increment: sum=" + runningSum + ", cell=" + previousIncrement);
                    }
                    // emit the merged value for the previous increment (including this put's value if it was
                    // consumed above), reset the running state, and continue. If the current cell belongs to a
                    // different column, it will be consumed on the next iteration, if we have not yet reached the limit
                    cells.add(newCell(previousIncrement, runningSum));
                    addedCnt++;
                    previousIncrement = null;
                    runningSum = 0;

                    continue;
                }
                // otherwise emit the current cell
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Including raw cell: " + cell);
                }

                // apply any configured TTL
                if (cell.getTimestamp() > oldestTsByTTL) {
                    cells.add(cell);
                    addedCnt++;
                }
            }
            // if we made it this far, consume the current cell
            baseScanner.nextCell(scannerContext);
        }
        // emit any left over increment, if we hit the end
        if (previousIncrement != null) {
            // in any situation where we exited due to limit, previousIncrement should already be null
            Preconditions.checkState(limit <= 0 || addedCnt < limit, "addedCnt=%s, limit=%s", addedCnt, limit);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Including leftover increment: sum=" + runningSum + ", cell=" + previousIncrement);
            }
            cells.add(newCell(previousIncrement, runningSum));
        }

        boolean hasMore = baseScanner.hasMore();
        if (LOG.isTraceEnabled()) {
            LOG.trace("nextInternal done with limit=" + getBatch() + " hasMore=" + hasMore);
        }
        return hasMore;
    }

    /**
     * Gets the batch limit from the given {@link ScannerContext} through reflection.
     */
    private int getBatchLimit(ScannerContext scannerContext) {
        // We need to access the batch limit (the limit on the number of columns) in the ScannerContext.
        // The ScannerContext does not expose a method to get the limits; the batch limit is kept as an integer
        // inside a package-private class, LimitFields, so we read it through reflection. Also, we cannot depend
        // on the limit tracking of the internal scanner, as it does not differentiate between an increment cell
        // and a non-increment cell. So we read all the cells from the internal scanner in batches of the limit
        // and do the column tracking ourselves.
        try {
            return (Integer) GET_BATCH_METHOD.invoke(SCANNER_CONTEXT_LIMITS_FIELD.get(scannerContext));
        } catch (Exception e) {
            throw e instanceof RuntimeException ? (RuntimeException) e : new RuntimeException(e);
        }
    }

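    // Two cells are considered the "same" when they match on row, family, and qualifier (two nulls also match)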
    private boolean sameCell(Cell first, Cell second) {
        if (first == null && second == null) {
            return true;
        } else if (first == null || second == null) {
            return false;
        }

        return CellUtil.matchingRow(first, second) && CellUtil.matchingFamily(first, second)
                && CellUtil.matchingQualifier(first, second);
    }

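    /**
     * Builds a replacement cell carrying the aggregated value. When deletes are being retained
     * ({@link ScanType#COMPACT_RETAIN_DELETES}), the delta magic prefix is added back so the written value
     * is still recognized as an increment delta on subsequent reads.
     */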
    private Cell newCell(Cell toCopy, long value) {
        byte[] newValue = Bytes.toBytes(value);
        if (scanType == ScanType.COMPACT_RETAIN_DELETES) {
            newValue = Bytes.add(IncrementHandlerState.DELTA_MAGIC_PREFIX, newValue);
        }
        return CellUtil.createCell(CellUtil.cloneRow(toCopy), CellUtil.cloneFamily(toCopy),
                CellUtil.cloneQualifier(toCopy), toCopy.getTimestamp(), KeyValue.Type.Put.getCode(), newValue);
    }

    @Override
    public void close() throws IOException {
        baseScanner.close();
    }

    /**
     * Wraps the underlying store or region scanner in an API that hides the details of calling and managing the
     * buffered batch of results.
     */
    private static class WrappedScanner implements Closeable {
        private boolean hasMore;
        private byte[] currentRow;
        private List<Cell> cellsToConsume = new ArrayList<>();
        private int currentIdx;
        private final InternalScanner scanner;

        public WrappedScanner(InternalScanner scanner) {
            this.scanner = scanner;
        }

        /**
         * Called to signal the start of the next() call by the scanner.
         */
        public void startNext() {
            currentRow = null;
        }

        /**
         * Returns the next available cell for the current row, without advancing the pointer.  Calling this method
         * multiple times in a row will continue to return the same cell.
         *
         * @param scannerContext The {@link ScannerContext} instance encapsulating all limits that should
         * be tracked.
         * @return the next available cell or null if no more cells are available for the current row
         * @throws IOException
         */
        public Cell peekNextCell(ScannerContext scannerContext) throws IOException {
            if (currentIdx >= cellsToConsume.size()) {
                // finished current batch
                cellsToConsume.clear();
                currentIdx = 0;
                hasMore = scanner.next(cellsToConsume, scannerContext);
            }
            Cell cell = null;
            if (currentIdx < cellsToConsume.size()) {
                cell = cellsToConsume.get(currentIdx);
                if (currentRow == null) {
                    currentRow = CellUtil.cloneRow(cell);
                } else if (!CellUtil.matchingRow(cell, currentRow)) {
                    // moved on to the next row
                    // don't consume current cell and signal no more cells for this row
                    return null;
                }
            }
            return cell;
        }

        /**
         * Returns the next available cell for the current row and advances the pointer to the next cell.  This method
         * can be called multiple times in a row to advance through all the available cells.
         *
         * @param scannerContext The {@link ScannerContext} instance encapsulating all limits that should
         * be tracked.
         * @return the next available cell or null if no more cells are available for the current row
         * @throws IOException
         */
        public Cell nextCell(ScannerContext scannerContext) throws IOException {
            Cell cell = peekNextCell(scannerContext);
            if (cell != null) {
                currentIdx++;
            }
            return cell;
        }

        /**
         * Returns whether or not the underlying scanner has more rows.
         */
        public boolean hasMore() {
            return currentIdx < cellsToConsume.size() || hasMore;
        }

        @Override
        public void close() throws IOException {
            scanner.close();
        }
    }
}
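
Example

The scanner above is package-private and is created by other classes in this package (such as the IncrementHandler referenced in the code), which are not shown on this page. As a rough, hypothetical sketch only (the observer class name and the choice of batch size and bounds below are illustrative, not CDAP's actual wiring), an HBase 1.1 RegionObserver in the same package could wrap a flush scanner like this:

package co.cask.cdap.data2.increment.hbase11;

import java.io.IOException;

import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.ScanType;
import org.apache.hadoop.hbase.regionserver.Store;

// Hypothetical example class, not part of CDAP: sums delta increments while the
// memstore is flushed out to a new store file.
public class ExampleIncrementObserver extends BaseRegionObserver {
    @Override
    public InternalScanner preFlush(ObserverContext<RegionCoprocessorEnvironment> e, Store store,
            InternalScanner scanner) throws IOException {
        // A batch size of -1 means no per-next() cell limit; the 4-argument constructor uses
        // Long.MAX_VALUE as the compaction upper bound (aggregate everything) and no TTL cutoff.
        return new IncrementSummingScanner(e.getEnvironment().getRegion(), -1, scanner,
                ScanType.COMPACT_RETAIN_DELETES);
    }
}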