org.apache.hadoop.hbase.util.MultiThreadedAction.java Source code

Introduction

Here is the source code for org.apache.hadoop.hbase.util.MultiThreadedAction.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.util;

import static org.apache.hadoop.hbase.util.test.LoadTestDataGenerator.INCREMENT;
import static org.apache.hadoop.hbase.util.test.LoadTestDataGenerator.MUTATE_INFO;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
import org.apache.hadoop.util.StringUtils;

import com.google.common.base.Preconditions;

/**
 * Common base class for reader and writer parts of multi-thread HBase load
 * test ({@link LoadTestTool}).
 */
public abstract class MultiThreadedAction {
    private static final Log LOG = LogFactory.getLog(MultiThreadedAction.class);

    protected final TableName tableName;
    protected final Configuration conf;

    protected int numThreads = 1;

    /** The start key of the key range, inclusive */
    protected long startKey = 0;

    /** The end key of the key range, exclusive */
    protected long endKey = 1;

    protected AtomicInteger numThreadsWorking = new AtomicInteger();
    protected AtomicLong numKeys = new AtomicLong();
    protected AtomicLong numCols = new AtomicLong();
    protected AtomicLong totalOpTimeMs = new AtomicLong();
    protected boolean verbose = false;

    protected LoadTestDataGenerator dataGenerator = null;

    /**
     * Default implementation of LoadTestDataGenerator that uses LoadTestKVGenerator, fixed
     * set of column families, and random number of columns in range. The table for it can
     * be created manually or, for example, via
     * {@link HBaseTestingUtility#createPreSplitLoadTestTable(
     * org.apache.hadoop.hbase.Configuration, byte[], byte[], Algorithm, DataBlockEncoding)}
     */
    public static class DefaultDataGenerator extends LoadTestDataGenerator {
        private byte[][] columnFamilies = null;
        private int minColumnsPerKey;
        private int maxColumnsPerKey;
        private final Random random = new Random();

        public DefaultDataGenerator(int minValueSize, int maxValueSize, int minColumnsPerKey, int maxColumnsPerKey,
                byte[]... columnFamilies) {
            super(minValueSize, maxValueSize);
            this.columnFamilies = columnFamilies;
            this.minColumnsPerKey = minColumnsPerKey;
            this.maxColumnsPerKey = maxColumnsPerKey;
        }

        public DefaultDataGenerator(byte[]... columnFamilies) {
            // Default values for tests that didn't care to provide theirs.
            this(256, 1024, 1, 10, columnFamilies);
        }

        @Override
        public byte[] getDeterministicUniqueKey(long keyBase) {
            return LoadTestKVGenerator.md5PrefixedKey(keyBase).getBytes();
        }

        @Override
        public byte[][] getColumnFamilies() {
            return columnFamilies;
        }

        @Override
        public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
            int numColumns = minColumnsPerKey + random.nextInt(maxColumnsPerKey - minColumnsPerKey + 1);
            byte[][] columns = new byte[numColumns][];
            for (int i = 0; i < numColumns; ++i) {
                columns[i] = Integer.toString(i).getBytes();
            }
            return columns;
        }

        @Override
        public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
            return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
        }

        @Override
        public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
            return LoadTestKVGenerator.verify(value, rowKey, cf, column);
        }

        @Override
        public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
            return (columnSet.size() >= minColumnsPerKey) && (columnSet.size() <= maxColumnsPerKey);
        }
    }

    /** "R" or "W" */
    private String actionLetter;

    /** Whether we need to print out Hadoop Streaming-style counters */
    private boolean streamingCounters;

    public static final int REPORTING_INTERVAL_MS = 5000;

    public MultiThreadedAction(LoadTestDataGenerator dataGen, Configuration conf, TableName tableName,
            String actionLetter) {
        this.conf = conf;
        this.dataGenerator = dataGen;
        this.tableName = tableName;
        this.actionLetter = actionLetter;
    }

    public void start(long startKey, long endKey, int numThreads) throws IOException {
        this.startKey = startKey;
        this.endKey = endKey;
        this.numThreads = numThreads;
        (new Thread(new ProgressReporter(actionLetter))).start();
    }

    private static String formatTime(long elapsedTime) {
        String format = String.format("%%0%dd", 2);
        elapsedTime = elapsedTime / 1000;
        String seconds = String.format(format, elapsedTime % 60);
        String minutes = String.format(format, (elapsedTime % 3600) / 60);
        String hours = String.format(format, elapsedTime / 3600);
        String time = hours + ":" + minutes + ":" + seconds;
        return time;
    }

    /** Asynchronously reports progress */
    private class ProgressReporter implements Runnable {

        private String reporterId = "";

        public ProgressReporter(String id) {
            this.reporterId = id;
        }

        @Override
        public void run() {
            long startTime = System.currentTimeMillis();
            long priorNumKeys = 0;
            long priorCumulativeOpTime = 0;
            int priorAverageKeysPerSecond = 0;

            // Give other threads time to start.
            Threads.sleep(REPORTING_INTERVAL_MS);

            while (numThreadsWorking.get() != 0) {
                String threadsLeft = "[" + reporterId + ":" + numThreadsWorking.get() + "] ";
                if (numKeys.get() == 0) {
                    LOG.info(threadsLeft + "Number of keys = 0");
                } else {
                    long numKeys = MultiThreadedAction.this.numKeys.get();
                    long time = System.currentTimeMillis() - startTime;
                    long totalOpTime = totalOpTimeMs.get();

                    long numKeysDelta = numKeys - priorNumKeys;
                    long totalOpTimeDelta = totalOpTime - priorCumulativeOpTime;

                    double averageKeysPerSecond = (time > 0) ? (numKeys * 1000 / time) : 0;

                    LOG.info(threadsLeft + "Keys=" + numKeys + ", cols="
                            + StringUtils.humanReadableInt(numCols.get()) + ", time=" + formatTime(time)
                            + ((numKeys > 0 && time > 0)
                                    ? (" Overall: [" + "keys/s= " + numKeys * 1000 / time + ", latency="
                                            + totalOpTime / numKeys + " ms]")
                                    : "")
                            + ((numKeysDelta > 0)
                                    ? (" Current: [" + "keys/s=" + numKeysDelta * 1000 / REPORTING_INTERVAL_MS
                                            + ", latency=" + totalOpTimeDelta / numKeysDelta + " ms]")
                                    : "")
                            + progressInfo());

                    if (streamingCounters) {
                        printStreamingCounters(numKeysDelta, averageKeysPerSecond - priorAverageKeysPerSecond);
                    }

                    priorNumKeys = numKeys;
                    priorCumulativeOpTime = totalOpTime;
                    priorAverageKeysPerSecond = (int) averageKeysPerSecond;
                }

                Threads.sleep(REPORTING_INTERVAL_MS);
            }
        }

        private void printStreamingCounters(long numKeysDelta, double avgKeysPerSecondDelta) {
            // Write stats in a format that can be interpreted as counters by
            // streaming map-reduce jobs.
            System.err.println("reporter:counter:numKeys," + reporterId + "," + numKeysDelta);
            System.err.println("reporter:counter:numCols," + reporterId + "," + numCols.get());
            System.err.println(
                    "reporter:counter:avgKeysPerSecond," + reporterId + "," + (long) (avgKeysPerSecondDelta));
        }
    }

    public void waitForFinish() {
        while (numThreadsWorking.get() != 0) {
            Threads.sleepWithoutInterrupt(1000);
        }
    }

    public boolean isDone() {
        return (numThreadsWorking.get() == 0);
    }

    protected void startThreads(Collection<? extends Thread> threads) {
        numThreadsWorking.addAndGet(threads.size());
        for (Thread thread : threads) {
            thread.start();
        }
    }

    /** @return the end key of the key range, exclusive */
    public long getEndKey() {
        return endKey;
    }

    /** Returns a task-specific progress string */
    protected abstract String progressInfo();

    protected static void appendToStatus(StringBuilder sb, String desc, long v) {
        if (v == 0) {
            return;
        }
        sb.append(", ");
        sb.append(desc);
        sb.append("=");
        sb.append(v);
    }

    protected static void appendToStatus(StringBuilder sb, String desc, String v) {
        sb.append(", ");
        sb.append(desc);
        sb.append("=");
        sb.append(v);
    }

    /**
     * See {@link #verifyResultAgainstDataGenerator(Result, boolean, boolean)}.
     * Does not verify cf/column integrity.
     */
    public boolean verifyResultAgainstDataGenerator(Result result, boolean verifyValues) {
        return verifyResultAgainstDataGenerator(result, verifyValues, false);
    }

    /**
     * Verifies the result from get or scan using the dataGenerator (that was presumably
     * also used to generate said result).
     * @param verifyValues verify that values in the result make sense for row/cf/column combination
     * @param verifyCfAndColumnIntegrity verify that cf/column set in the result is complete. Note
     *                                   that to use this multiPut should be used, or verification
     *                                   has to happen after writes, otherwise there can be races.
     * @return
     */
    public boolean verifyResultAgainstDataGenerator(Result result, boolean verifyValues,
            boolean verifyCfAndColumnIntegrity) {
        String rowKeyStr = Bytes.toString(result.getRow());

        // See if we have any data at all.
        if (result.isEmpty()) {
            LOG.error("Error checking data for key [" + rowKeyStr + "], no data returned");
            return false;
        }

        if (!verifyValues && !verifyCfAndColumnIntegrity) {
            return true; // as long as we have something, we are good.
        }

        // See if we have all the CFs.
        byte[][] expectedCfs = dataGenerator.getColumnFamilies();
        if (verifyCfAndColumnIntegrity && (expectedCfs.length != result.getMap().size())) {
            LOG.error(
                    "Error checking data for key [" + rowKeyStr + "], bad family count: " + result.getMap().size());
            return false;
        }

        // Verify each column family from get in the result.
        for (byte[] cf : result.getMap().keySet()) {
            String cfStr = Bytes.toString(cf);
            Map<byte[], byte[]> columnValues = result.getFamilyMap(cf);
            if (columnValues == null) {
                LOG.error("Error checking data for key [" + rowKeyStr + "], no data for family [" + cfStr + "]]");
                return false;
            }

            Map<String, MutationType> mutateInfo = null;
            if (verifyCfAndColumnIntegrity || verifyValues) {
                if (!columnValues.containsKey(MUTATE_INFO)) {
                    LOG.error("Error checking data for key [" + rowKeyStr + "], column family [" + cfStr
                            + "], column [" + Bytes.toString(MUTATE_INFO) + "]; value is not found");
                    return false;
                }

                long cfHash = Arrays.hashCode(cf);
                // Verify deleted columns, and make up column counts if deleted
                byte[] mutateInfoValue = columnValues.remove(MUTATE_INFO);
                mutateInfo = parseMutateInfo(mutateInfoValue);
                for (Map.Entry<String, MutationType> mutate : mutateInfo.entrySet()) {
                    if (mutate.getValue() == MutationType.DELETE) {
                        byte[] column = Bytes.toBytes(mutate.getKey());
                        long columnHash = Arrays.hashCode(column);
                        long hashCode = cfHash + columnHash;
                        if (hashCode % 2 == 0) {
                            if (columnValues.containsKey(column)) {
                                LOG.error("Error checking data for key [" + rowKeyStr + "], column family [" + cfStr
                                        + "], column [" + mutate.getKey() + "]; should be deleted");
                                return false;
                            }
                            byte[] hashCodeBytes = Bytes.toBytes(hashCode);
                            columnValues.put(column, hashCodeBytes);
                        }
                    }
                }

                // Verify increment
                if (!columnValues.containsKey(INCREMENT)) {
                    LOG.error("Error checking data for key [" + rowKeyStr + "], column family [" + cfStr
                            + "], column [" + Bytes.toString(INCREMENT) + "]; value is not found");
                    return false;
                }
                long currentValue = Bytes.toLong(columnValues.remove(INCREMENT));
                if (verifyValues) {
                    long amount = mutateInfo.isEmpty() ? 0 : cfHash;
                    long originalValue = Arrays.hashCode(result.getRow());
                    long extra = currentValue - originalValue;
                    if (extra != 0 && (amount == 0 || extra % amount != 0)) {
                        LOG.error("Error checking data for key [" + rowKeyStr + "], column family [" + cfStr
                                + "], column [increment], extra [" + extra + "], amount [" + amount + "]");
                        return false;
                    }
                    if (amount != 0 && extra != amount) {
                        LOG.warn("Warning checking data for key [" + rowKeyStr + "], column family [" + cfStr
                                + "], column [increment], incremented [" + (extra / amount) + "] times");
                    }
                }

                // See if we have correct columns.
                if (verifyCfAndColumnIntegrity
                        && !dataGenerator.verify(result.getRow(), cf, columnValues.keySet())) {
                    String colsStr = "";
                    for (byte[] col : columnValues.keySet()) {
                        if (colsStr.length() > 0) {
                            colsStr += ", ";
                        }
                        colsStr += "[" + Bytes.toString(col) + "]";
                    }
                    LOG.error("Error checking data for key [" + rowKeyStr + "], bad columns for family [" + cfStr
                            + "]: " + colsStr);
                    return false;
                }
                // See if values check out.
                if (verifyValues) {
                    for (Map.Entry<byte[], byte[]> kv : columnValues.entrySet()) {
                        String column = Bytes.toString(kv.getKey());
                        MutationType mutation = mutateInfo.get(column);
                        boolean verificationNeeded = true;
                        byte[] bytes = kv.getValue();
                        if (mutation != null) {
                            boolean mutationVerified = true;
                            long columnHash = Arrays.hashCode(kv.getKey());
                            long hashCode = cfHash + columnHash;
                            byte[] hashCodeBytes = Bytes.toBytes(hashCode);
                            if (mutation == MutationType.APPEND) {
                                int offset = bytes.length - hashCodeBytes.length;
                                mutationVerified = offset > 0 && Bytes.equals(hashCodeBytes, 0,
                                        hashCodeBytes.length, bytes, offset, hashCodeBytes.length);
                                if (mutationVerified) {
                                    int n = 1;
                                    while (true) {
                                        int newOffset = offset - hashCodeBytes.length;
                                        if (newOffset < 0 || !Bytes.equals(hashCodeBytes, 0, hashCodeBytes.length,
                                                bytes, newOffset, hashCodeBytes.length)) {
                                            break;
                                        }
                                        offset = newOffset;
                                        n++;
                                    }
                                    if (n > 1) {
                                        LOG.warn("Warning checking data for key [" + rowKeyStr
                                                + "], column family [" + cfStr + "], column [" + column
                                                + "], appended [" + n + "] times");
                                    }
                                    byte[] dest = new byte[offset];
                                    System.arraycopy(bytes, 0, dest, 0, offset);
                                    bytes = dest;
                                }
                            } else if (hashCode % 2 == 0) { // checkAndPut
                                mutationVerified = Bytes.equals(bytes, hashCodeBytes);
                                verificationNeeded = false;
                            }
                            if (!mutationVerified) {
                                LOG.error("Error checking data for key [" + rowKeyStr
                                        + "], mutation checking failed for column family [" + cfStr + "], column ["
                                        + column + "]; mutation [" + mutation + "], hashCode [" + hashCode
                                        + "], verificationNeeded [" + verificationNeeded + "]");
                                return false;
                            }
                        } // end of mutation checking
                        if (verificationNeeded && !dataGenerator.verify(result.getRow(), cf, kv.getKey(), bytes)) {
                            LOG.error("Error checking data for key [" + rowKeyStr + "], column family [" + cfStr
                                    + "], column [" + column + "], mutation [" + mutation + "]; value of length "
                                    + bytes.length);
                            return false;
                        }
                    }
                }
            }
        }
        return true;
    }

    // Parse mutate info into a map of <column name> => <update action>
    private Map<String, MutationType> parseMutateInfo(byte[] mutateInfo) {
        Map<String, MutationType> mi = new HashMap<String, MutationType>();
        if (mutateInfo != null) {
            String mutateInfoStr = Bytes.toString(mutateInfo);
            String[] mutations = mutateInfoStr.split("#");
            for (String mutation : mutations) {
                if (mutation.isEmpty())
                    continue;
                Preconditions.checkArgument(mutation.contains(":"), "Invalid mutation info " + mutation);
                int p = mutation.indexOf(":");
                String column = mutation.substring(0, p);
                MutationType type = MutationType.valueOf(Integer.parseInt(mutation.substring(p + 1)));
                mi.put(column, type);
            }
        }
        return mi;
    }
}