org.apache.hadoop.hbase.PerformanceEvaluation.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.PerformanceEvaluation.java

Source

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import static org.codehaus.jackson.map.SerializationConfig.Feature.SORT_PROPERTIES_ALPHABETICALLY;

import java.io.IOException;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.math.BigDecimal;
import java.math.MathContext;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterAllFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.trace.SpanReceiverHost;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hbase.util.MurmurHash;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.codehaus.jackson.map.ObjectMapper;

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.yammer.metrics.core.Histogram;
import com.yammer.metrics.stats.UniformSample;
import com.yammer.metrics.stats.Snapshot;
import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;
import org.htrace.impl.ProbabilitySampler;

/**
 * Script used for evaluating HBase performance and scalability.  Runs an HBase
 * client that steps through one of a set of hardcoded tests or 'experiments'
 * (e.g. a random reads test, a random writes test, etc.). Pass on the
 * command-line which test to run and how many clients are participating in
 * this experiment. Run <code>java PerformanceEvaluation --help</code> to
 * obtain usage.
 *
 * <p>This class sets up and runs the evaluation programs described in
 * Section 7, <i>Performance Evaluation</i>, of the <a
 * href="http://labs.google.com/papers/bigtable.html">Bigtable</a>
 * paper, pages 8-10.
 *
 * <p>If number of clients > 1, we start up a MapReduce job. Each map task
 * runs an individual client. Each client does about 1GB of data.
 */
public class PerformanceEvaluation extends Configured implements Tool {
    /** Logger used by this class and by the nested mapper and test classes. */
    protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());

    /** Default table name; TestOptions.tableName is initialised to this value. */
    public static final String TABLE_NAME = "TestTable";
    /** The single column family added to the table descriptor built by getTableDescriptor. */
    public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
    /** Column qualifier requested by the read and scan tests. */
    public static final byte[] QUALIFIER_NAME = Bytes.toBytes("data");
    // Presumably the size in bytes of each generated cell value; it is the divisor for ROWS_PER_GB.
    public static final int VALUE_LENGTH = 1000;
    // NOTE(review): presumably the width of a formatted row key -- confirm against format().
    public static final int ROW_LENGTH = 26;

    // 1024 * 1024 * 1000 bytes: not a power-of-two gigabyte, but it divides evenly by VALUE_LENGTH.
    private static final int ONE_GB = 1024 * 1024 * 1000;
    // 1,048,576 rows; the default for TestOptions.perClientRunRows and TestOptions.totalRows.
    private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH;
    // TODO : should we make this configurable
    private static final int TAG_LENGTH = 256;
    // Formatting and arithmetic helpers; their uses are outside this part of the file.
    private static final DecimalFormat FMT = new DecimalFormat("0.##");
    private static final MathContext CXT = MathContext.DECIMAL64;
    private static final BigDecimal MS_PER_SEC = BigDecimal.valueOf(1000);
    private static final BigDecimal BYTES_PER_MB = BigDecimal.valueOf(1024 * 1024);
    // A TestOptions instance carrying only the field defaults.
    private static final TestOptions DEFAULT_OPTS = new TestOptions();

    /** Registered commands keyed by command name; filled by addCommandDescriptor. */
    protected Map<String, CmdDescriptor> commands = new TreeMap<String, CmdDescriptor>();

    // Relative parent directory under which writeInputFile creates one timestamped job directory per run.
    private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");

    /**
     * Enum for map metrics.  Keep it out here rather than inside in the Map
     * inner-class so we can find associated properties.
     */
    protected static enum Counter {
        /** elapsed time */
        ELAPSED_TIME,
        /** number of rows */
        ROWS
    }

    /**
     * Constructor
     * @param conf Configuration object
     */
    public PerformanceEvaluation(final Configuration conf) {
        super(conf);

        addCommandDescriptor(RandomReadTest.class, "randomRead", "Run random read test");
        addCommandDescriptor(RandomSeekScanTest.class, "randomSeekScan", "Run random seek and scan 100 test");
        addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10",
                "Run random seek scan with both start and stop row (max 10 rows)");
        addCommandDescriptor(RandomScanWithRange100Test.class, "scanRange100",
                "Run random seek scan with both start and stop row (max 100 rows)");
        addCommandDescriptor(RandomScanWithRange1000Test.class, "scanRange1000",
                "Run random seek scan with both start and stop row (max 1000 rows)");
        addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000",
                "Run random seek scan with both start and stop row (max 10000 rows)");
        addCommandDescriptor(RandomWriteTest.class, "randomWrite", "Run random write test");
        addCommandDescriptor(SequentialReadTest.class, "sequentialRead", "Run sequential read test");
        addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite", "Run sequential write test");
        addCommandDescriptor(ScanTest.class, "scan", "Run scan test (read every row)");
        addCommandDescriptor(FilteredScanTest.class, "filterScan",
                "Run scan test using a filter to find a specific row based on it's value "
                        + "(make sure to use --rows=20)");
    }

    /**
     * Registers a test command under the given name, replacing any command
     * already registered with that name.
     * @param cmdClass test implementation to run for this command
     * @param name command name, used as the key in the command map
     * @param description text describing the command
     */
    protected void addCommandDescriptor(Class<? extends Test> cmdClass, String name, String description) {
        commands.put(name, new CmdDescriptor(cmdClass, name, description));
    }

    /**
     * Callback through which a running test publishes status messages.
     */
    interface Status {
        /**
         * Publishes a status message.
         * @param msg status message
         * @throws IOException if the implementation cannot publish the message
         */
        void setStatus(String msg) throws IOException;
    }

    /**
     * MapReduce job that runs a performance evaluation client in each map task.
     * Each input line is a JSON-serialised {@link TestOptions} describing the
     * slice of rows the task is to work on.
     */
    public static class EvaluationMapTask extends Mapper<LongWritable, Text, LongWritable, LongWritable> {

        /** configuration parameter name that contains the command */
        public static final String CMD_KEY = "EvaluationMapTask.command";
        /** configuration parameter name that contains the PE impl */
        public static final String PE_KEY = "EvaluationMapTask.performanceEvalImpl";

        private Class<? extends Test> cmd;

        /**
         * Resolves the test class named under {@link #CMD_KEY} and instantiates
         * the PerformanceEvaluation implementation named under {@link #PE_KEY}.
         * @throws IllegalStateException if either class cannot be loaded or the
         *         PE implementation cannot be constructed
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            this.cmd = forName(context.getConfiguration().get(CMD_KEY), Test.class);

            // this is required so that extensions of PE are instantiated within the
            // map reduce task...
            Class<? extends PerformanceEvaluation> peClass = forName(context.getConfiguration().get(PE_KEY),
                    PerformanceEvaluation.class);
            try {
                // The instance itself is not kept; only the construction matters here.
                peClass.getConstructor(Configuration.class).newInstance(context.getConfiguration());
            } catch (Exception e) {
                throw new IllegalStateException("Could not instantiate PE instance", e);
            }
        }

        /**
         * Loads the named class and narrows it to a subclass of {@code type}.
         * @throws IllegalStateException if the class cannot be found
         */
        private <T> Class<? extends T> forName(String className, Class<T> type) {
            try {
                return Class.forName(className).asSubclass(type);
            } catch (ClassNotFoundException e) {
                throw new IllegalStateException("Could not find class for name: " + className, e);
            }
        }

        /**
         * Runs one client for the options serialised in {@code value}, then
         * records the elapsed time and row count as counters and emits
         * (startRow, elapsedTime) as the map output.
         */
        @Override
        protected void map(LongWritable key, Text value, final Context context)
                throws IOException, InterruptedException {

            // Forward test status messages to the task's status line.
            Status status = new Status() {
                @Override
                public void setStatus(String msg) {
                    context.setStatus(msg);
                }
            };

            ObjectMapper mapper = new ObjectMapper();
            TestOptions opts = mapper.readValue(value.toString(), TestOptions.class);
            Configuration conf = HBaseConfiguration.create(context.getConfiguration());

            // Evaluation task
            long elapsedTime = runOneClient(this.cmd, conf, opts, status);
            // Collect how much time the thing took. Report as map output and
            // to the ELAPSED_TIME counter.
            context.getCounter(Counter.ELAPSED_TIME).increment(elapsedTime);
            context.getCounter(Counter.ROWS).increment(opts.perClientRunRows);
            context.write(new LongWritable(opts.startRow), new LongWritable(elapsedTime));
            context.progress();
        }
    }

    /**
     * Makes sure the test table is present.  When pre-splitting is requested
     * any existing table is disabled and deleted, then the table is recreated
     * with the computed split points; otherwise the table is created only if
     * it is missing.
     * @param admin admin client used to inspect and create the table
     * @param opts options supplying the table layout and the number of regions
     * @return whether the table exists once this method has run
     * @throws IOException if an admin operation fails
     */
    private static boolean checkTable(HBaseAdmin admin, TestOptions opts) throws IOException {
        HTableDescriptor tableDescriptor = getTableDescriptor(opts);
        TableName name = tableDescriptor.getTableName();
        boolean presplitRequested = opts.presplitRegions > 0;

        if (!presplitRequested) {
            if (!admin.tableExists(name)) {
                admin.createTable(tableDescriptor);
                LOG.info("Table " + tableDescriptor + " created");
            }
            return admin.tableExists(name);
        }

        // Pre-split: start from a clean table so the region boundaries are the requested ones.
        if (admin.tableExists(name)) {
            admin.disableTable(name);
            admin.deleteTable(name);
        }

        byte[][] splits = getSplits(opts);
        int index = 0;
        for (byte[] split : splits) {
            LOG.debug(" split " + index + ": " + Bytes.toStringBinary(split));
            index++;
        }
        admin.createTable(tableDescriptor, splits);
        LOG.info("Table created with " + opts.presplitRegions + " splits");
        return admin.tableExists(name);
    }

    /**
     * Builds the descriptor of the test table: the table named in the options
     * with a single column family configured from the options' block encoding,
     * compression and in-memory settings.
     * @param opts options to read the table name and family settings from
     * @return a new table descriptor holding that one family
     */
    protected static HTableDescriptor getTableDescriptor(TestOptions opts) {
        HColumnDescriptor columnFamily = new HColumnDescriptor(FAMILY_NAME);
        columnFamily.setDataBlockEncoding(opts.blockEncoding);
        columnFamily.setCompressionType(opts.compression);
        // Only touch the in-memory flag when it was asked for.
        if (opts.inMemoryCF) {
            columnFamily.setInMemory(true);
        }

        HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf(opts.tableName));
        tableDescriptor.addFamily(columnFamily);
        return tableDescriptor;
    }

    /**
     * Generates evenly spaced split keys from the total number of rows and
     * the requested number of regions.
     * @param opts options supplying {@code presplitRegions} and {@code totalRows}
     * @return {@code presplitRegions - 1} split keys, or an empty array when
     *         no pre-splitting is requested ({@code presplitRegions <= 0})
     */
    protected static byte[][] getSplits(TestOptions opts) {
        // Non-positive means "no pre-split" (checkTable tests for > 0); a negative
        // value would otherwise reach the array allocation below and throw.
        if (opts.presplitRegions <= 0) {
            return new byte[0][];
        }

        int numSplitPoints = opts.presplitRegions - 1;
        byte[][] splits = new byte[numSplitPoints][];
        // Distance in rows between consecutive region boundaries.
        int jump = opts.totalRows / opts.presplitRegions;
        for (int i = 0; i < numSplitPoints; i++) {
            int rowkey = jump * (1 + i);
            splits[i] = format(rowkey);
        }
        return splits;
    }

    /**
     * Runs all clients inside this JVM, each on its own pool thread, waits for
     * every one of them and logs the individual, minimum, maximum and average
     * elapsed times.
     * @param cmd test class each client runs
     * @param opts options shared by the clients; each client gets a private copy
     *        whose start row is offset by its index
     * @throws IOException wrapping the cause when a client fails
     * @throws InterruptedException if interrupted while waiting for a client
     */
    private void doLocalClients(final Class<? extends Test> cmd, final TestOptions opts)
            throws IOException, InterruptedException {
        @SuppressWarnings("unchecked")
        Future<Long>[] pending = new Future[opts.numClientThreads];
        long[] elapsed = new long[opts.numClientThreads];
        ExecutorService executor = Executors.newFixedThreadPool(opts.numClientThreads,
                new ThreadFactoryBuilder().setNameFormat("TestClient-%s").build());

        for (int client = 0; client < pending.length; client++) {
            final int clientIndex = client;
            Callable<Long> task = new Callable<Long>() {
                @Override
                public Long call() throws Exception {
                    TestOptions clientOpts = new TestOptions(opts);
                    clientOpts.startRow = clientIndex * clientOpts.perClientRunRows;
                    Status logStatus = new Status() {
                        public void setStatus(final String msg) throws IOException {
                            LOG.info(msg);
                        }
                    };
                    long millis = runOneClient(cmd, getConf(), clientOpts, logStatus);
                    LOG.info("Finished in " + millis + "ms over " + clientOpts.perClientRunRows + " rows");
                    return millis;
                }
            };
            pending[client] = executor.submit(task);
        }
        // No further tasks will be submitted; already-submitted ones keep running.
        executor.shutdown();

        for (int client = 0; client < pending.length; client++) {
            try {
                elapsed[client] = pending[client].get();
            } catch (ExecutionException e) {
                throw new IOException(e.getCause());
            }
        }

        final String testLabel = cmd.getSimpleName();
        LOG.info("[" + testLabel + "] Summary of timings (ms): " + Arrays.toString(elapsed));

        // After sorting, the first and last entries are the minimum and maximum.
        Arrays.sort(elapsed);
        long sum = 0;
        for (int k = 0; k < elapsed.length; k++) {
            sum += elapsed[k];
        }
        long fastest = elapsed[0];
        long slowest = elapsed[elapsed.length - 1];
        long average = sum / elapsed.length;
        LOG.info("[" + testLabel + "]" + "\tMin: " + fastest + "ms" + "\tMax: " + slowest + "ms"
                + "\tAvg: " + average + "ms");
    }

    /**
     * Runs the evaluation as a MapReduce job.  Before the job is started an
     * input file is written with one line of instructions per map task
     * (see writeInputFile); the input format hands each line to its own
     * map task.
     * @param cmd test class each map task runs
     * @param opts options the per-task instructions are derived from
     * @throws IOException if the input file cannot be written or the job cannot be submitted
     * @throws InterruptedException if interrupted while waiting for the job
     * @throws ClassNotFoundException propagated from job submission
     */
    private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts)
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = getConf();
        Path inputDir = writeInputFile(conf, opts);
        // Set before the Job is created so the map tasks can see both keys.
        conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
        conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
        Job job = new Job(conf);
        job.setJarByClass(PerformanceEvaluation.class);
        job.setJobName("HBase Performance Evaluation");

        job.setInputFormatClass(NLineInputFormat.class);
        NLineInputFormat.setInputPaths(job, inputDir);
        // this is default, but be explicit about it just in case.
        NLineInputFormat.setNumLinesPerSplit(job, 1);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(LongWritable.class);

        job.setMapperClass(EvaluationMapTask.class);
        job.setReducerClass(LongSumReducer.class);

        job.setNumReduceTasks(1);

        job.setOutputFormatClass(TextOutputFormat.class);
        // Output goes next to the "inputs" directory of this run.
        TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Histogram.class, // yammer metrics
                ObjectMapper.class); // jackson-mapper-asl

        TableMapReduceUtil.initCredentials(job);

        // Report a failed job instead of discarding the result; the method still
        // returns normally so existing callers see no new exception.
        if (!job.waitForCompletion(true)) {
            LOG.error("Job " + job.getJobName() + " did not complete successfully");
        }
    }

    /**
     * Writes the input file of per-task instructions for the mapreduce job.
     * Each client's row range is cut into ten slices and every slice is
     * written as one JSON-serialised {@link TestOptions} line.  Lines are
     * emitted in hash order so that neighbouring slices are not handed out
     * in sequence.
     * @param c Configuration used to obtain the file system
     * @param opts options the per-slice options are copied from
     * @return Directory that contains file written.
     * @throws IOException if the directory or file cannot be created or written
     */
    private Path writeInputFile(final Configuration c, final TestOptions opts) throws IOException {
        SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
        Path jobdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
        Path inputDir = new Path(jobdir, "inputs");

        FileSystem fs = FileSystem.get(c);
        fs.mkdirs(inputDir);

        Path inputFile = new Path(inputDir, "input.txt");
        PrintStream out = new PrintStream(fs.create(inputFile));
        // Make input random.
        Map<Integer, String> m = new TreeMap<Integer, String>();
        Hash h = MurmurHash.getInstance();
        int perClientRows = (opts.totalRows / opts.numClientThreads);
        ObjectMapper mapper = new ObjectMapper();
        mapper.configure(SORT_PROPERTIES_ALPHABETICALLY, true);
        try {
            for (int i = 0; i < 10; i++) {
                for (int j = 0; j < opts.numClientThreads; j++) {
                    TestOptions next = new TestOptions(opts);
                    next.startRow = (j * perClientRows) + (i * (perClientRows / 10));
                    next.perClientRunRows = perClientRows / 10;
                    String s = mapper.writeValueAsString(next);
                    int hash = h.hash(Bytes.toBytes(s));
                    // Two different lines can hash to the same key; step to the next free
                    // key so one slice never overwrites another.  An identical line keeps
                    // its key, so exact duplicates still collapse to a single entry.
                    String existing = m.get(hash);
                    while (existing != null && !existing.equals(s)) {
                        hash++;
                        existing = m.get(hash);
                    }
                    m.put(hash, s);
                }
            }
            for (Map.Entry<Integer, String> e : m.entrySet()) {
                out.println(e.getValue());
            }
            // PrintStream never throws on write; ask it explicitly whether anything failed.
            if (out.checkError()) {
                throw new IOException("Failed writing input file " + inputFile);
            }
        } finally {
            out.close();
        }
        return inputDir;
    }

    /**
     * Describes a command: the test class that implements it, the name it is
     * registered under and a description.  Instances are immutable.
     */
    static class CmdDescriptor {
        private final Class<? extends Test> cmdClass;
        private final String name;
        private final String description;

        /**
         * @param cmdClass test implementation for this command
         * @param name command name
         * @param description description of the command
         */
        CmdDescriptor(Class<? extends Test> cmdClass, String name, String description) {
            this.cmdClass = cmdClass;
            this.name = name;
            this.description = description;
        }

        /** @return the test class that implements this command */
        public Class<? extends Test> getCmdClass() {
            return cmdClass;
        }

        /** @return the command name */
        public String getName() {
            return name;
        }

        /** @return the command description */
        public String getDescription() {
            return description;
        }
    }

    /**
     * Bundle of the options passed to {@link org.apache.hadoop.hbase.PerformanceEvaluation}.
     * Fields are public and mutable; instances are written to and read from
     * JSON when the evaluation runs as a MapReduce job, so field names are
     * part of that format.
     */
    static class TestOptions {

        public boolean nomapred = false;
        public boolean filterAll = false;
        public int startRow = 0;
        public float size = 1.0f;
        public int perClientRunRows = ROWS_PER_GB;
        public int numClientThreads = 1;
        public int totalRows = ROWS_PER_GB;
        public float sampleRate = 1.0f;
        public double traceRate = 0.0;
        public String tableName = TABLE_NAME;
        public boolean flushCommits = true;
        public boolean writeToWAL = true;
        public boolean autoFlush = false;
        public boolean useTags = false;
        public int noOfTags = 1;
        public boolean reportLatency = false;
        public int multiGet = 0;
        public boolean inMemoryCF = false;
        public int presplitRegions = 0;
        public Compression.Algorithm compression = Compression.Algorithm.NONE;
        public DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;

        /** Creates options holding the defaults declared on the fields. */
        public TestOptions() {
        }

        /**
         * Copy constructor: every field is taken from {@code that}.
         * @param that options to copy
         */
        public TestOptions(TestOptions that) {
            // Listed in field declaration order so a missing field is easy to spot.
            this.nomapred = that.nomapred;
            this.filterAll = that.filterAll;
            this.startRow = that.startRow;
            this.size = that.size;
            this.perClientRunRows = that.perClientRunRows;
            this.numClientThreads = that.numClientThreads;
            this.totalRows = that.totalRows;
            this.sampleRate = that.sampleRate;
            this.traceRate = that.traceRate;
            this.tableName = that.tableName;
            this.flushCommits = that.flushCommits;
            this.writeToWAL = that.writeToWAL;
            this.autoFlush = that.autoFlush;
            this.useTags = that.useTags;
            this.noOfTags = that.noOfTags;
            this.reportLatency = that.reportLatency;
            this.multiGet = that.multiGet;
            this.inMemoryCF = that.inMemoryCF;
            this.presplitRegions = that.presplitRegions;
            this.compression = that.compression;
            this.blockEncoding = that.blockEncoding;
        }
    }

    /**
     * A test.
     * Subclass to particularize what happens per row.  The base class opens
     * the connection and table, drives the per-row loop, records per-row
     * latency in a histogram and reports it at the end.
     */
    static abstract class Test {
        // Below is make it so when Tests are all running in the one
        // jvm, that they each have a differently seeded Random.
        private static final Random randomSeed = new Random(System.currentTimeMillis());

        private static long nextRandomSeed() {
            return randomSeed.nextLong();
        }

        // Only rows whose index is a multiple of this value are exercised; always non-zero.
        private final int everyN;

        protected final Random rand = new Random(nextRandomSeed());
        protected final Configuration conf;
        protected final TestOptions opts;

        private final Status status;
        private final Sampler<?> traceSampler;
        private final SpanReceiverHost receiverHost;
        protected HConnection connection;
        protected HTableInterface table;

        private String testName;
        private Histogram latency;

        /**
         * Note that all subclasses of this class must provide a public contructor
         * that has the exact same list of arguments.
         */
        Test(final Configuration conf, final TestOptions options, final Status status) {
            this.conf = conf;
            this.opts = options;
            this.status = status;
            this.testName = this.getClass().getSimpleName();
            receiverHost = SpanReceiverHost.getInstance(conf);
            if (options.traceRate >= 1.0) {
                this.traceSampler = Sampler.ALWAYS;
            } else if (options.traceRate > 0.0) {
                this.traceSampler = new ProbabilitySampler(options.traceRate);
            } else {
                this.traceSampler = Sampler.NEVER;
            }
            // The cast yields 0 when the sample rate exceeds 1 or totalRows is 0; a
            // zero here would make "i % everyN" in testTimed throw, so sample every row instead.
            int sampleEvery = (int) (opts.totalRows / (opts.totalRows * opts.sampleRate));
            everyN = sampleEvery == 0 ? 1 : sampleEvery;
            LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
        }

        private String generateStatus(final int sr, final int i, final int lr) {
            return sr + "/" + i + "/" + lr + " " + getShortLatencyReport();
        }

        /**
         * @return how many rows pass between status reports: a tenth of this
         *         client's rows, or all of them when that would be zero
         */
        protected int getReportingPeriod() {
            int period = opts.perClientRunRows / 10;
            return period == 0 ? opts.perClientRunRows : period;
        }

        /**
         * Opens the connection and table and creates the latency histogram.
         * @throws IOException if the connection or table cannot be opened
         */
        void testSetup() throws IOException {
            this.connection = HConnectionManager.createConnection(conf);
            this.table = connection.getTable(opts.tableName);
            this.table.setAutoFlush(opts.autoFlush, true);

            try {
                // Histogram's constructor is not accessible, hence the reflection.
                Constructor<?> ctor = Histogram.class.getDeclaredConstructor(com.yammer.metrics.stats.Sample.class);
                ctor.setAccessible(true);
                latency = (Histogram) ctor.newInstance(new UniformSample(1024 * 500));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }

        }

        /**
         * Reports latency, flushes pending commits when configured, and
         * releases the table, the connection and the span receivers.  The
         * resources are released even when the report or the flush fails.
         * @throws IOException if reporting, flushing or closing fails
         */
        void testTakedown() throws IOException {
            try {
                reportLatency();
                if (opts.flushCommits) {
                    this.table.flushCommits();
                }
            } finally {
                // Nested so that a failure closing one resource does not skip the next.
                try {
                    table.close();
                } finally {
                    try {
                        connection.close();
                    } finally {
                        receiverHost.closeReceivers();
                    }
                }
            }
        }

        /*
         * Run test
         * @return Elapsed time.
         * @throws IOException
         */
        long test() throws IOException {
            testSetup();
            LOG.info("Timed test starting in thread " + Thread.currentThread().getName());
            final long startTime = System.nanoTime();
            try {
                testTimed();
            } finally {
                testTakedown();
            }
            // Nanoseconds to milliseconds; the takedown is part of the measured time.
            return (System.nanoTime() - startTime) / 1000000;
        }

        /**
         * Provides an extension point for tests that don't want a per row invocation.
         */
        void testTimed() throws IOException {
            int lastRow = opts.startRow + opts.perClientRunRows;
            // Report on completion of 1/10th of total.
            for (int i = opts.startRow; i < lastRow; i++) {
                if (i % everyN != 0)
                    continue;
                long startTime = System.nanoTime();
                TraceScope scope = Trace.startSpan("test row", traceSampler);
                try {
                    testRow(i);
                } finally {
                    scope.close();
                }
                // Latency is recorded in microseconds.
                latency.update((System.nanoTime() - startTime) / 1000);
                if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
                    status.setStatus(generateStatus(opts.startRow, i, lastRow));
                }
            }
        }

        /**
         * report percentiles of latency
         * @throws IOException
         */
        private void reportLatency() throws IOException {
            // testTimed tolerates a null status; do the same here.
            if (status == null) {
                return;
            }
            status.setStatus(testName + " latency log (microseconds), on " + latency.count() + " measures");
            Snapshot sn = latency.getSnapshot();
            status.setStatus(testName + " Min      = " + latency.min());
            status.setStatus(testName + " Avg      = " + latency.mean());
            status.setStatus(testName + " StdDev   = " + latency.stdDev());
            status.setStatus(testName + " 50th     = " + sn.getMedian());
            status.setStatus(testName + " 95th     = " + sn.get95thPercentile());
            status.setStatus(testName + " 99th     = " + sn.get99thPercentile());
            status.setStatus(testName + " 99.9th   = " + sn.get999thPercentile());
            status.setStatus(testName + " 99.99th  = " + sn.getValue(0.9999));
            status.setStatus(testName + " 99.999th = " + sn.getValue(0.99999));
            status.setStatus(testName + " Max      = " + latency.max());
        }

        /**
         * Used formating doubles so only two places after decimal point.
         * Shared by all instances; access it only through formatDouble,
         * because DecimalFormat is not safe for concurrent use and tests
         * run on several threads in the same JVM.
         */
        private static final DecimalFormat DOUBLE_FORMAT = new DecimalFormat("#0.00");

        /** Formats a value with two decimal places, serialising access to the shared formatter. */
        private static String formatDouble(final double value) {
            synchronized (DOUBLE_FORMAT) {
                return DOUBLE_FORMAT.format(value);
            }
        }

        /**
         * @return Subset of the histograms' calculation.
         */
        private String getShortLatencyReport() {
            Snapshot sn = latency.getSnapshot();
            return "Mean=" + formatDouble(latency.mean()) + ", StdDev="
                    + formatDouble(latency.stdDev()) + ", 95th="
                    + formatDouble(sn.get95thPercentile()) + ", 99th="
                    + formatDouble(sn.get99thPercentile());
        }

        /*
        * Test for individual row.
        * @param i Row index.
        */
        abstract void testRow(final int i) throws IOException;
    }

    /**
     * For each row, opens a scan at a random start row, limited by a page
     * filter of 120 rows, and reads it to the end.
     */
    static class RandomSeekScanTest extends Test {
        RandomSeekScanTest(Configuration conf, TestOptions options, Status status) {
            super(conf, options, status);
        }

        @Override
        void testRow(final int i) throws IOException {
            Scan scan = new Scan(getRandomRow(this.rand, opts.totalRows));
            FilterList list = new FilterList();
            scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
            if (opts.filterAll) {
                list.addFilter(new FilterAllFilter());
            }
            list.addFilter(new WhileMatchFilter(new PageFilter(120)));
            scan.setFilter(list);
            ResultScanner s = this.table.getScanner(scan);
            try {
                // Drain the scanner; the results themselves are not needed.
                while (s.next() != null) {
                    // nothing to do per result
                }
            } finally {
                // Release the scanner even when next() fails.
                s.close();
            }
        }

        /**
         * @return a hundredth of this client's rows, or all of them when that
         *         would be zero
         */
        @Override
        protected int getReportingPeriod() {
            int period = opts.perClientRunRows / 100;
            return period == 0 ? opts.perClientRunRows : period;
        }

    }

    /**
     * Base for tests that scan a bounded key range starting at a random row.
     * Subclasses choose the size of the range through
     * {@link #getStartAndStopRow()}.
     */
    static abstract class RandomScanWithRangeTest extends Test {
        RandomScanWithRangeTest(Configuration conf, TestOptions options, Status status) {
            super(conf, options, status);
        }

        @Override
        void testRow(final int i) throws IOException {
            Pair<byte[], byte[]> startAndStopRow = getStartAndStopRow();
            Scan scan = new Scan(startAndStopRow.getFirst(), startAndStopRow.getSecond());
            if (opts.filterAll) {
                scan.setFilter(new FilterAllFilter());
            }
            scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
            ResultScanner s = this.table.getScanner(scan);
            try {
                int count = 0;
                while (s.next() != null) {
                    count++;
                }

                // Log one scan in every hundred row indexes.
                if (i % 100 == 0) {
                    LOG.info(String.format("Scan for key range %s - %s returned %s rows",
                            Bytes.toString(startAndStopRow.getFirst()), Bytes.toString(startAndStopRow.getSecond()),
                            count));
                }
            } finally {
                // Release the scanner even when next() fails.
                s.close();
            }
        }

        /** @return the start row and the stop row of the next scan */
        protected abstract Pair<byte[], byte[]> getStartAndStopRow();

        /**
         * Picks a random start row below {@code opts.totalRows} and a stop row
         * {@code maxRange} rows after it.
         * @param maxRange distance in rows between the start and the stop row
         * @return the formatted start and stop row keys
         */
        protected Pair<byte[], byte[]> generateStartAndStopRows(int maxRange) {
            int start = this.rand.nextInt(Integer.MAX_VALUE) % opts.totalRows;
            int stop = start + maxRange;
            return new Pair<byte[], byte[]>(format(start), format(stop));
        }

        /**
         * @return a hundredth of this client's rows, or all of them when that
         *         would be zero
         */
        @Override
        protected int getReportingPeriod() {
            int period = opts.perClientRunRows / 100;
            return period == 0 ? opts.perClientRunRows : period;
        }
    }

    /** Random range scan whose stop row lies 10 rows after its start row. */
    static class RandomScanWithRange10Test extends RandomScanWithRangeTest {
        /** Distance in rows between the start and the stop row. */
        private static final int MAX_RANGE = 10;

        RandomScanWithRange10Test(Configuration conf, TestOptions options, Status status) {
            super(conf, options, status);
        }

        @Override
        protected Pair<byte[], byte[]> getStartAndStopRow() {
            Pair<byte[], byte[]> range = generateStartAndStopRows(MAX_RANGE);
            return range;
        }
    }

    /** Random range scan whose stop row lies 100 rows after its start row. */
    static class RandomScanWithRange100Test extends RandomScanWithRangeTest {
        /** Distance in rows between the start and the stop row. */
        private static final int MAX_RANGE = 100;

        RandomScanWithRange100Test(Configuration conf, TestOptions options, Status status) {
            super(conf, options, status);
        }

        @Override
        protected Pair<byte[], byte[]> getStartAndStopRow() {
            Pair<byte[], byte[]> range = generateStartAndStopRows(MAX_RANGE);
            return range;
        }
    }

    /** Random range scan whose stop row is 1000 rows past a randomly chosen start row. */
    static class RandomScanWithRange1000Test extends RandomScanWithRangeTest {
        RandomScanWithRange1000Test(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
        }

        /** @return a random start row paired with the row 1000 positions after it */
        @Override
        protected Pair<byte[], byte[]> getStartAndStopRow() {
            final int rangeWidth = 1000;
            return generateStartAndStopRows(rangeWidth);
        }
    }

    /** Random range scan whose stop row is 10000 rows past a randomly chosen start row. */
    static class RandomScanWithRange10000Test extends RandomScanWithRangeTest {
        RandomScanWithRange10000Test(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
        }

        /** @return a random start row paired with the row 10000 positions after it */
        @Override
        protected Pair<byte[], byte[]> getStartAndStopRow() {
            final int rangeWidth = 10000;
            return generateStartAndStopRows(rangeWidth);
        }
    }

    /**
     * Issues a Get for a randomly chosen row on every iteration. When
     * {@code opts.multiGet} is positive the Gets are buffered and sent in
     * batches of that size instead of one at a time.
     */
    static class RandomReadTest extends Test {
        /** Pending batch of Gets; stays null unless multi-get mode is enabled. */
        private ArrayList<Get> gets;

        RandomReadTest(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
            if (opts.multiGet > 0) {
                LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
                this.gets = new ArrayList<Get>(opts.multiGet);
            }
        }

        /**
         * Builds a Get for a random row and either sends it immediately or adds
         * it to the pending batch, sending the batch once it is full.
         */
        @Override
        void testRow(final int i) throws IOException {
            final Get randomGet = new Get(getRandomRow(this.rand, opts.totalRows));
            randomGet.addColumn(FAMILY_NAME, QUALIFIER_NAME);
            if (opts.filterAll) {
                randomGet.setFilter(new FilterAllFilter());
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace(randomGet.toString());
            }

            // Single-get mode: nothing to buffer.
            if (opts.multiGet <= 0) {
                this.table.get(randomGet);
                return;
            }

            this.gets.add(randomGet);
            if (this.gets.size() == opts.multiGet) {
                sendPendingGets();
            }
        }

        /**
         * @return one hundredth of the per-client row count, or the full per-client
         *         row count when that hundredth rounds down to zero
         */
        @Override
        protected int getReportingPeriod() {
            final int onePercent = opts.perClientRunRows / 100;
            if (onePercent != 0) {
                return onePercent;
            }
            return opts.perClientRunRows;
        }

        /** Sends any Gets still waiting in a partially filled batch, then delegates to the parent. */
        @Override
        protected void testTakedown() throws IOException {
            if (this.gets != null && this.gets.size() > 0) {
                sendPendingGets();
            }
            super.testTakedown();
        }

        /** Sends the buffered Gets as one batch and empties the buffer. */
        private void sendPendingGets() throws IOException {
            this.table.get(this.gets);
            this.gets.clear();
        }
    }

    /** Writes one generated value to a randomly chosen row on every iteration. */
    static class RandomWriteTest extends Test {
        RandomWriteTest(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
        }

        /**
         * Puts a freshly generated value into a random row, attaching
         * {@code opts.noOfTags} tags when {@code opts.useTags} is set, with the
         * durability selected by {@code opts.writeToWAL}.
         */
        @Override
        void testRow(final int i) throws IOException {
            final byte[] rowKey = getRandomRow(this.rand, opts.totalRows);
            final Put mutation = new Put(rowKey);
            final byte[] cellValue = generateData(this.rand, VALUE_LENGTH);

            if (!opts.useTags) {
                mutation.add(FAMILY_NAME, QUALIFIER_NAME, cellValue);
            } else {
                // Every tag shares the same generated payload; only the tag type byte differs.
                final byte[] tagPayload = generateData(this.rand, TAG_LENGTH);
                final Tag[] cellTags = new Tag[opts.noOfTags];
                for (int idx = 0; idx < cellTags.length; idx++) {
                    cellTags[idx] = new Tag((byte) idx, tagPayload);
                }
                mutation.add(new KeyValue(rowKey, FAMILY_NAME, QUALIFIER_NAME, HConstants.LATEST_TIMESTAMP,
                        cellValue, cellTags));
            }

            final Durability durability;
            if (opts.writeToWAL) {
                durability = Durability.SYNC_WAL;
            } else {
                durability = Durability.SKIP_WAL;
            }
            mutation.setDurability(durability);
            table.put(mutation);
        }
    }

    /**
     * Advances a single scanner by one result per iteration. The scanner is
     * opened lazily on the first row and closed at takedown.
     */
    static class ScanTest extends Test {
        private ResultScanner testScanner;

        ScanTest(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
        }

        /** Closes the scanner if one was opened, then delegates to the parent. */
        @Override
        void testTakedown() throws IOException {
            if (this.testScanner != null) {
                this.testScanner.close();
            }
            super.testTakedown();
        }

        /** Opens the scanner on first use and fetches the next result from it. */
        @Override
        void testRow(final int i) throws IOException {
            if (this.testScanner == null) {
                this.testScanner = table.getScanner(buildScan());
            }
            this.testScanner.next();
        }

        /**
         * Builds the scan starting at {@code opts.startRow}, restricted to the test
         * column, with caching set to 30 and the filter-all filter applied when
         * {@code opts.filterAll} is set.
         */
        private Scan buildScan() throws IOException {
            final Scan fullScan = new Scan(format(opts.startRow));
            fullScan.setCaching(30);
            fullScan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
            if (opts.filterAll) {
                fullScan.setFilter(new FilterAllFilter());
            }
            return fullScan;
        }

    }

    /** Issues a Get for the row whose number equals the iteration index. */
    static class SequentialReadTest extends Test {
        SequentialReadTest(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
        }

        /**
         * Reads the test column of row {@code i}, applying the filter-all filter
         * when {@code opts.filterAll} is set.
         */
        @Override
        void testRow(final int i) throws IOException {
            final Get sequentialGet = new Get(format(i));
            sequentialGet.addColumn(FAMILY_NAME, QUALIFIER_NAME);
            if (opts.filterAll) {
                sequentialGet.setFilter(new FilterAllFilter());
            }
            table.get(sequentialGet);
        }
    }

    /** Writes one generated value to the row whose number equals the iteration index. */
    static class SequentialWriteTest extends Test {
        SequentialWriteTest(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
        }

        /**
         * Puts a freshly generated value into row {@code i}, attaching
         * {@code opts.noOfTags} tags when {@code opts.useTags} is set, with the
         * durability selected by {@code opts.writeToWAL}.
         */
        @Override
        void testRow(final int i) throws IOException {
            final byte[] rowKey = format(i);
            final Put mutation = new Put(rowKey);
            final byte[] cellValue = generateData(this.rand, VALUE_LENGTH);

            if (!opts.useTags) {
                mutation.add(FAMILY_NAME, QUALIFIER_NAME, cellValue);
            } else {
                // Every tag shares the same generated payload; only the tag type byte differs.
                final byte[] tagPayload = generateData(this.rand, TAG_LENGTH);
                final Tag[] cellTags = new Tag[opts.noOfTags];
                for (int idx = 0; idx < cellTags.length; idx++) {
                    cellTags[idx] = new Tag((byte) idx, tagPayload);
                }
                mutation.add(new KeyValue(rowKey, FAMILY_NAME, QUALIFIER_NAME, HConstants.LATEST_TIMESTAMP,
                        cellValue, cellTags));
            }

            final Durability durability;
            if (opts.writeToWAL) {
                durability = Durability.SYNC_WAL;
            } else {
                durability = Durability.SKIP_WAL;
            }
            mutation.setDurability(durability);
            table.put(mutation);
        }
    }

    /**
     * Runs a table scan per iteration that filters on the test column being
     * equal to a freshly generated value, draining the scanner each time.
     */
    static class FilteredScanTest extends Test {
        protected static final Log LOG = LogFactory.getLog(FilteredScanTest.class.getName());

        FilteredScanTest(Configuration conf, TestOptions testOptions, Status status) {
            super(conf, testOptions, status);
        }

        /** Generates a value, scans for it and reads the scanner until it is exhausted. */
        @Override
        void testRow(int i) throws IOException {
            final Scan filteredScan = constructScan(generateData(this.rand, VALUE_LENGTH));
            final ResultScanner results = this.table.getScanner(filteredScan);
            try {
                boolean exhausted = false;
                while (!exhausted) {
                    exhausted = results.next() == null;
                }
            } finally {
                results.close();
            }
        }

        /**
         * Builds a scan over the test column whose filter list holds an equality
         * filter on {@code valuePrefix} and, when {@code opts.filterAll} is set,
         * the filter-all filter as well.
         *
         * @param valuePrefix bytes the column value is compared against
         * @return the configured scan
         */
        protected Scan constructScan(byte[] valuePrefix) throws IOException {
            final FilterList filters = new FilterList();
            final Filter valueEquals = new SingleColumnValueFilter(FAMILY_NAME, QUALIFIER_NAME,
                    CompareFilter.CompareOp.EQUAL, new BinaryComparator(valuePrefix));
            filters.addFilter(valueEquals);
            if (opts.filterAll) {
                filters.addFilter(new FilterAllFilter());
            }

            final Scan filteredScan = new Scan();
            filteredScan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
            filteredScan.setFilter(filters);
            return filteredScan;
        }
    }

    /**
     * Converts a row count and an elapsed time into a throughput figure.
     * <p>
     * MB/s = ((rows * bytesPerRow) / timeMs) * MS_PER_SEC / BYTES_PER_MB, where
     * bytesPerRow is the sum of the row key, value, family name and qualifier
     * name lengths.
     *
     * @param rows Number of records consumed.
     * @param timeMs Time taken in milliseconds.
     * @return String value with label, ie '123.76 MB/s'
     */
    private static String calculateMbps(int rows, long timeMs) {
        final BigDecimal bytesPerRow = BigDecimal
                .valueOf(ROW_LENGTH + VALUE_LENGTH + FAMILY_NAME.length + QUALIFIER_NAME.length);
        final BigDecimal totalBytes = BigDecimal.valueOf(rows).multiply(bytesPerRow, CXT);
        final BigDecimal bytesPerMs = totalBytes.divide(BigDecimal.valueOf(timeMs), CXT);
        final BigDecimal bytesPerSec = bytesPerMs.multiply(MS_PER_SEC, CXT);
        final BigDecimal megabytesPerSec = bytesPerSec.divide(BYTES_PER_MB, CXT);
        return FMT.format(megabytesPerSec) + " MB/s";
    }

    /**
     * Formats the passed integer as a fixed-width row key.
     *
     * @param number value to format; its absolute value is used when negative
     * @return zero-prefixed, ROW_LENGTH-byte wide decimal rendering of the
     *         absolute value of {@code number}
     */
    public static byte[] format(final int number) {
        byte[] b = new byte[ROW_LENGTH];
        // Widen before taking the absolute value: Math.abs(Integer.MIN_VALUE) is
        // still negative in int arithmetic, which would make (d % 10) negative
        // and emit bytes below '0'.
        long d = Math.abs((long) number);
        // Fill from the least significant digit backwards; leading positions
        // end up as '0' once d reaches zero.
        for (int i = b.length - 1; i >= 0; i--) {
            b[i] = (byte) ((d % 10) + '0');
            d /= 10;
        }
        return b;
    }

    /**
     * Generates a byte array of random uppercase ASCII letters. The array is
     * filled in runs of eight identical letters, followed by a final run (of up
     * to eight bytes) that uses one more random letter.
     * <p>
     * This method takes some time and is done inline uploading data.  For
     * example, doing the mapfile test, generation of the key and value
     * consumes about 30% of CPU time.
     *
     * @param r source of randomness; consulted once per run of letters
     * @param length number of bytes to generate
     * @return Generated random value to insert into a table cell.
     */
    public static byte[] generateData(final Random r, int length) {
        final byte[] data = new byte[length];
        int offset = 0;

        // Full runs of eight, stopping while more than eight bytes remain.
        while (offset < length - 8) {
            final byte letter = (byte) (65 + r.nextInt(26));
            for (int k = 0; k < 8; k++) {
                data[offset + k] = letter;
            }
            offset += 8;
        }

        // Whatever is left shares one final letter (drawn even when nothing is left).
        final byte tailLetter = (byte) (65 + r.nextInt(26));
        while (offset < length) {
            data[offset++] = tailLetter;
        }
        return data;
    }

    /**
     * Generates a random cell value that is VALUE_LENGTH bytes long.
     *
     * @deprecated Use {@link #generateData(java.util.Random, int)} instead.
     * @param r source of randomness
     * @return Generated random value to insert into a table cell.
     */
    @Deprecated
    public static byte[] generateValue(final Random r) {
        final byte[] cellValue = generateData(r, VALUE_LENGTH);
        return cellValue;
    }

    /**
     * Picks a random row number below {@code totalRows} and formats it as a row key.
     *
     * @param random source of randomness
     * @param totalRows modulus applied to the random draw
     * @return formatted key of the chosen row
     */
    static byte[] getRandomRow(final Random random, final int totalRows) {
        final int rowNumber = random.nextInt(Integer.MAX_VALUE) % totalRows;
        return format(rowNumber);
    }

    /**
     * Instantiates the given test class reflectively, runs it, and reports start
     * and finish (with throughput) through {@code status}.
     *
     * @param cmd test class to run; must declare a constructor taking
     *        (Configuration, TestOptions, Status)
     * @param conf configuration handed to the test
     * @param opts options handed to the test; startRow, perClientRunRows and
     *        sampleRate are also used in the status messages
     * @param status sink for progress messages
     * @return elapsed time reported by the test, in milliseconds
     * @throws IOException if reporting status or running the test fails
     * @throws IllegalArgumentException if {@code cmd} lacks the expected constructor
     * @throws IllegalStateException if the test instance cannot be constructed
     */
    static long runOneClient(final Class<? extends Test> cmd, Configuration conf, TestOptions opts,
            final Status status) throws IOException {
        status.setStatus(
                "Start " + cmd + " at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows");
        long totalElapsedTime;

        final Test t;
        try {
            Constructor<? extends Test> constructor = cmd.getDeclaredConstructor(Configuration.class,
                    TestOptions.class, Status.class);
            t = constructor.newInstance(conf, opts, status);
        } catch (NoSuchMethodException e) {
            // List the declared constructors (not just the public ones) so the
            // message matches what getDeclaredConstructor searched, and keep the
            // original exception as the cause.
            throw new IllegalArgumentException("Invalid command class: " + cmd.getName()
                    + ".  It does not provide a constructor as described by "
                    + "the javadoc comment.  Available constructors are: "
                    + Arrays.toString(cmd.getDeclaredConstructors()), e);
        } catch (Exception e) {
            throw new IllegalStateException("Failed to construct command class", e);
        }
        totalElapsedTime = t.test();

        // Throughput is computed over the sampled share of the per-client rows.
        status.setStatus("Finished " + cmd + " in " + totalElapsedTime + "ms at offset " + opts.startRow + " for "
                + opts.perClientRunRows + " rows" + " ("
                + calculateMbps((int) (opts.perClientRunRows * opts.sampleRate), totalElapsedTime) + ")");
        return totalElapsedTime;
    }

    /**
     * Checks the test table through a short-lived admin connection, then runs
     * the command either with local client threads or as a MapReduce job,
     * depending on {@code opts.nomapred}.
     *
     * @param cmd test class to run
     * @param opts options controlling the run
     */
    private void runTest(final Class<? extends Test> cmd, TestOptions opts)
            throws IOException, InterruptedException, ClassNotFoundException {
        final HBaseAdmin tableAdmin = new HBaseAdmin(getConf());
        try {
            checkTable(tableAdmin, opts);
        } finally {
            tableAdmin.close();
        }

        if (!opts.nomapred) {
            doMapReduce(cmd, opts);
        } else {
            doLocalClients(cmd, opts);
        }
    }

    /** Prints the usage text to standard error without a leading message. */
    protected void printUsage() {
        final String noMessage = null;
        printUsage(noMessage);
    }

    /**
     * Prints the usage text (options, available commands, arguments and an
     * example) to standard error.
     *
     * @param message optional text printed before the usage; skipped when null or empty
     */
    protected void printUsage(final String message) {
        final PrintStream err = System.err;
        final String className = this.getClass().getName();

        if (message != null && message.length() > 0) {
            err.println(message);
        }

        // Synopsis.
        err.println("Usage: java " + className + " \\");
        err.println("  [--nomapred] [--rows=ROWS] [--table=NAME] \\");
        err.println("  [--compress=TYPE] [--blockEncoding=TYPE] " + "[-D<property=value>]* <command> <nclients>");
        err.println();

        // Options.
        err.println("Options:");
        err.println(" nomapred        Run multiple clients using threads " + "(rather than use mapreduce)");
        err.println(" rows            Rows each client runs. Default: One million");
        err.println(" size            Total size in GiB. Mutually exclusive with --rows. " + "Default: 1.0.");
        err.println(" sampleRate      Execute test on a sample of total "
                + "rows. Only supported by randomRead. Default: 1.0");
        err.println(" traceRate       Enable HTrace spans. Initiate tracing every N rows. " + "Default: 0");
        err.println(" table           Alternate table name. Default: 'TestTable'");
        err.println(" multiGet        If >0, when doing RandomRead, perform multiple gets "
                + "instead of single gets. Default: 0");
        err.println(" compress        Compression type to use (GZ, LZO, ...). Default: 'NONE'");
        err.println(" flushCommits    Used to determine if the test should flush the table. " + "Default: false");
        err.println(" writeToWAL      Set writeToWAL on puts. Default: True");
        err.println(" autoFlush       Set autoFlush on htable. Default: False");
        err.println(" presplit        Create presplit table. Recommended for accurate perf "
                + "analysis (see guide).  Default: disabled");
        err.println(" inmemory        Tries to keep the HFiles of the CF "
                + "inmemory as far as possible. Not guaranteed that reads are always served "
                + "from memory.  Default: false");
        err.println(" usetags         Writes tags along with KVs. Use with HFile V3. " + "Default: false");
        err.println(" numoftags       Specify the no of tags that would be needed. "
                + "This works only if usetags is true.");
        err.println(" filterAll       Helps to filter out all the rows on the server side"
                + " there by not returning any thing back to the client.  Helps to check the server side"
                + " performance.  Uses FilterAllFilter internally. ");
        err.println(" latency         Set to report operation latencies. Default: False");
        err.println();

        // -D property note.
        err.println(" Note: -D properties will be applied to the conf used. ");
        err.println("  For example: ");
        err.println("   -Dmapreduce.output.fileoutputformat.compress=true");
        err.println("   -Dmapreduce.task.timeout=60000");
        err.println();

        // One line per registered command.
        err.println("Command:");
        for (CmdDescriptor descriptor : commands.values()) {
            err.println(String.format(" %-15s %s", descriptor.getName(), descriptor.getDescription()));
        }
        err.println();

        // Positional arguments and example.
        err.println("Args:");
        err.println(" nclients        Integer. Required. Total number of " + "clients (and HRegionServers)");
        err.println("                 running: 1 <= value <= 500");
        err.println("Examples:");
        err.println(" To run a single evaluation client:");
        err.println(" $ bin/hbase " + className + " sequentialWrite 1");
    }

    /**
     * Parses the client count found at position {@code start} of the argument list.
     *
     * @param start index in {@code args} where the client count is expected
     * @param args command-line arguments
     * @return the parsed number of clients, always at least 1
     * @throws IllegalArgumentException if the argument is missing, is not an
     *         integer (as a NumberFormatException), or is less than 1
     */
    private static int getNumClients(final int start, final String[] args) {
        if (start + 1 > args.length) {
            throw new IllegalArgumentException("must supply the number of clients");
        }
        int N = Integer.parseInt(args[start]);
        if (N < 1) {
            // The check accepts 1, so the message must say ">= 1" rather than "> 1".
            throw new IllegalArgumentException("Number of clients must be >= 1");
        }
        return N;
    }

    /**
     * Parses the command line and runs the requested test command.
     * <p>
     * Arguments are processed left to right. Recognized {@code --option} flags
     * update the test options; the first argument that names a known command
     * ends option parsing, takes the following argument as the client count,
     * derives the row count and data size from each other, and runs the test.
     * Any other argument prints the usage and stops.
     *
     * @param args command-line arguments
     * @return 0 when help was requested or the test ran; -1 when no arguments
     *         were given, an argument was not recognized, or an exception was
     *         raised (its stack trace is printed)
     */
    public int run(String[] args) throws Exception {
        // Process command-line args. TODO: Better cmd-line processing
        // (but hopefully something not as painful as cli options).
        int errCode = -1;
        if (args.length < 1) {
            printUsage();
            return errCode;
        }

        try {
            // MR-NOTE: if you are adding a property that is used to control an operation
            // like put(), get(), scan(), ... you must also add it as part of the MR
            // input, take a look at writeInputFile().
            // Then you must adapt the LINE_PATTERN input regex,
            // and parse the argument, take a look at PEInputFormat.getSplits().

            TestOptions opts = new TestOptions();

            for (int i = 0; i < args.length; i++) {
                String cmd = args[i];
                if (cmd.equals("-h") || cmd.startsWith("--h")) {
                    printUsage();
                    errCode = 0;
                    break;
                }

                final String nmr = "--nomapred";
                if (cmd.startsWith(nmr)) {
                    opts.nomapred = true;
                    continue;
                }

                final String rows = "--rows=";
                if (cmd.startsWith(rows)) {
                    opts.perClientRunRows = Integer.parseInt(cmd.substring(rows.length()));
                    continue;
                }

                final String sampleRate = "--sampleRate=";
                if (cmd.startsWith(sampleRate)) {
                    opts.sampleRate = Float.parseFloat(cmd.substring(sampleRate.length()));
                    continue;
                }

                final String traceRate = "--traceRate=";
                if (cmd.startsWith(traceRate)) {
                    opts.traceRate = Double.parseDouble(cmd.substring(traceRate.length()));
                    continue;
                }

                final String table = "--table=";
                if (cmd.startsWith(table)) {
                    opts.tableName = cmd.substring(table.length());
                    continue;
                }

                final String compress = "--compress=";
                if (cmd.startsWith(compress)) {
                    opts.compression = Compression.Algorithm.valueOf(cmd.substring(compress.length()));
                    continue;
                }

                final String blockEncoding = "--blockEncoding=";
                if (cmd.startsWith(blockEncoding)) {
                    opts.blockEncoding = DataBlockEncoding.valueOf(cmd.substring(blockEncoding.length()));
                    continue;
                }

                final String flushCommits = "--flushCommits=";
                if (cmd.startsWith(flushCommits)) {
                    opts.flushCommits = Boolean.parseBoolean(cmd.substring(flushCommits.length()));
                    continue;
                }

                final String writeToWAL = "--writeToWAL=";
                if (cmd.startsWith(writeToWAL)) {
                    opts.writeToWAL = Boolean.parseBoolean(cmd.substring(writeToWAL.length()));
                    continue;
                }

                final String autoFlush = "--autoFlush=";
                if (cmd.startsWith(autoFlush)) {
                    opts.autoFlush = Boolean.parseBoolean(cmd.substring(autoFlush.length()));
                    continue;
                }

                final String presplit = "--presplit=";
                if (cmd.startsWith(presplit)) {
                    opts.presplitRegions = Integer.parseInt(cmd.substring(presplit.length()));
                    continue;
                }

                final String inMemory = "--inmemory=";
                if (cmd.startsWith(inMemory)) {
                    opts.inMemoryCF = Boolean.parseBoolean(cmd.substring(inMemory.length()));
                    continue;
                }

                final String latency = "--latency";
                if (cmd.startsWith(latency)) {
                    opts.reportLatency = true;
                    continue;
                }

                final String multiGet = "--multiGet=";
                if (cmd.startsWith(multiGet)) {
                    opts.multiGet = Integer.parseInt(cmd.substring(multiGet.length()));
                    continue;
                }

                final String useTags = "--usetags=";
                if (cmd.startsWith(useTags)) {
                    opts.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
                    continue;
                }

                final String noOfTags = "--nooftags=";
                if (cmd.startsWith(noOfTags)) {
                    opts.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
                    continue;
                }

                // The usage text documents this option as "numoftags"; accept that
                // spelling as well so the documented flag actually works, while
                // keeping "--nooftags=" for existing callers.
                final String numOfTags = "--numoftags=";
                if (cmd.startsWith(numOfTags)) {
                    opts.noOfTags = Integer.parseInt(cmd.substring(numOfTags.length()));
                    continue;
                }

                final String filterOutAll = "--filterAll";
                if (cmd.startsWith(filterOutAll)) {
                    opts.filterAll = true;
                    continue;
                }

                final String size = "--size=";
                if (cmd.startsWith(size)) {
                    opts.size = Float.parseFloat(cmd.substring(size.length()));
                    continue;
                }

                Class<? extends Test> cmdClass = determineCommandClass(cmd);
                if (cmdClass != null) {
                    // The argument right after the command name is the client count.
                    opts.numClientThreads = getNumClients(i + 1, args);
                    if (opts.size != DEFAULT_OPTS.size && opts.perClientRunRows != DEFAULT_OPTS.perClientRunRows) {
                        throw new IllegalArgumentException(
                                rows + " and " + size + " are mutually exclusive arguments.");
                    }
                    if (opts.size != DEFAULT_OPTS.size) {
                        // total size in GB specified
                        // Multiply before narrowing to int: casting opts.size first
                        // would truncate fractional sizes (e.g. 0.5 became 0 rows).
                        opts.totalRows = (int) (opts.size * ROWS_PER_GB);
                        opts.perClientRunRows = opts.totalRows / opts.numClientThreads;
                    } else if (opts.perClientRunRows != DEFAULT_OPTS.perClientRunRows) {
                        // number of rows specified
                        opts.totalRows = opts.perClientRunRows * opts.numClientThreads;
                        // Divide in floating point so sizes that are not a whole
                        // number of GB are not rounded down.
                        opts.size = (float) opts.totalRows / ROWS_PER_GB;
                    }
                    runTest(cmdClass, opts);
                    errCode = 0;
                    break;
                }

                // Neither a known option nor a known command.
                printUsage();
                break;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        return errCode;
    }

    /**
     * Looks up the test class registered for a command name.
     *
     * @param cmd command name as given on the command line
     * @return the command's test class, or null when no such command is registered
     */
    private Class<? extends Test> determineCommandClass(String cmd) {
        final CmdDescriptor registered = commands.get(cmd);
        if (registered == null) {
            return null;
        }
        return registered.getCmdClass();
    }

    /**
     * Command-line entry point: runs the tool through ToolRunner with a fresh
     * HBase configuration and exits the JVM with the tool's return code.
     *
     * @param args command-line arguments
     */
    public static void main(final String[] args) throws Exception {
        final PerformanceEvaluation tool = new PerformanceEvaluation(HBaseConfiguration.create());
        System.exit(ToolRunner.run(tool, args));
    }
}