Java tutorial: benchmarking the HBase joined scanners optimization (TestJoinedScanners.java, HBASE-5416)
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test performance improvement of joined scanners optimization:
 * https://issues.apache.org/jira/browse/HBASE-5416
 */
@Category(LargeTests.class)
public class TestJoinedScanners {
  static final Log LOG = LogFactory.getLog(TestJoinedScanners.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static final String DIR = TEST_UTIL.getDataTestDir("TestJoinedScanners").toString();

  // Small "essential" family holds the flag column the filter looks at;
  // the "joined" family holds the large values that the optimization avoids loading eagerly.
  private static final byte[] cf_essential = Bytes.toBytes("essential");
  private static final byte[] cf_joined = Bytes.toBytes("joined");
  private static final byte[] col_name = Bytes.toBytes("a");
  private static final byte[] flag_yes = Bytes.toBytes("Y");
  private static final byte[] flag_no = Bytes.toBytes("N");

  private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF;
  private static int selectionRatio = 30;
  private static int valueWidth = 128 * 1024;

  @Test
  public void testJoinedScanners() throws Exception {
    String dataNodeHosts[] = new String[] { "host1", "host2", "host3" };
    int regionServersCount = 3;

    HBaseTestingUtility htu = new HBaseTestingUtility();

    final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
    htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
    htu.getConfiguration().setInt("dfs.replication", 1);
    htu.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);

    MiniHBaseCluster cluster = null;

    try {
      cluster = htu.startMiniCluster(1, regionServersCount, dataNodeHosts);
      byte[][] families = { cf_essential, cf_joined };

      byte[] tableName = Bytes.toBytes(this.getClass().getSimpleName());
      HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
      for (byte[] family : families) {
        HColumnDescriptor hcd = new HColumnDescriptor(family);
        hcd.setDataBlockEncoding(blockEncoding);
        desc.addFamily(hcd);
      }
      htu.getHBaseAdmin().createTable(desc);
      HTable ht = new HTable(htu.getConfiguration(), tableName);

      long rows_to_insert = 1000;
      int insert_batch = 20;
      long time = System.nanoTime();
      Random rand = new Random(time);

      LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = "
        + Float.toString(rows_to_insert * valueWidth / 1024 / 1024) + " MB");

      byte[] val_large = new byte[valueWidth];

      List<Put> puts = new ArrayList<Put>();

      for (long i = 0; i < rows_to_insert; i++) {
        Put put = new Put(Bytes.toBytes(Long.toString(i)));
        if (rand.nextInt(100) <= selectionRatio) {
          put.add(cf_essential, col_name, flag_yes);
        } else {
          put.add(cf_essential, col_name, flag_no);
        }
        put.add(cf_joined, col_name, val_large);
        puts.add(put);
        if (puts.size() >= insert_batch) {
          ht.put(puts);
          puts.clear();
        }
      }
      // flush any remaining puts
      if (puts.size() > 0) {
        ht.put(puts);
        puts.clear();
      }

      LOG.info("Data generated in "
        + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds");

      // Alternate between the slow (eager) and joined (lazy) scan paths.
      boolean slow = true;
      for (int i = 0; i < 10; ++i) {
        runScanner(ht, slow);
        slow = !slow;
      }

      ht.close();
    } finally {
      if (cluster != null) {
        htu.shutdownMiniCluster();
      }
    }
  }

  private void runScanner(HTable table, boolean slow) throws Exception {
    long time = System.nanoTime();
    Scan scan = new Scan();
    scan.addColumn(cf_essential, col_name);
    scan.addColumn(cf_joined, col_name);

    SingleColumnValueFilter filter = new SingleColumnValueFilter(
      cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
    filter.setFilterIfMissing(true);
    scan.setFilter(filter);
    // When not in "slow" mode, enable lazy loading of non-essential column families.
    scan.setLoadColumnFamiliesOnDemand(!slow);

    ResultScanner result_scanner = table.getScanner(scan);
    Result res;
    long rows_count = 0;
    while ((res = result_scanner.next()) != null) {
      rows_count++;
    }

    double timeSec = (System.nanoTime() - time) / 1000000000.0;
    result_scanner.close();
    LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
      + " seconds, got " + Long.toString(rows_count / 2) + " rows");
  }

  private static HRegion initHRegion(byte[] tableName, byte[] startKey, byte[] stopKey,
      String callingMethod, Configuration conf, byte[]... families) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
    for (byte[] family : families) {
      HColumnDescriptor hcd = new HColumnDescriptor(family);
      hcd.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
      htd.addFamily(hcd);
    }
    HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
    Path path = new Path(DIR + callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
      if (!fs.delete(path, true)) {
        throw new IOException("Failed delete of " + path);
      }
    }
    return HRegion.createHRegion(info, path, conf, htd);
  }

  private static Options options = new Options();

  /**
   * Command line interface to run the benchmark standalone.
   * @param args command line arguments
   * @throws Exception if the benchmark fails
   */
  public static void main(final String[] args) throws Exception {
    Option encodingOption = new Option("e", "blockEncoding", true,
      "Data block encoding; Default: FAST_DIFF");
    encodingOption.setRequired(false);
    options.addOption(encodingOption);

    Option ratioOption = new Option("r", "selectionRatio", true,
      "Ratio of selected rows using essential column family");
    ratioOption.setRequired(false);
    options.addOption(ratioOption);

    Option widthOption = new Option("w", "valueWidth", true,
      "Width of value for non-essential column family");
    widthOption.setRequired(false);
    options.addOption(widthOption);

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
    if (args.length < 1) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("TestJoinedScanners", options, true);
    }

    if (cmd.hasOption("e")) {
      blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e"));
    }
    if (cmd.hasOption("r")) {
      selectionRatio = Integer.parseInt(cmd.getOptionValue("r"));
    }
    if (cmd.hasOption("w")) {
      valueWidth = Integer.parseInt(cmd.getOptionValue("w"));
    }
    // run the test
    TestJoinedScanners test = new TestJoinedScanners();
    test.testJoinedScanners();
  }
}
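For context, the optimization this test benchmarks is driven entirely from the client side through Scan.setLoadColumnFamiliesOnDemand(...): with it enabled, HBase scans only the "essential" families referenced by the filter and fetches the remaining families lazily, just for rows that pass. The standalone sketch below shows the same pattern outside the test harness; the table name "mytable" is an assumption, the column layout mirrors the schema the test creates, and the old-style HTable client API is used to match the code above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class JoinedScanExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Assumed table: small "essential" family with a flag column, wide "joined"
    // family with large values, as in the test above. "mytable" is a placeholder.
    HTable table = new HTable(conf, "mytable");
    try {
      Scan scan = new Scan();
      scan.addColumn(Bytes.toBytes("essential"), Bytes.toBytes("a"));
      scan.addColumn(Bytes.toBytes("joined"), Bytes.toBytes("a"));

      // Filter only on the cheap essential family.
      SingleColumnValueFilter filter = new SingleColumnValueFilter(
        Bytes.toBytes("essential"), Bytes.toBytes("a"),
        CompareFilter.CompareOp.EQUAL, Bytes.toBytes("Y"));
      filter.setFilterIfMissing(true);
      scan.setFilter(filter);

      // Enable the joined-scanner path: non-essential families are loaded
      // on demand, only for rows that pass the filter.
      scan.setLoadColumnFamiliesOnDemand(true);

      ResultScanner scanner = table.getScanner(scan);
      try {
        for (Result r : scanner) {
          // process matching rows; the large "joined" value is fetched only here
        }
      } finally {
        scanner.close();
      }
    } finally {
      table.close();
    }
  }
}

The lazy path pays off when most rows are filtered out and the non-essential family is much larger than the essential one, which is exactly the skew the benchmark constructs: roughly a 30% selection ratio against 128 KB values in the joined family.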