org.apache.hadoop.hbase.KEY.java Source code

Introduction

Here is the source code for org.apache.hadoop.hbase.KEY.java. The file defines the package-private helper classes KEY, DataGenUtil, RegionWriteInputFormat and GenerateRegionDataTask, together with the public driver class GenerateTestTable, which creates a pre-split HBase test table and populates it with generated data via a MapReduce job.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;

import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;

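/**
 * Names of the Configuration options (plus a few constants) used to pass
 * parameters from the GenerateTestTable driver to its MapReduce tasks.
 */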
class KEY {
    public static final String INPUT_TABLE = TableInputFormat.INPUT_TABLE;
    public static final String OPTION_CFNUM = "test_key_cfnum";
    public static final String OPTION_COLNUM = "test_key_colnum";
    public static final String CF_PREIX = "test_key_cfprefix";
    public static final String COL_PREIX = "test_key_colprefix";
    public static final String OPTION_COLSFORMAT = "test_key_colsformat";
    public static final String OPTION_USEBULKLOAD = "test_key_usebulkload";
    public static String OPTION_ROWNUM = "test_key_rownum";
    public static String OPTION_BASEROWNUM = "test_key_baserownum";
    public static String OPTION_REGIONROWNUM = "test_key_regionrownum";

    public static long BATCHNUM = 1000;
    public static String BULKLOADDIR = "/bulkload/";
}

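/**
 * Helpers for generating test data: zero-padded sequence strings (used for
 * family and column names), random ints, longs and text, plus a small column
 * format descriptor parsed from the "--colsformat=" option.
 */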
class DataGenUtil {

    private static Random numberGenerator = new Random(1L);

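    /**
     * Returns n strings of the form prefix + zero-padded index (e.g. "C00".."C17"),
     * padded so that the generated names sort lexicographically in numeric order.
     */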
    public static String[] genSequenceStrings(String s, int n) {
        String result[] = new String[n];
        int numberWidth = 0;

        for (int i = 1; i < n; i *= 10) {
            numberWidth++;
        }

        for (int i = 0; i < n; i++) {
            result[i] = Integer.toString(i);
            int padding = numberWidth - result[i].length();
            for (int j = 0; j < padding; j++) {
                result[i] = "0" + result[i];
            }
            result[i] = s + result[i];
        }

        return result;
    }

    public static String genRandomString(int minLength, int maxLength, String prefix) {
        int lengthOfStr = (int) (Math.random() * (maxLength - minLength + 1)) + minLength;
        char[] strChar = new char[lengthOfStr];

        for (int i = 0; i < strChar.length; i++) {
            strChar[i] = (char) (Math.random() * 26 + 97);
        }

        String result = new String(strChar);

        if (prefix != null) {
            result = prefix + result;
        }

        return result;
    }

    public static int genRandomInt(int min, int max) {
        // Guard against max == min (e.g. a "--colsformat" entry like "i:5" with no
        // upper bound); Random.nextInt(0) would throw IllegalArgumentException.
        if (max <= min) {
            return min;
        }
        return numberGenerator.nextInt(max - min) + min;
    }

    public static long genRandomLong() {
        long result = numberGenerator.nextLong();
        return result;
    }

    public enum ColType {
        NUMINT('i', "numberInt"), NUMLONG('l', "numberLong"), STRSEQ('s', "stringSequence"), TEXTRANDOM('t',
                "textRandom");

        private String typeName;
        private char id;

        private ColType(char id, String name) {
            this.id = id;
            this.typeName = name;
        }

        public char getID() {
            return this.id;
        }

        public String getColName() {
            return this.typeName;
        }

        static public ColType[] parseType(String s) {
            int length = s.length();
            ColType results[] = new ColType[length];
            for (int i = 0; i < length; i++) {
                switch (s.charAt(i)) {
                case 'i':
                    results[i] = NUMINT;
                    break;
                case 'l':
                    results[i] = NUMLONG;
                    break;
                case 's':
                    results[i] = STRSEQ;
                    break;
                case 't':
                    results[i] = TEXTRANDOM;
                    break;
                default:
                    results[i] = STRSEQ;
                    break;
                }
            }
            return results;
        }
    }

    public class ColFormat {
        public ColType type;

        public ColFormat(ColType t) {
            this.type = t;
        }
    }

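    /**
     * Parses a comma-separated column format string, e.g. "i:0:100,l,s,t:8:16":
     * i:min[:max] = random int in a range, l = random long, s = sequence string,
     * t:minLen[:maxLen] = random lowercase text.
     */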
    public ColFormat[] parseColsFormat(String formatStr) {
        String[] cols = formatStr.split(",");
        int length = cols.length;
        ColFormat results[] = new ColFormat[length];
        ColFormat curCol = null;
        String[] fields = null;

        for (int i = 0; i < length; i++) {
            fields = cols[i].split(":");
            if (fields[0].equalsIgnoreCase("i")) {
                if (fields.length < 2 || fields.length > 3)
                    throw new RuntimeException("parseColsFormat Err");
                int minint = Integer.parseInt(fields[1]);
                int maxint = minint;
                if (fields.length > 2)
                    maxint = Integer.parseInt(fields[2]);
                if (maxint < minint)
                    maxint = minint;
                curCol = new ColIntRange(minint, maxint);
                results[i] = curCol;
            } else if (fields[0].equalsIgnoreCase("l")) {
                curCol = new ColLong();
                results[i] = curCol;
            } else if (fields[0].equalsIgnoreCase("s")) {
                curCol = new ColStrSeq();
                results[i] = curCol;
            } else if (fields[0].equalsIgnoreCase("t")) {
                if (fields.length < 2 || fields.length > 3)
                    throw new RuntimeException("parseColsFormat Err");

                int minlength = Integer.parseInt(fields[1]);
                int maxlength = 0;
                if (fields.length > 2)
                    maxlength = Integer.parseInt(fields[2]);
                if (maxlength < minlength)
                    maxlength = minlength;
                curCol = new ColTextRandom(minlength, maxlength);
                results[i] = curCol;
            } else {
                throw new RuntimeException("parseColsFormat Err");
            }
        }
        return results;
    }

    public class ColTextRandom extends ColFormat {
        public int minLength;
        public int maxLength;

        public ColTextRandom(int min, int max) {
            super(ColType.TEXTRANDOM);
            minLength = min;
            maxLength = max;
        }
    }

    public class ColIntRange extends ColFormat {
        public int min;
        public int max;

        public ColIntRange(int min, int max) {
            super(ColType.NUMINT);
            this.min = min;
            this.max = max;
        }
    }

    public class ColLong extends ColFormat {
        public ColLong() {
            super(ColType.NUMLONG);
        }
    }

    public class ColStrSeq extends ColFormat {
        public ColStrSeq() {
            super(ColType.STRSEQ);
        }
    }
}

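/**
 * InputFormat that reuses the region splits of an existing HBase table (via
 * TableInputFormat) and, for each region, hands the mapper a series of
 * (rowKeyPrefix, batchSize) pairs totalling OPTION_REGIONROWNUM rows.
 */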
class RegionWriteInputFormat extends InputFormat<String, Long> {

    public static class RegionWriteRecordReader extends RecordReader<String, Long> {
        private TableSplit value = null;
        private Configuration conf;
        private long rowNum;
        private long index = 0L;
        private long baseRowNumber = 1L;
        private String regionPrefix = null;
        private boolean currentValueRead = false;

        public RegionWriteRecordReader(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            initialize(split, context);
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            this.value = (TableSplit) split;
            conf = context.getConfiguration();
            this.rowNum = conf.getLong(KEY.OPTION_REGIONROWNUM, 100000L);
            this.baseRowNumber = conf.getLong(KEY.OPTION_BASEROWNUM, 1L);
            this.index = 0L;

            byte[] srow = value.getStartRow();
            if (srow.length == 0) {
                // This is the first region, so use "aaaa" as the key prefix.
                regionPrefix = "aaaa";
            } else {
                regionPrefix = Bytes.toString(srow);
            }
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            // Advance by one batch once the previous value has been consumed,
            // then stop as soon as this region's row quota has been generated.
            if (currentValueRead) {
                index += KEY.BATCHNUM;
                currentValueRead = false;
            }
            return index < rowNum;
        }

        @Override
        public String getCurrentKey() throws IOException, InterruptedException {
            String s = regionPrefix + Long.toString((this.baseRowNumber + index) / KEY.BATCHNUM);
            return s;
        }

        @Override
        public Long getCurrentValue() throws IOException, InterruptedException {
            currentValueRead = true;
            if ((rowNum - index) > KEY.BATCHNUM)
                return KEY.BATCHNUM;
            return rowNum - index;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return index * 1.0f / rowNum;
        }

        @Override
        public void close() throws IOException {
        }
    }

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        TableInputFormat tif = new TableInputFormat();
        tif.setConf(context.getConfiguration());
        return tif.getSplits(context);
    }

    @Override
    public RecordReader<String, Long> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new RegionWriteRecordReader(split, context);
    }
}

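/**
 * Mapper that generates the actual rows for one region. Depending on
 * OPTION_USEBULKLOAD it either batches Puts against the table directly or
 * writes HFiles through StoreFile.Writer for a later bulk load.
 */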
class GenerateRegionDataTask extends Mapper<String, Long, LongWritable, LongWritable> {

    private int colNum;
    private int cfNum = 1;
    private String tableName = null;
    private String cfPrefix = null;
    private String colPrefix = null;
    private HTable ht = null;
    private String[] families = null;
    private String[] columns = null;

    private Configuration conf;
    private DataGenUtil.ColFormat[] ColsFormat;
    private BulkWriter[] bulkWriters;

    private boolean useBulkload = false;

    public class BulkWriter {
        StoreFile.Writer writer = null;
    }

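    /**
     * Reads the job options, opens the target table and, when bulk load is enabled,
     * creates one StoreFile writer per column family under the bulk-load directory.
     */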
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {

        conf = context.getConfiguration();

        this.cfNum = conf.getInt(KEY.OPTION_CFNUM, 1);
        this.colNum = conf.getInt(KEY.OPTION_COLNUM, 18);
        this.tableName = conf.get(KEY.INPUT_TABLE);
        this.cfPrefix = conf.get(KEY.CF_PREIX, "F");
        this.colPrefix = conf.get(KEY.COL_PREIX, "C");

        families = DataGenUtil.genSequenceStrings(cfPrefix, this.cfNum);
        columns = DataGenUtil.genSequenceStrings(colPrefix, this.colNum);

        this.useBulkload = conf.getBoolean(KEY.OPTION_USEBULKLOAD, false);

        String colsFormatString = conf.get(KEY.OPTION_COLSFORMAT, "");

        DataGenUtil dataGenUtil = new DataGenUtil();
        this.ColsFormat = dataGenUtil.parseColsFormat(colsFormatString);

        // setup() declares IOException, so let a failure to open the table fail the task.
        ht = new HTable(conf, tableName);

        if (this.useBulkload) {
            bulkWriters = new BulkWriter[this.cfNum];
            Path bulkOutputPath = new Path(KEY.BULKLOADDIR + tableName);
            FileSystem fs = bulkOutputPath.getFileSystem(conf);

            for (int i = 0; i < families.length; i++) {
                String family = families[i];
                Path cfPath = new Path(bulkOutputPath, family);

                if (!fs.exists(cfPath)) {
                    fs.mkdirs(cfPath);
                }

                HColumnDescriptor cfDesc = ht.getTableDescriptor().getFamily(family.getBytes());
                HFileDataBlockEncoder dataBlockEncoder = new HFileDataBlockEncoderImpl(
                        cfDesc.getDataBlockEncodingOnDisk(), cfDesc.getDataBlockEncoding());

                Configuration tempConf = new Configuration(conf);

                BulkWriter w = new BulkWriter();

                w.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, cfDesc.getBlocksize())
                        .withOutputDir(cfPath).withCompression(cfDesc.getCompression())
                        .withBloomType(cfDesc.getBloomFilterType()).withComparator(KeyValue.COMPARATOR)
                        .withDataBlockEncoder(dataBlockEncoder).withChecksumType(Store.getChecksumType(conf))
                        .withBytesPerChecksum(Store.getBytesPerChecksum(conf)).build();

                bulkWriters[i] = w;
            }
        }

    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {

        if (ht != null)
            ht.close();

        if (this.useBulkload) {
            for (BulkWriter w : this.bulkWriters) {
                w.writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.writer.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.writer.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.writer.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(false));
                w.writer.appendTrackedTimestampsToMetadata();
                w.writer.close();
            }
        }
    }

    @Override
    protected void map(String prefix, Long rows, final Context context) throws IOException, InterruptedException {

        //long startTime = System.currentTimeMillis();
        // region write task
        try {
            doWrite(prefix, rows, context);
        } catch (Exception e) {
            e.printStackTrace();
            throw new IOException(e);
        }

        //long elapsedTime = System.currentTimeMillis() - startTime;

        context.progress();
    }

    private String longToStringPadding(int width, long val) {
        String result = Long.toString(val);
        int padding = width - result.length();

        for (int i = 0; i < padding; i++) {
            result = "0" + result;
        }
        return result;
    }

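    /**
     * Writes the given number of rows in batches of KEY.BATCHNUM. Row keys are
     * rowPrefix plus a zero-padded counter, and every configured family/column is
     * filled according to its parsed column format, either as Puts against the
     * table or through the per-family StoreFile writers.
     */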
    private void doWrite(String rowPrefix, Long rows, final Context context) throws IOException {

        long remainRows = rows;
        long index = 0;
        int toProcess;
        String row = null;
        Put p = null;
        BulkWriter w = null;

        long ts = this.useBulkload ? System.currentTimeMillis() : HConstants.LATEST_TIMESTAMP;

        int rowWidth = 0;
        for (long i = 1L; i < rows; i *= 10L) {
            rowWidth++;
        }

        while (remainRows > 0) {
            toProcess = (int) KEY.BATCHNUM;
            if (toProcess > remainRows)
                toProcess = (int) remainRows;

            List<Put> putList = new ArrayList<Put>(toProcess);

            for (int i = 0; i < toProcess; i++) {
                row = rowPrefix + longToStringPadding(rowWidth, index);
                if (!this.useBulkload) {
                    p = new Put(Bytes.toBytes(row));
                    p.setWriteToWAL(false);
                }

                for (int fIndex = 0; fIndex < families.length; fIndex++) {
                    String family = families[fIndex];

                    if (this.useBulkload) {
                        w = this.bulkWriters[fIndex];
                    }

                    for (int cIndex = 0; cIndex < columns.length; cIndex++) {
                        String column = columns[cIndex];
                        KeyValue kv;
                        switch (this.ColsFormat[cIndex].type) {
                        case NUMINT:
                            kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes(family), Bytes.toBytes(column), ts,
                                    KeyValue.Type.Put,
                                    Bytes.toBytes(DataGenUtil.genRandomInt(
                                            ((DataGenUtil.ColIntRange) this.ColsFormat[cIndex]).min,
                                            ((DataGenUtil.ColIntRange) this.ColsFormat[cIndex]).max)));
                            break;
                        case NUMLONG:
                            kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes(family), Bytes.toBytes(column), ts,
                                    KeyValue.Type.Put, Bytes.toBytes(DataGenUtil.genRandomLong()));
                            break;
                        case STRSEQ:
                            kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes(family), Bytes.toBytes(column), ts,
                                    KeyValue.Type.Put, Bytes.toBytes("v" + "-" + column + "-" + row));
                            break;
                        case TEXTRANDOM:
                            kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes(family), Bytes.toBytes(column), ts,
                                    KeyValue.Type.Put,
                                    Bytes.toBytes(DataGenUtil.genRandomString(
                                            ((DataGenUtil.ColTextRandom) this.ColsFormat[cIndex]).minLength,
                                            ((DataGenUtil.ColTextRandom) this.ColsFormat[cIndex]).maxLength,
                                            null)));
                            break;
                        default:
                            kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes(family), Bytes.toBytes(column), ts,
                                    KeyValue.Type.Put, Bytes.toBytes("v" + "-" + column + "-" + row));
                            break;
                        }

                        //KeyValue kv = KeyValueTestUtil.create(row, family, column,
                        //    ts, "v" + "-" + column + "-" + row);

                        if (!this.useBulkload) {
                            p.add(kv);
                        } else {
                            w.writer.append(kv);
                        }
                    }
                }

                if (!this.useBulkload) {
                    putList.add(p);
                }
                index++;
            }

            if (!this.useBulkload) {
                ht.put(putList);
            }
            remainRows -= toProcess;
        }
    }
}

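/**
 * Command-line driver: creates a pre-split test table (and optionally a "dot"
 * document-oriented variant), runs the MapReduce data generator against it, and
 * finishes with either a bulk load or a major compaction.
 */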
public class GenerateTestTable {

    static final Log LOG = LogFactory.getLog(GenerateTestTable.class);

    private HBaseAdmin admin = null;

    private final int MAX_COLUMN_NUM = 100;
    private final int DEFAULT_COLUMN_NUM = 5;
    private final int MAX_FAMILY_NUM = 10;
    private final int DEFAULT_FAMILY_NUM = 1;
    private final long MAX_ROW_NUM = 10000000000L;
    private final long DEFAULT_ROW_NUM = 10000;
    private final int MAX_REGION_NUM = 2048;
    private final int DEFAULT_REGION_NUMBER = 96;

    private final String[] rowPrefix = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
            "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };
    private String cfPrefix = "F";
    private String colPrefix = "C";
    private String dotColPrefix = "D.C";

    private Configuration conf = null;
    private byte[][] tableSplits = null;

    private int colNum = this.DEFAULT_COLUMN_NUM;
    private String coltypes = null;
    private long rowNum = this.DEFAULT_ROW_NUM;
    private int cfNum = this.DEFAULT_FAMILY_NUM;
    private int regionNumber = this.DEFAULT_REGION_NUMBER;
    private long baseRowNumber = 1L;
    private String tableName = null;
    private boolean createDotTable = false;
    private int maxVersions = 1;
    private String dotTableName;

    private Compression.Algorithm compression = Compression.Algorithm.NONE;
    private DataBlockEncoding encoding = DataBlockEncoding.NONE;

    private boolean useBulkLoad = false;

    /**
     * Default constructor; the Configuration and HBaseAdmin are created later in init().
     */
    public GenerateTestTable() {
    }

    private void init() {
        this.conf = HBaseConfiguration.create();
        try {
            this.admin = new HBaseAdmin(this.conf);
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        }
    }

    private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
            String mrTableName, String cfPrefix, String colPrefix)
            throws IOException, ClassNotFoundException, InterruptedException {

        // Set all job options before the Job copies the Configuration; values set
        // afterwards would not be visible to the map tasks.
        this.conf.set(KEY.INPUT_TABLE, mrTableName);
        this.conf.set(KEY.CF_PREIX, cfPrefix);
        this.conf.set(KEY.COL_PREIX, colPrefix);

        Job job = new Job(this.conf);
        job.setJobName("Generate Data for [" + mrTableName + "]");
        job.setJarByClass(GenerateTestTable.class);

        job.setInputFormatClass(inputFormatClass);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(LongWritable.class);

        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp", "tempout");
        fs.delete(path, true);

        FileOutputFormat.setOutputPath(job, path);

        job.setMapperClass(mapperClass);
        job.setNumReduceTasks(0);

        TableMapReduceUtil.addDependencyJars(job);
        // Add a Class from the hbase.jar so it gets registered too.
        TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);

        TableMapReduceUtil.initCredentials(job);

        job.waitForCompletion(true);

    }

    private void createNormalTable(String tableName, Map<String, String[]> layouts, byte[][] splits) {

        HTableDescriptor htd = new HTableDescriptor(tableName);
        for (Map.Entry<String, String[]> cfLayout : layouts.entrySet()) {
            String family = cfLayout.getKey();
            HColumnDescriptor cfdesc = new HColumnDescriptor(family).setMaxVersions(maxVersions)
                    .setDataBlockEncoding(encoding).setCompressionType(compression);
            htd.addFamily(cfdesc);
        }

        try {
            if (splits == null) {
                admin.createTable(htd);
            } else {
                admin.createTable(htd, splits);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

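    /**
     * Creates the "dot" (document-oriented table) variant: column names of the form
     * "doc.field" are grouped per document, and each document is described by an
     * Avro record schema stored in the column family metadata.
     */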
    private void createDotTable(String tableName, Map<String, String[]> layouts, byte[][] splits) {

        HTableDescriptor htd = new HTableDescriptor(tableName);

        for (Map.Entry<String, String[]> cfLayout : layouts.entrySet()) {
            String family = cfLayout.getKey();
            String[] columns = cfLayout.getValue();

            HColumnDescriptor cfdesc = new HColumnDescriptor(family).setMaxVersions(maxVersions)
                    .setDataBlockEncoding(encoding).setCompressionType(compression);

            Map<String, List<String>> docsMap = new HashMap<String, List<String>>();

            for (String q : columns) {
                int idx = q.indexOf(".");
                String doc = q.substring(0, idx);
                String field = q.substring(idx + 1);

                List<String> fieldList = docsMap.get(doc);

                if (fieldList == null) {
                    fieldList = new ArrayList<String>();
                    docsMap.put(doc, fieldList);
                }

                fieldList.add(field);
            }

            String[] docs = new String[docsMap.entrySet().size()];
            int index = 0;

            for (Map.Entry<String, List<String>> m : docsMap.entrySet()) {
                String docName = m.getKey();
                List<String> fields = m.getValue();
                boolean firstField = true;

                docs[index++] = docName;

                String docSchemaId = "hbase.dot.columnfamily.doc.schema." + docName;
                String docSchemaValue = " {    \n" + " \"name\": \"" + docName + "\", \n"
                        + " \"type\": \"record\",\n" + " \"fields\": [\n";
                for (String field : fields) {
                    if (firstField) {
                        firstField = false;
                    } else {
                        docSchemaValue += ", \n";
                    }
                    docSchemaValue += " {\"name\": \"" + field + "\", \"type\": \"bytes\"}";
                }

                docSchemaValue += " ]}";
                LOG.info("--- " + family + ":" + docName + " = " + docSchemaValue);
                cfdesc.setValue(docSchemaId, docSchemaValue);
            }
            String docElements = StringUtils.arrayToString(docs);
            cfdesc.setValue("hbase.dot.columnfamily.doc.element", docElements);
            htd.addFamily(cfdesc);
        }

        htd.setValue("hbase.dot.enable", "true");
        htd.setValue("hbase.dot.type", "ANALYTICAL");

        try {
            if (splits == null) {
                admin.createTable(htd);
            } else {
                admin.createTable(htd, splits);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

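    /** Loads the HFiles written by the mappers into the table via LoadIncrementalHFiles. */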
    public void doBulkLoad() throws Exception {

        HTable ht = new HTable(conf, this.tableName);
        new LoadIncrementalHFiles(conf).doBulkLoad(new Path(KEY.BULKLOADDIR + tableName), ht);
        ht.close();
        // We don't support bulkload for dot yet.
    }

    public void doMajorCompact() throws Exception {
        admin.flush(this.tableName);
        if (this.createDotTable)
            admin.flush(this.dotTableName);

        admin.majorCompact(this.tableName);
        if (this.createDotTable)
            admin.majorCompact(this.dotTableName);
    }

    public void createTable() throws Exception {

        String familys[] = DataGenUtil.genSequenceStrings(cfPrefix, this.cfNum);
        String columns[] = DataGenUtil.genSequenceStrings(colPrefix, this.colNum);

        Map<String, String[]> layouts = new HashMap<String, String[]>();

        for (String family : familys) {
            layouts.put(family, columns);
        }

        tableSplits = getFourLetterSplits(this.regionNumber);
        createNormalTable(tableName, layouts, tableSplits);

        if (this.createDotTable) {
            Map<String, String[]> dotLayouts = new HashMap<String, String[]>();
            String[] dotColumns = DataGenUtil.genSequenceStrings(dotColPrefix, this.colNum);
            for (String family : familys) {
                dotLayouts.put(family, dotColumns);
            }
            this.dotTableName = this.tableName + "Dot";
            createDotTable(this.dotTableName, dotLayouts, tableSplits);
        }
    }

    protected void printUsage() {
        printUsage(null);
    }

    protected void printUsage(final String message) {
        if (message != null && message.length() > 0) {
            System.err.println(message);
        }
        System.err.println("Usage: java " + this.getClass().getName());
        System.err.println("--table=tablename [--rownum=] [--colnum=] [--cfnum=] [--regions=] [--enabledot]");
        System.err.println(
                "[--colsformat=i:min:max(int),|l(long),|s(sequence string),|t:minlength:maxlength(random text),]");
        System.err.println("[--encoding=prefix|diff|fastdiff|none] [--compression=gz|lzo|snappy|none]");
        System.err.println("[--usebulkload]");
        System.err.println();
    }

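    /**
     * Computes n - 1 split keys that divide the four-letter row key space
     * "aaaa".."zzzz" into n roughly equal ranges.
     */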
    private byte[][] getFourLetterSplits(int n) {
        double range = 26.0 * 26.0 * 26.0 * 26.0;
        assert (n > 0 && n < MAX_REGION_NUM);
        byte[][] splits = new byte[n - 1][];

        double step = range / n;
        double offset = 0.0;
        long index;
        char[] letter = new char[4];
        for (int i = 0; i < (n - 1); i++) {
            offset += step;
            index = Math.round(offset);
            letter[0] = (char) ((index / (26 * 26 * 26)) + 97);
            letter[1] = (char) ((index / (26 * 26) % 26) + 97);
            letter[2] = (char) ((index / (26) % 26) + 97);
            letter[3] = (char) ((index % 26) + 97);
            splits[i] = Bytes.toBytes(new String(letter));
        }
        return splits;
    }

    public int parseCommandLine(final String[] args) {
        // Simple hand-rolled option parsing (hopefully not as painful as full CLI options).
        int errCode = 0;
        if (args.length < 1) {
            printUsage();
            return -1;
        }

        for (int i = 0; i < args.length; i++) {
            String cmd = args[i];
            if (cmd.equals("-h") || cmd.startsWith("--h")) {
                printUsage();
                break;
            }

            final String colnum = "--colnum=";
            if (cmd.startsWith(colnum)) {
                int val = Integer.parseInt(cmd.substring(colnum.length()));
                if (val <= 0 || val > this.MAX_COLUMN_NUM)
                    val = this.DEFAULT_COLUMN_NUM;
                this.colNum = val;
                this.conf.setLong(KEY.OPTION_COLNUM, this.colNum);
                continue;
            }

            final String cf = "--cfnum=";
            if (cmd.startsWith(cf)) {
                int val = Integer.parseInt(cmd.substring(cf.length()));
                if (val <= 0 || val > this.MAX_FAMILY_NUM)
                    val = this.DEFAULT_FAMILY_NUM;
                this.cfNum = val;
                this.conf.setInt(KEY.OPTION_CFNUM, this.cfNum);
                continue;
            }

            final String rows = "--rownum=";
            if (cmd.startsWith(rows)) {
                long val = Long.decode(cmd.substring(rows.length()));
                if (val < 0 || val > this.MAX_ROW_NUM)
                    val = this.DEFAULT_ROW_NUM;
                this.rowNum = val;
                continue;
            }

            final String colsFormat = "--colsformat=";
            if (cmd.startsWith(colsFormat)) {
                this.coltypes = cmd.substring(colsFormat.length());
                continue;
            }

            final String regions = "--regions=";
            if (cmd.startsWith(regions)) {
                int val = Integer.parseInt(cmd.substring(regions.length()));
                if (val <= 0 || val > this.MAX_REGION_NUM)
                    val = this.DEFAULT_REGION_NUMBER;
                this.regionNumber = val;
                continue;
            }

            final String enabledot = "--enabledot";
            if (cmd.startsWith(enabledot)) {
                this.createDotTable = true;
                continue;
            }

            final String usebulkload = "--usebulkload";
            if (cmd.startsWith(usebulkload)) {
                this.useBulkLoad = true;
                continue;
            }

            final String compressionOP = "--compression=";
            if (cmd.startsWith(compressionOP)) {
                String compressionCodec = cmd.substring(compressionOP.length());
                if (compressionCodec.equalsIgnoreCase("gz")) {
                    this.compression = Compression.Algorithm.GZ;
                } else if (compressionCodec.equalsIgnoreCase("lzo")) {
                    this.compression = Compression.Algorithm.LZO;
                } else if (compressionCodec.equalsIgnoreCase("snappy")) {
                    this.compression = Compression.Algorithm.SNAPPY;
                } else {
                    this.compression = Compression.Algorithm.NONE;
                }
                continue;
            }

            final String encodingOP = "--encoding=";
            if (cmd.startsWith(encodingOP)) {
                String encodingCodec = cmd.substring(encodingOP.length());
                if (encodingCodec.equalsIgnoreCase("fastdiff")) {
                    this.encoding = DataBlockEncoding.FAST_DIFF;
                } else if (encodingCodec.equalsIgnoreCase("diff")) {
                    this.encoding = DataBlockEncoding.DIFF;
                } else if (encodingCodec.equalsIgnoreCase("prefix")) {
                    this.encoding = DataBlockEncoding.PREFIX;
                } else {
                    this.encoding = DataBlockEncoding.NONE;
                }
                continue;
            }

            final String table = "--table=";
            if (cmd.startsWith(table)) {
                this.tableName = cmd.substring(table.length());
                continue;
            }
        }

        if (this.tableName == null) {
            printUsage("Please specify the table name");
            errCode = -2;
        }

        if (this.coltypes == null) {
            this.coltypes = "s";
            for (int j = 1; j < this.colNum; j++) {
                this.coltypes = this.coltypes.concat(",s");
            }
        }

        DataGenUtil dataGenUtil = new DataGenUtil();
        DataGenUtil.ColFormat[] colsFormat = dataGenUtil.parseColsFormat(this.coltypes);
        if (colsFormat.length != this.colNum) {
            System.err.println("colformat string : " + this.coltypes + " does not match colNum");
            errCode = -3;
        }

        this.conf.set(KEY.OPTION_COLSFORMAT, this.coltypes);

        this.baseRowNumber = 1L;
        while (this.baseRowNumber < this.rowNum) {
            this.baseRowNumber *= 10L;
        }

        this.conf.setLong(KEY.OPTION_BASEROWNUM, this.baseRowNumber);
        this.conf.setLong(KEY.OPTION_REGIONROWNUM, this.rowNum / this.regionNumber);

        this.conf.setBoolean(KEY.OPTION_USEBULKLOAD, useBulkLoad);

        System.out.println("cfnum = " + this.cfNum);
        System.out.println("colnum = " + this.colNum);
        System.out.println("colsFormat = " + this.coltypes);
        System.out.println("rownum = " + this.rowNum);
        System.out.println("baseRowNumber = " + this.baseRowNumber);
        System.out.println("tablename = " + this.tableName);
        System.out.println("Presplit Region number = " + this.regionNumber);
        System.out.println("row per region = " + this.rowNum / this.regionNumber);
        System.out.println("Also create dot table = " + this.createDotTable);
        System.out.println("Data Block Encoding = " + this.encoding);
        System.out.println("Compression = " + this.compression);
        System.out.println("Use BulkLoad = " + this.useBulkLoad);
        return errCode;
    }

    /**
     * @param args
     * @throws Exception 
     */
    public static void main(final String[] args) throws Exception {
        GenerateTestTable gt = new GenerateTestTable();
        gt.init();
        if (gt.parseCommandLine(args) != 0) {
            System.err.println("fail to parse cmdline");
            return;
        }
        gt.createTable();

        if (gt.rowNum == 0) {
            System.out.println("rowNum=0, only create table");
            return;
        }

        gt.doMapReduce(RegionWriteInputFormat.class, GenerateRegionDataTask.class, gt.tableName, gt.cfPrefix,
                gt.colPrefix);

        if (gt.createDotTable) {
            gt.doMapReduce(RegionWriteInputFormat.class, GenerateRegionDataTask.class, gt.dotTableName, gt.cfPrefix,
                    gt.dotColPrefix);
        }

        if (gt.useBulkLoad) {
            System.out.println("######## bulkloading table... ###########");
            gt.doBulkLoad();
        } else {
            System.out.println("######## Major compacting table... ###########");
            gt.doMajorCompact();
        }
    }

    /**
     * Entry point for test usage: reuses an externally supplied Configuration.
     * @param args command-line arguments, as for main()
     * @param conf the Configuration to use
     * @throws Exception
     */
    public static void testmain(final String[] args, Configuration conf) throws Exception {
        GenerateTestTable gt = new GenerateTestTable();
        gt.conf = conf;
        gt.admin = new HBaseAdmin(gt.conf);
        if (gt.parseCommandLine(args) != 0) {
            System.err.println("fail to parse cmdline");
            return;
        }
        gt.createTable();

        if (gt.rowNum == 0) {
            System.out.println("rowNum=0, only create table");
            return;
        }

        gt.doMapReduce(RegionWriteInputFormat.class, GenerateRegionDataTask.class, gt.tableName, gt.cfPrefix,
                gt.colPrefix);

        if (gt.createDotTable) {
            gt.doMapReduce(RegionWriteInputFormat.class, GenerateRegionDataTask.class, gt.dotTableName, gt.cfPrefix,
                    gt.dotColPrefix);
        }

        if (gt.useBulkLoad) {
            System.out.println("######## bulkloading table... ###########");
            gt.doBulkLoad();
        } else {
            System.out.println("######## Major compacting table... ###########");
            gt.doMajorCompact();
        }
    }
}
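
A typical invocation (the jar name here is hypothetical; the class and its Hadoop/HBase dependencies must be on the classpath) might look like this:

    hadoop jar generate-test-table.jar org.apache.hadoop.hbase.GenerateTestTable \
        --table=TestTable --rownum=1000000 --cfnum=1 --colnum=5 --regions=96 \
        --colsformat=i:0:100,l,s,t:8:16,s \
        --encoding=fastdiff --compression=snappy --usebulkload

Note that the --colsformat value must contain exactly as many comma-separated entries as --colnum, otherwise parseCommandLine rejects the arguments.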