pl.edu.icm.coansys.richimporttsv.jobs.mapreduce.TestRichImportTsv.java Source code

Introduction

Here is the source code for pl.edu.icm.coansys.richimporttsv.jobs.mapreduce.TestRichImportTsv.java.
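The class tests RichImportTsv, a CoAnSys variant of HBase's ImportTsv that additionally understands a configurable record separator (importtsv.record.separator), so records may be delimited by arbitrary, even multi-character, strings instead of newlines. As a minimal sketch of how the tests drive the job (the table and file names here are placeholders, and conf stands for the cluster configuration used in the tests):

    // Sketch only: mirrors the argument shape used in the tests below.
    String[] args = {
        "-Dimporttsv.record.separator=#",           // records end at '#', not '\n'
        "-Dimporttsv.separator=$",                  // fields within a record split on '$'
        "-Dimporttsv.columns=HBASE_ROW_KEY,cf:cq",  // the first field becomes the row key
        "someTable",                                // placeholder target table
        "input.dat" };                              // placeholder input file
    Job job = RichImportTsv.createSubmittableJob(conf, args);
    job.waitForCompletion(true);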

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package pl.edu.icm.coansys.richimporttsv.jobs.mapreduce;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import static org.junit.Assert.*;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.mapreduce.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestRichImportTsv {

    private static final Log LOG = LogFactory.getLog(TestRichImportTsv.class);
    private static HBaseTestingUtility UTIL;
    // some predefined names
    protected final long TEST_ROW_COUNT = 100;
    protected final String S_ROW_PREFIX = "row";
    protected final String S_COLUMN_FAMILY = "cf";
    protected final String S_COLUMN_QUALIFIER = "cq";
    protected final String S_COLUMN_QUALIFIER2 = "cq2";
    protected final byte[] B_COLUMN_FAMILY = Bytes.toBytes(S_COLUMN_FAMILY);
    protected final byte[] B_COLUMN_QUALIFIER = Bytes.toBytes(S_COLUMN_QUALIFIER);
    protected final byte[] B_COLUMN_QUALIFIER2 = Bytes.toBytes(S_COLUMN_QUALIFIER2);
    protected final byte[] B_VALUE = Bytes.toBytes("value");

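    // Appends the current time in milliseconds so each run gets unique table/file names.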
    private String getCurrentDateAppended(String name) {
        return name + "-" + new Date().getTime();
    }

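    // Best-effort cleanup; a failed deletion is only logged so teardown cannot fail a test.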
    private void dropTable(String tableName) {
        try {
            UTIL.deleteTable(Bytes.toBytes(tableName));
        } catch (IOException ex) {
            LOG.info("Table can not be deleted: " + tableName + "\n" + ex.getLocalizedMessage());
        }
    }

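    // Creates a table with the default column family and loads rowCount rows in one batch.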
    private HTable createAndPopulateDefaultTable(String tableName, long rowCount)
            throws IOException, InterruptedException {
        HTable htable = UTIL.createTable(Bytes.toBytes(tableName), B_COLUMN_FAMILY);
        List<Row> putList = new ArrayList<Row>();
        for (long i = 0; i < rowCount; ++i) {
            Put put = new Put(Bytes.toBytes(S_ROW_PREFIX + i));
            put.add(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER, B_VALUE);
            putList.add(put);
        }
        htable.batch(putList);
        return htable;
    }

    @BeforeClass
    public static void beforeClass() throws Exception {

        Configuration conf = new Configuration();
        //        File workingDirectory = new File("./");
        //        System.setProperty("test.build.data", workingDirectory.getAbsolutePath());
        //        conf.set("test.build.data", new File(workingDirectory, "zookeeper").getAbsolutePath());
        //        conf.set("fs.default.name", "file:///");
        //        conf.set("zookeeper.session.timeout", "180000");
        conf.set("hbase.zookeeper.peerport", "2889");
        conf.set("hbase.zookeeper.property.clientPort", "2182");
        conf.set("hbase.master.port", "6001");
        conf.set("hbase.master.info.port", "6011");
        conf.set("hbase.regionserver.port", "6021");
        conf.set("hbase.regionserver.info.port", "6031");

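        // Start an in-process HBase cluster and a mini MapReduce cluster to run the jobs.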
        UTIL = new HBaseTestingUtility(conf);
        UTIL.startMiniCluster();
        UTIL.startMiniMapReduceCluster();
    }

    @AfterClass
    public static void afterClass() throws Exception {
        UTIL.shutdownMiniMapReduceCluster();
        UTIL.shutdownMiniCluster();
    }

    @Test(timeout = 1800000)
    public void testTableRichImportTsv() throws Exception {

        String tableInitName = getCurrentDateAppended("testTableRichImportTsv");
        String inputFileName = "InputFile.dat";

        String[] args = new String[] { "-Dimporttsv.record.separator=#", "-Dimporttsv.separator=$",
                "-Dimporttsv.columns=HBASE_ROW_KEY," + S_COLUMN_FAMILY + ":" + S_COLUMN_QUALIFIER, tableInitName,
                inputFileName };

        HTable htableImport = doMROnTableTest(inputFileName, S_COLUMN_FAMILY, tableInitName,
                "KEY1$VALUE\n1#KEY2$VALUE2#\nKEY3$VALUE3", args);

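        // '#' is the record separator here, so embedded '\n' must survive in keys and values.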
        Result key1 = htableImport.get(new Get(Bytes.toBytes("KEY1")));
        assertNotNull(key1);
        assertEquals("VALUE\n1", Bytes.toString(key1.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));
        Result key3 = htableImport.get(new Get(Bytes.toBytes("\nKEY3")));
        assertNotNull(key3);
        assertEquals("VALUE3", Bytes.toString(key3.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));

        dropTable(tableInitName);
    }

    @Test(timeout = 1800000)
    public void testMultiCharacterSeparatorsTableRichImportTsv() throws Exception {

        String tableInitName = getCurrentDateAppended("testMultiCharacterSeparatorsTableRichImportTsv");
        String inputFileName = "InputFile.dat";

        String[] args = new String[] { "-Dimporttsv.record.separator=###", "-Dimporttsv.separator=$$$",
                "-Dimporttsv.columns=HBASE_ROW_KEY," + S_COLUMN_FAMILY + ":" + S_COLUMN_QUALIFIER, tableInitName,
                inputFileName };

        HTable htableImport = doMROnTableTest(inputFileName, S_COLUMN_FAMILY, tableInitName,
                "KEY1$$$VALUE\n1###KEY2$$$VALUE2###\nKEY3$$$VALUE3", args);

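        // Same expectations as above, but with three-character record and field separators.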
        Result key1 = htableImport.get(new Get(Bytes.toBytes("KEY1")));
        assertNotNull(key1);
        assertEquals("VALUE\n1", Bytes.toString(key1.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));
        Result key3 = htableImport.get(new Get(Bytes.toBytes("\nKEY3")));
        assertNotNull(key3);
        assertEquals("VALUE3", Bytes.toString(key3.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));

        dropTable(tableInitName);
    }

    @Test(timeout = 1800000)
    public void testMultiCharacterSeparatorsMultiColumnInputTableRichImportTsv() throws Exception {

        String tableInitName = getCurrentDateAppended("testMultiCharacterSeparatorsMultiColumnInputTableRichImportTsv");
        String inputFileName = "InputFile.dat";

        String[] args = new String[] { "-Dimporttsv.record.separator=###", "-Dimporttsv.separator=$$$",
                "-Dimporttsv.columns=HBASE_ROW_KEY," + S_COLUMN_FAMILY + ":" + S_COLUMN_QUALIFIER + ","
                        + S_COLUMN_FAMILY + ":" + S_COLUMN_QUALIFIER2,
                tableInitName, inputFileName };

        HTable htableImport = doMROnTableTest(inputFileName, S_COLUMN_FAMILY, tableInitName,
                "KEY1$$$VALUEa$$$VALUEb\n###KEY2$$$VALUE2$$$VALUE2b###\nKEY3$$$VALUE3$$$VALUE3b", args);

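        // Each record now carries two values, mapped to the cq and cq2 qualifiers.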
        Result key1 = htableImport.get(new Get(Bytes.toBytes("KEY1")));
        assertNotNull(key1);
        assertEquals("VALUEa", Bytes.toString(key1.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));
        assertEquals("VALUEb\n", Bytes.toString(key1.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER2)));

        Result key2 = htableImport.get(new Get(Bytes.toBytes("KEY2")));
        assertNotNull(key2);
        assertEquals("VALUE2", Bytes.toString(key2.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));
        assertEquals("VALUE2b", Bytes.toString(key2.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER2)));

        Result key3 = htableImport.get(new Get(Bytes.toBytes("\nKEY3")));
        assertNotNull(key3);
        assertEquals("VALUE3", Bytes.toString(key3.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));
        assertEquals("VALUE3b", Bytes.toString(key3.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER2)));

        dropTable(tableInitName);
    }

    @Test(timeout = 1800000)
    public void testTextInputFormatTableRichImportTsv() throws Exception {

        String tableInitName = getCurrentDateAppended("testTextInputFormatTableRichImportTsv");
        String inputFileName = "InputFile.dat";

        String[] args = new String[] {
                "-Dimporttsv.input.format.class=org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
                "-Dimporttsv.separator=$",
                "-Dimporttsv.columns=HBASE_ROW_KEY," + S_COLUMN_FAMILY + ":" + S_COLUMN_QUALIFIER, tableInitName,
                inputFileName };

        HTable htableImport = doMROnTableTest(inputFileName, S_COLUMN_FAMILY, tableInitName,
                "KEY1$VALUE1\nKEY2$VALUE2\nKEY3$VALUE3", args);

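        // No record separator is set, so the stock TextInputFormat splits on newlines.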
        Result key1 = htableImport.get(new Get(Bytes.toBytes("KEY1")));
        assertNotNull(key1);
        assertEquals("VALUE1", Bytes.toString(key1.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));
        Result key3 = htableImport.get(new Get(Bytes.toBytes("KEY3")));
        assertNotNull(key3);
        assertEquals("VALUE3", Bytes.toString(key3.getValue(B_COLUMN_FAMILY, B_COLUMN_QUALIFIER)));

        dropTable(tableInitName);
    }

    @Test
    public void testDirRichImportTsv() throws Exception {
        String tableInitName = getCurrentDateAppended("testDirRichImportTsv");
        String inputFileName = "InputFile.dat";
        String outputDirName = getCurrentDateAppended("richtsv-output");
        FileSystem dfs = UTIL.getDFSCluster().getFileSystem();

        Path qualifiedOutputDir = dfs.makeQualified(new Path(outputDirName));
        assertFalse(dfs.exists(qualifiedOutputDir));

        // Prepare the arguments required for the test.
        String[] args = new String[] { "-Dimporttsv.record.separator=#", "-Dimporttsv.separator=$",
                "-Dimporttsv.columns=HBASE_ROW_KEY," + S_COLUMN_FAMILY + ":" + S_COLUMN_QUALIFIER,
                "-Dimporttsv.bulk.output=" + outputDirName, tableInitName, inputFileName };

        doMROnTableTest(inputFileName, S_COLUMN_FAMILY, tableInitName, "KEY1$VALUE\n1#KEY2$VALUE2#\nKEY3$VALUE3",
                args);

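        // With importtsv.bulk.output set, the job is expected to write its output under
        // outputDirName rather than putting rows into the table (as in stock ImportTsv).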
        assertTrue(dfs.exists(qualifiedOutputDir));
    }

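    // Shared driver: writes 'line' to inputFile on the mini DFS, creates the target table,
    // runs the RichImportTsv job, and returns the (now populated) table.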
    private HTable doMROnTableTest(String inputFile, String family, String tableName, String line, String[] args)
            throws Exception {

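        // GenericOptionsParser folds the -D options into the shared configuration and
        // leaves the positional table/file arguments in args.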
        GenericOptionsParser opts = new GenericOptionsParser(UTIL.getConfiguration(), args);
        Configuration config = UTIL.getConfiguration();
        args = opts.getRemainingArgs();

        FileSystem fs = UTIL.getDFSCluster().getFileSystem();
        FSDataOutputStream op = fs.create(new Path(inputFile), true);
        op.write(line.getBytes(HConstants.UTF8_ENCODING));
        op.close();

        assertTrue(fs.exists(new Path(inputFile)));

        final byte[] FAM = Bytes.toBytes(family);
        final byte[] TAB = Bytes.toBytes(tableName);

        HTable htableImport = UTIL.createTable(TAB, FAM);
        assertEquals(0, UTIL.countRows(htableImport));

        Job job = RichImportTsv.createSubmittableJob(config, args);
        job.waitForCompletion(false);
        assertTrue(job.isSuccessful());
        return htableImport;
    }

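    // The remaining methods are standard HBase MapReduce smoke tests (RowCounter, CopyTable,
    // Export/Import); their @Test annotations are commented out, so they are disabled.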
    //@Test(timeout = 1800000)
    public void testRowCounter() throws Exception {
        String tableInitName = getCurrentDateAppended("testRowCounter");
        createAndPopulateDefaultTable(tableInitName, TEST_ROW_COUNT);

        Job job = RowCounter.createSubmittableJob(UTIL.getConfiguration(), new String[] { tableInitName });
        job.waitForCompletion(true);
        long count = job.getCounters()
                .findCounter("org.apache.hadoop.hbase.mapreduce.RowCounter$RowCounterMapper$Counters", "ROWS")
                .getValue();
        Assert.assertEquals(TEST_ROW_COUNT, count);

        dropTable(tableInitName);
    }

    //@Test(timeout = 1800000)
    public void testCopy() throws Exception {

        String tableInitName = getCurrentDateAppended("testCopy");
        createAndPopulateDefaultTable(tableInitName, TEST_ROW_COUNT);

        final String tableCopyName = tableInitName + "Copy";
        HTable htableCopy = UTIL.createTable(Bytes.toBytes(tableCopyName), B_COLUMN_FAMILY);

        Job job = CopyTable.createSubmittableJob(UTIL.getConfiguration(),
                new String[] { "--new.name=" + tableCopyName, tableInitName });
        job.waitForCompletion(true);
        Assert.assertEquals(TEST_ROW_COUNT, (long) UTIL.countRows(htableCopy));

        dropTable(tableInitName);
        dropTable(tableCopyName);
    }

    //@Test(timeout = 1800000)
    public void testExportImport() throws Exception {

        String tableInitName = getCurrentDateAppended("testExportImport");
        createAndPopulateDefaultTable(tableInitName, TEST_ROW_COUNT);

        FileSystem dfs = UTIL.getDFSCluster().getFileSystem();
        Path qualifiedTempDir = dfs.makeQualified(new Path("export-import-temp-dir"));
        Assert.assertFalse(dfs.exists(qualifiedTempDir));

        Job jobExport = Export.createSubmittableJob(UTIL.getConfiguration(),
                new String[] { tableInitName, qualifiedTempDir.toString() });
        jobExport.waitForCompletion(true);

        Assert.assertTrue(dfs.exists(qualifiedTempDir));

        final String tableImportName = tableInitName + "Import";
        HTable htableImport = UTIL.createTable(Bytes.toBytes(tableImportName), B_COLUMN_FAMILY);

        Job jobImport = Import.createSubmittableJob(UTIL.getConfiguration(),
                new String[] { tableImportName, qualifiedTempDir.toString() });
        jobImport.waitForCompletion(true);
        Assert.assertEquals(TEST_ROW_COUNT, (long) UTIL.countRows(htableImport));

        dropTable(tableInitName);
        dropTable(tableImportName);
    }
}