Java tutorial

The class below bulk loads CSV data into HBase: it runs a MapReduce job that writes HFiles, then hands the output directory to LoadIncrementalHFiles to move the files into the target table.
// Copyright (C) 2011 Splunk Inc.
//
// Splunk Inc. licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.splunk.shuttl.integration.hadoop.hbase;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobRunner {

    private static HBaseAdmin admin;

    private static final String CONFIG_FILENAME = "csvmapper.filename";
    private static final String CONFIG_OUTPUTPATH = "csvmapper.outputPath";
    private static final String CONFIG_TABLENAME = "csvmapper.tableName";

    public static void main(String[] args) throws Exception {
        Job job = CSVJobFactory.getConfiguredJob(args);
        admin = new HBaseAdmin(job.getConfiguration());

        JobRunner jobRunner = new JobRunner();
        jobRunner.run(job);
    }

    /**
     * Runs the MapReduce job that writes HFiles, then bulk loads the result
     * into HBase.
     *
     * @param job the job configured by CSVJobFactory
     * @return true if the job completed successfully
     * @throws Exception
     */
    private boolean run(Job job) throws Exception {
        Configuration configuration = job.getConfiguration();
        Path inputPath = new Path(configuration.get(CONFIG_FILENAME));
        Path outputPath = new Path(configuration.get(CONFIG_OUTPUTPATH));
        FileSystem fileSystem = FileSystem.get(configuration);

        createHBaseTableIfNotExists(configuration.get(CONFIG_TABLENAME));
        deleteOutputPathIfExists(fileSystem, outputPath);

        // Read the CSV header from the first line of the input file and pass
        // it to the mappers through the job configuration. (The original
        // snippet opened an empty Path here, which would fail at runtime;
        // reading the header from inputPath is the intended behavior.)
        String headerString = readFirstLine(fileSystem.open(inputPath));
        configuration.set(JobConfigurationConstants.HEADER_STRING, headerString);

        HTable hTable = new HTable(configuration,
                configuration.get(CONFIG_TABLENAME));

        // Auto-configure the partitioner and reducer to match the table's
        // region boundaries.
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean complete = job.waitForCompletion(true);
        if (complete) {
            // Move the generated HFiles into the HBase table.
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
                    configuration);
            loader.doBulkLoad(outputPath, hTable);
        }
        fileSystem.deleteOnExit(outputPath);
        return complete;
    }

    private void deleteOutputPathIfExists(FileSystem fileSystem,
            Path outputPath) throws IOException {
        if (fileSystem.exists(outputPath))
            fileSystem.delete(outputPath, true);
    }

    private void createHBaseTableIfNotExists(String tableName)
            throws IOException {
        if (!admin.tableExists(tableName))
            admin.createTable(new HTableDescriptor(tableName));
    }

    private String readFirstLine(FSDataInputStream fsDataInputStream)
            throws IOException {
        DataInputStream in = new DataInputStream(fsDataInputStream);
        return new BufferedReader(new InputStreamReader(in)).readLine();
    }
}
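JobRunner depends on CSVJobFactory.getConfiguredJob having populated the three csvmapper.* configuration keys before run() reads them. That factory is not part of this snippet, so the following is only a minimal sketch of what it might look like, assuming the command-line arguments are the input file, the output path, and the table name, in that order. The argument order, the job name, and the omitted mapper class are assumptions for illustration, not the original implementation.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;

public class CSVJobFactory {

    /**
     * Minimal sketch: builds a Job whose configuration carries the three
     * csvmapper.* keys that JobRunner reads. Assumes
     * args = { inputFile, outputPath, tableName }; the real factory may
     * take its arguments differently.
     */
    public static Job getConfiguredJob(String[] args) throws Exception {
        // Start from an HBase-aware configuration so the job can reach
        // ZooKeeper and the HBase master.
        Configuration conf = HBaseConfiguration.create();
        conf.set("csvmapper.filename", args[0]);
        conf.set("csvmapper.outputPath", args[1]);
        conf.set("csvmapper.tableName", args[2]);

        Job job = new Job(conf, "csv-to-hbase-bulkload");
        job.setJarByClass(CSVJobFactory.class);
        // The mapper that turns CSV lines into HBase KeyValues would be
        // registered here (job.setMapperClass(...)); it is omitted because
        // it is not shown in this tutorial.
        return job;
    }
}

With a factory shaped like this, the whole pipeline runs as a single command such as "hadoop jar shuttl-hbase.jar com.splunk.shuttl.integration.hadoop.hbase.JobRunner input.csv /tmp/hfiles mytable" (jar name and paths here are placeholders).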