Java tutorial

The class below bulk loads CSV data into HBase: it runs a MapReduce job that writes HFiles, then hands the output directory to LoadIncrementalHFiles to move the files into the target table.
// Copyright (C) 2011 Splunk Inc.
//
// Splunk Inc. licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.splunk.shuttl.integration.hadoop.hbase;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobRunner {

    private static HBaseAdmin admin;

    private static final String CONFIG_FILENAME = "csvmapper.filename";
    private static final String CONFIG_OUTPUTPATH = "csvmapper.outputPath";
    private static final String CONFIG_TABLENAME = "csvmapper.tableName";

    public static void main(String[] args) throws Exception {
        Job job = CSVJobFactory.getConfiguredJob(args);
        admin = new HBaseAdmin(job.getConfiguration());

        JobRunner jobRunner = new JobRunner();
        jobRunner.run(job);
    }

    /**
     * Runs the MapReduce job that writes HFiles, then bulk loads the result
     * into HBase.
     *
     * @param job the job configured by CSVJobFactory
     * @return true if the job completed successfully
     * @throws Exception
     */
    private boolean run(Job job) throws Exception {
        Configuration configuration = job.getConfiguration();
        Path inputPath = new Path(configuration.get(CONFIG_FILENAME));
        Path outputPath = new Path(configuration.get(CONFIG_OUTPUTPATH));
        FileSystem fileSystem = FileSystem.get(configuration);

        createHBaseTableIfNotExists(configuration.get(CONFIG_TABLENAME));
        deleteOutputPathIfExists(fileSystem, outputPath);

        // Read the CSV header from the first line of the input file and pass
        // it to the mappers through the job configuration. (The original
        // snippet opened an empty Path here, which would fail at runtime;
        // reading the header from inputPath is the intended behavior.)
        String headerString = readFirstLine(fileSystem.open(inputPath));
        configuration.set(JobConfigurationConstants.HEADER_STRING, headerString);

        HTable hTable = new HTable(configuration,
                configuration.get(CONFIG_TABLENAME));

        // Auto-configure the partitioner and reducer to match the table's
        // region boundaries.
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean complete = job.waitForCompletion(true);
        if (complete) {
            // Move the generated HFiles into the HBase table.
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
                    configuration);
            loader.doBulkLoad(outputPath, hTable);
        }
        fileSystem.deleteOnExit(outputPath);
        return complete;
    }

    private void deleteOutputPathIfExists(FileSystem fileSystem,
            Path outputPath) throws IOException {
        if (fileSystem.exists(outputPath))
            fileSystem.delete(outputPath, true);
    }

    private void createHBaseTableIfNotExists(String tableName)
            throws IOException {
        if (!admin.tableExists(tableName))
            admin.createTable(new HTableDescriptor(tableName));
    }

    private String readFirstLine(FSDataInputStream fsDataInputStream)
            throws IOException {
        DataInputStream in = new DataInputStream(fsDataInputStream);
        return new BufferedReader(new InputStreamReader(in)).readLine();
    }
}
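JobRunner depends on CSVJobFactory.getConfiguredJob having populated the three csvmapper.* configuration keys before run() reads them. That factory is not part of this snippet, so the following is only a minimal sketch of what it might look like, assuming the command-line arguments are the input file, the output path, and the table name, in that order. The argument order, the job name, and the omitted mapper class are assumptions for illustration, not the original implementation.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;

public class CSVJobFactory {

    /**
     * Minimal sketch: builds a Job whose configuration carries the three
     * csvmapper.* keys that JobRunner reads. Assumes
     * args = { inputFile, outputPath, tableName }; the real factory may
     * take its arguments differently.
     */
    public static Job getConfiguredJob(String[] args) throws Exception {
        // Start from an HBase-aware configuration so the job can reach
        // ZooKeeper and the HBase master.
        Configuration conf = HBaseConfiguration.create();
        conf.set("csvmapper.filename", args[0]);
        conf.set("csvmapper.outputPath", args[1]);
        conf.set("csvmapper.tableName", args[2]);

        Job job = new Job(conf, "csv-to-hbase-bulkload");
        job.setJarByClass(CSVJobFactory.class);
        // The mapper that turns CSV lines into HBase KeyValues would be
        // registered here (job.setMapperClass(...)); it is omitted because
        // it is not shown in this tutorial.
        return job;
    }
}

With a factory shaped like this, the whole pipeline runs as a single command such as "hadoop jar shuttl-hbase.jar com.splunk.shuttl.integration.hadoop.hbase.JobRunner input.csv /tmp/hfiles mytable" (jar name and paths here are placeholders).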