List of usage examples for org.apache.hadoop.mapreduce.Job#waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
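For orientation, here is a minimal, self-contained driver sketch showing the usual call pattern: configure the Job, call waitForCompletion(true) to submit it and block until it finishes, then use the returned boolean as the process exit status. The class and job names below (MinimalDriver, TokenMapper, SumReducer, "minimal-driver") are illustrative placeholders, not code from any of the projects listed here.

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {

    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal-driver"); // job name is arbitrary
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(TokenMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion(true) submits the job, streams progress to the console,
        // blocks until the job finishes, and returns true only on success.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}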
From source file:com.benchmark.mapred.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "wordcount"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {/* ww w. j a va2 s . com*/ if ("-r".equals(args[i])) { job.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); System.err.println("Usage: wordcount <numReduces> <in> <out>"); System.exit(2); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); System.err.println("Usage: wordcount <numReduces> <in> <out>"); System.exit(2); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); System.err.println("Usage: wordcount <numReduces> <in> <out>"); System.exit(2); } FileInputFormat.addInputPath(job, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(job, new Path(other_args.get(1))); Date startIteration = new Date(); Boolean waitforCompletion = job.waitForCompletion(true); Date endIteration = new Date(); System.out.println( "The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000 + " seconds."); System.exit(waitforCompletion ? 0 : 1); }
From source file:com.bigdog.hadoop.mapreduce.combine.WordCountCombineApp.java
public void combine() throws Exception {
    Configuration conf = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
    final Path outPath = new Path(OUT_PATH);
    if (fileSystem.exists(outPath)) {
        fileSystem.delete(outPath, true);
    }

    final Job job = new Job(conf, WordCountCombineApp.class.getSimpleName());

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // input format (optional, TextInputFormat is the default)
    //job.setInputFormatClass(TextInputFormat.class);

    // 1.2 set the mapper class
    job.setMapperClass(MyMapper.class);
    // map output types <k2,v2>; optional when they match the final <k3,v3> types
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    //job.setPartitionerClass(HashPartitioner.class);
    // number of reduce tasks
    //job.setNumReduceTasks(1);

    // 1.4 TODO sorting and grouping
    // 1.5 combiner
    job.setCombinerClass(MyCombiner.class);

    // 2.2 set the reducer class
    job.setReducerClass(MyReducer.class);
    // reduce output types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, outPath);
    // output format (optional, TextOutputFormat is the default)
    //job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bigdog.hadoop.mapreduce.counter.WordCountCounterApp.java
public void CustomerCounter() throws Exception {
    Configuration conf = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
    final Path outPath = new Path(OUT_PATH);
    if (fileSystem.exists(outPath)) {
        fileSystem.delete(outPath, true);
    }

    final Job job = new Job(conf, WordCountCounterApp.class.getSimpleName());

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // input format (optional, TextInputFormat is the default)
    //job.setInputFormatClass(TextInputFormat.class);

    // 1.2 set the mapper class
    job.setMapperClass(MyMapper.class);
    // map output types <k2,v2>; optional when they match the final <k3,v3> types
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    //job.setPartitionerClass(HashPartitioner.class);
    // number of reduce tasks
    //job.setNumReduceTasks(1);

    // 1.4 TODO sorting and grouping
    // 1.5 TODO combiner

    // 2.2 set the reducer class
    job.setReducerClass(MyReducer.class);
    // reduce output types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, outPath);
    // output format (optional, TextOutputFormat is the default)
    //job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
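Since this example is about custom counters, it is worth noting that the aggregated counters are only available once waitForCompletion has returned. A small sketch of that follow-up step (the counter group and name here are hypothetical placeholders, not taken from WordCountCounterApp):

// After the blocking call returns, read the aggregated counters from the Job.
boolean success = job.waitForCompletion(true);
if (success) {
    // "MyGroup" / "SensitiveWords" are placeholder names for illustration only.
    long hits = job.getCounters().findCounter("MyGroup", "SensitiveWords").getValue();
    System.out.println("Counter value: " + hits);
}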
From source file:com.bigdog.hadoop.mapreduce.group.GroupApp.java
public void group() throws Exception {
    final Configuration configuration = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), configuration);
    if (fileSystem.exists(new Path(OUT_PATH))) {
        fileSystem.delete(new Path(OUT_PATH), true);
    }

    final Job job = new Job(configuration, GroupApp.class.getSimpleName());

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // input format
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 set the mapper class
    job.setMapperClass(MyMapper.class);
    // map output types <k2,v2>
    job.setMapOutputKeyClass(NewK2.class);
    job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    job.setPartitionerClass(HashPartitioner.class);
    job.setNumReduceTasks(1);

    // 1.4 custom grouping comparator
    job.setGroupingComparatorClass(MyGroupingComparator.class);
    // 1.5 TODO combiner

    // 2.2 set the reducer class
    job.setReducerClass(MyReducer.class);
    // reduce output types <k3,v3>
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    // output format
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bigdog.hadoop.mapreduce.partition.KpiApp.java
public void kpi() throws Exception {
    final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName());
    job.setJarByClass(KpiApp.class);

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // input format
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 set the mapper class
    job.setMapperClass(MyMapper.class);
    // map output types <k2,v2>
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(KpiWritable.class);

    // 1.3 custom partitioning
    job.setPartitionerClass(KpiPartitioner.class);
    job.setNumReduceTasks(2);

    // 1.4 TODO sorting and grouping
    // 1.5 TODO combiner

    // 2.2 set the reducer class
    job.setReducerClass(MyReducer.class);
    // reduce output types <k3,v3>
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(KpiWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    // output format
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bigfishgames.biginsights.upsight.mapreduce.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: AvroWordCount <input path> <output path>");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("wordcount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    // AvroJob.setOutputKeySchema(job,
    //         Pair.getPairSchema(Schema.create(Type.STRING),
    //         Schema.create(Type.NULL)));
    AvroJob.setOutputKeySchema(job, Event.getClassSchema());
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MyAvroKeyOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
    return 0;
}
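The run() method above ignores the boolean returned by waitForCompletion and always returns 0, so the caller cannot tell whether the job failed. A common variant, shown here as a sketch rather than as a change to the original project, propagates the job status back through ToolRunner (assuming the class implements org.apache.hadoop.util.Tool, which its run()/getConf() usage suggests):

public int run(String[] args) throws Exception {
    // ... identical job setup as above ...
    return job.waitForCompletion(true) ? 0 : 1; // propagate success/failure to the caller
}

// Typical entry point when the driver implements Tool; the shell exit code
// then reflects job success or failure.
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new MapReduceAvroWordCount(), args);
    System.exit(exitCode);
}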
From source file:com.binbo.wordcount.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class); // Set the combiner
    job.setPartitionerClass(WordPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java
License:Apache License
/**
 * Given indexing parameters, starts an indexing job.
 * The different indexing types are:
 * SF2HB = Simple file (csv, tsv) to HBase directly.
 * SF2HF = Simple file (csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * SF2MF = Simple file (csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * MF2HB = Map file (key and value as csv, tsv) to HBase.
 * MF2HF = Map file (key and value as csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * MF2MF = Map file (key and value as csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * HB2HB = HBase to HBase.
 * HB2HF = HBase to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * HB2MF = HBase to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void execute(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length < 7) {
        String err = "Usage : " + KVIndexer.class
                + " <<Job Type(SF2HB|SF2HF|SF2MF...)>> <<Input Source>> <<Output Sink>> <<XML File Configuration>> <<Skip Header(true|false)>> <<Run KeyGeneration Job>> <<Number Of reducer>> <<Speculative Execution>> <<scanner-cache-size>> <<filter>>";
        IdSearchLog.l.fatal(err);
        System.exit(1);
    }

    String msg = this.getClass().getName() + " > Initializing indexer job.";
    IdSearchLog.l.info(msg);

    int seq = 0;
    int len = args.length;
    String jobType = (len > seq) ? args[seq++] : "";
    String inputSource = (len > seq) ? args[seq++] : "";
    String outputSink = (len > seq) ? args[seq++] : "/tmp/hsearch-index";
    String xmlFilePath = (len > seq) ? args[seq++] : "";
    String skipHeader = (len > seq) ? args[seq++] : "false";
    boolean runKeyGenJob = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : false;
    int numberOfReducer = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 1;
    boolean speculativeExecution = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : true;
    int scannerCacheSize = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 300;
    String filter = (len > seq) ? args[seq++] : "";

    if (isEmpty(jobType)) {
        String err = this.getClass().getName()
                + " > Please enter Job type as one of these :\n SF2HB|SF2HF|SF2MF|MF2HB|MF2HF|MF2MF|HB2HB|HB2HF|HB2MF|IMF2HF";
        System.err.println(err);
        throw new IOException(err);
    }

    if (isEmpty(inputSource)) {
        String err = this.getClass().getName() + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    FieldMapping fm = createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputSink = outputSink.charAt(outputSink.length() - 1) == '/' ? outputSink : outputSink + "/";
    outputSink = outputSink + fm.tableName;

    createHBaseTable(fm);

    KVIndexer.FAM_NAME = fm.familyName.getBytes();
    KVIndexer.FIELD_SEPARATOR = fm.fieldSeparator;

    conf.set(XML_FILE_PATH, xmlFilePath);
    conf.set(OUTPUT_FOLDER, outputSink);
    conf.set(SKIP_HEADER, skipHeader);
    conf.set(RAW_FILE_SEPATATOR, String.valueOf(fm.fieldSeparator));

    Job job = Job.getInstance(conf, "com.bizosys.hsearch.kv.indexing.KVIndexer type : " + jobType + "\n"
            + inputSource + "\n" + outputSink);
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(numberOfReducer);

    Integer jobTypeI = JobTypeMapping.get(jobType);
    if (jobTypeI == null)
        throw new IOException("Invalid Jobtype " + jobType);

    /*
     * If an internal key index is given, generate the keys first and then index;
     * otherwise just run the indexer, creating keys from HBase.
     */
    boolean keyGenjobStatus = false;
    if (-1 != fm.internalKey && runKeyGenJob) {
        Configuration keyGenConf = HBaseConfiguration.create();
        keyGenConf.set(INPUT_SOURCE, inputSource);
        keyGenConf.set(XML_FILE_PATH, xmlFilePath);
        keyGenConf.set(OUTPUT_FOLDER, outputSink);
        keyGenConf.set(SKIP_HEADER, skipHeader);

        Job keyGenJob = Job.getInstance(keyGenConf, "Creating Keys KVKeyGenerator for " + inputSource);

        switch (jobTypeI) {
        case SF2HB:
        case SF2HF:
        case SF2MF: {
            FileInputFormat.addInputPath(keyGenJob, new Path(inputSource));

            keyGenJob.setMapperClass(KVKeyGeneratorMapperFile.class);
            keyGenJob.setInputFormatClass(TextInputFormat.class);
            keyGenJob.setMapOutputKeyClass(Text.class);
            keyGenJob.setMapOutputValueClass(Text.class);

            keyGenJob.setReducerClass(KVKeyGeneratorReducerFile.class);
            keyGenJob.setNumReduceTasks(numberOfReducer);
            keyGenJob.setOutputKeyClass(NullWritable.class);
            keyGenJob.setOutputValueClass(Text.class);

            inputSource = outputSink + "_" + INPUTWITH_KEY;
            Path intermediatePath = new Path(inputSource);
            System.out.println("Final input path " + inputSource);
            FileOutputFormat.setOutputPath(keyGenJob, intermediatePath);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case HB2HB:
        case HB2HF:
        case HB2MF: {
            Scan scan = new Scan();
            scan.setCaching(scannerCacheSize);
            scan.setCacheBlocks(false);

            byte[] family = fm.familyName.getBytes();
            for (String name : fm.nameWithField.keySet()) {
                Field fld = fm.nameWithField.get(name);
                if (!fld.isMergedKey)
                    continue;
                scan.addColumn(family, fld.sourceName.trim().getBytes());
            }

            TableMapReduceUtil.initTableMapperJob(inputSource, // input table
                    scan, // Scan instance to control CF and attribute selection
                    KVKeyGeneratorMapperHBase.class, // mapper class
                    Text.class, // mapper output key
                    ImmutableBytesWritable.class, // mapper output value
                    keyGenJob);

            TableMapReduceUtil.initTableReducerJob(inputSource, // output table
                    KVKeyGeneratorReducerHBase.class, // reducer class
                    keyGenJob);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        default:
            break;
        }
    }

    /*
     * Run the job based on the job type, e.g. SF2HB, SF2MF, SF2HF, etc.
     */
    System.out.println("Sending path " + inputSource);
    runJob(jobTypeI, job, fm, inputSource, outputSink, scannerCacheSize, filter);
}
From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java
License:Apache License
private static int runJob(int jobTypeI, Job job, FieldMapping fm, String input, String output,
        int scannerCacheSize, String filter) throws IOException, InterruptedException, ClassNotFoundException {

    int jobStatus = -1;

    switch (jobTypeI) {
    case SF2HB: {
        IdSearchLog.l.info("Starting Job for SF2HB input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));
        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2MF: {
        IdSearchLog.l.info("Starting Job for SF2MF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));
        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2HF: {
        /*
         * First creates a map file and then converts it to an HFile.
         * Create an intermediate dir for the map file output.
         */
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for SF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // reset the output folder to the intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("SF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // delete the intermediate dir
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // delete the empty _SUCCESS marker
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2HB: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                Text.class, // mapper output key
                BytesWritable.class, // mapper output value
                job);

        TableMapReduceUtil.initTableReducerJob(fm.tableName, // output table
                KVReducerHBase.class, // reducer class
                job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for HB2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // reset the output folder to the intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("HB2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // delete the intermediate dir
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // delete the empty _SUCCESS marker
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2MF: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                Text.class, // mapper output key
                BytesWritable.class, // mapper output value
                job);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case IMF2HF: {
        Path finalOutputDir = new Path(output);
        job.setJarByClass(KVIndexer.class);
        job.setMapperClass(KVMapperHFile.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, finalOutputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        HTable hTable = new HTable(job.getConfiguration(), fm.tableName);
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    default:
        throw new IOException("Invalid Jobtype " + jobTypeI);
    }
}
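In the IMF2HF branch above, waitForCompletion only writes the HFiles; they still have to be bulk-loaded into the table afterwards. As the class comment notes, that is typically done with HBase's LoadIncrementalHFiles. A sketch of that follow-up step, reusing finalOutputDir and hTable from the IMF2HF branch and assuming the older HBase API that this code targets (HTable, HFileOutputFormat):

// After job.waitForCompletion(true) has produced HFiles under finalOutputDir,
// bulk-load them into the target table (older HBase API, matching the HTable usage above).
LoadIncrementalHFiles loader = new LoadIncrementalHFiles(job.getConfiguration());
loader.doBulkLoad(finalOutputDir, hTable);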
From source file:com.bizosys.hsearch.kv.indexing.KVIndexer.java
License:Apache License
/**
 * Given indexing parameters, starts an indexing job.
 * The different indexing types are:
 * SF2HB = Simple file (csv, tsv) to HBase directly.
 * SF2HF = Simple file (csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * SF2MF = Simple file (csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * MF2HB = Map file (key and value as csv, tsv) to HBase.
 * MF2HF = Map file (key and value as csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * MF2MF = Map file (key and value as csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * HB2HB = HBase to HBase.
 * HB2HF = HBase to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * HB2MF = HBase to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void execute(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length < 7) {
        String err = "Usage : " + KVIndexer.class
                + " <<Job Type(SF2HB|SF2HF|SF2MF...)>> <<Input Source>> <<Output Sink>> <<XML File Configuration>> <<Skip Header(true|false)>> <<Run KeyGeneration Job>> <<Number Of reducer>> <<Speculative Execution>> <<scanner-cache-size>> <<filter>>";
        IdSearchLog.l.fatal(err);
        System.exit(1);
    }

    String msg = this.getClass().getName() + " > Initializing indexer job.";
    IdSearchLog.l.info(msg);

    int seq = 0;
    int len = args.length;
    String jobType = (len > seq) ? args[seq++] : "";
    String inputSource = (len > seq) ? args[seq++] : "";
    String outputSink = (len > seq) ? args[seq++] : "/tmp/hsearch-index";
    String xmlFilePath = (len > seq) ? args[seq++] : "";
    String skipHeader = (len > seq) ? args[seq++] : "false";
    boolean runKeyGenJob = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : false;
    int numberOfReducer = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 1;
    boolean speculativeExecution = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : true;
    int scannerCacheSize = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 300;
    String filter = (len > seq) ? args[seq++] : "";

    if (isEmpty(jobType)) {
        String err = this.getClass().getName()
                + " > Please enter Job type as one of these :\n SF2HB|SF2HF|SF2MF|MF2HB|MF2HF|MF2MF|HB2HB|HB2HF|HB2MF|IMF2HF";
        System.err.println(err);
        throw new IOException(err);
    }

    if (isEmpty(inputSource)) {
        String err = this.getClass().getName() + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    FieldMapping fm = createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputSink = outputSink.charAt(outputSink.length() - 1) == '/' ? outputSink : outputSink + "/";
    outputSink = outputSink + fm.tableName;

    createHBaseTable(fm);

    KVIndexer.FAM_NAME = fm.familyName.getBytes();
    KVIndexer.FIELD_SEPARATOR = fm.fieldSeparator;

    conf.set(XML_FILE_PATH, xmlFilePath);
    conf.set(OUTPUT_FOLDER, outputSink);
    conf.set(SKIP_HEADER, skipHeader);
    conf.setBoolean("mapreduce.map.speculative", speculativeExecution);

    Job job = Job.getInstance(conf, "com.bizosys.hsearch.kv.indexing.KVIndexer type : " + jobType + "\n"
            + inputSource + "\n" + outputSink);
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(numberOfReducer);

    Integer jobTypeI = JobTypeMapping.get(jobType);
    if (jobTypeI == null)
        throw new IOException("Invalid Jobtype " + jobType);

    /*
     * If an internal key index is given, generate the keys first and then index;
     * otherwise just run the indexer, creating keys from HBase.
     */
    boolean keyGenjobStatus = false;
    if (-1 != fm.internalKey && runKeyGenJob) {
        Configuration keyGenConf = HBaseConfiguration.create();
        keyGenConf.set(INPUT_SOURCE, inputSource);
        keyGenConf.set(XML_FILE_PATH, xmlFilePath);
        keyGenConf.set(OUTPUT_FOLDER, outputSink);
        keyGenConf.set(SKIP_HEADER, skipHeader);

        Job keyGenJob = Job.getInstance(keyGenConf, "Creating Keys KVKeyGenerator for " + inputSource);

        switch (jobTypeI) {
        case SF2HB:
        case SF2HF:
        case SF2MF: {
            FileInputFormat.addInputPath(keyGenJob, new Path(inputSource));

            keyGenJob.setMapperClass(KVKeyGeneratorMapperFile.class);
            keyGenJob.setInputFormatClass(TextInputFormat.class);
            keyGenJob.setMapOutputKeyClass(Text.class);
            keyGenJob.setMapOutputValueClass(Text.class);

            keyGenJob.setReducerClass(KVKeyGeneratorReducerFile.class);
            keyGenJob.setNumReduceTasks(numberOfReducer);
            keyGenJob.setOutputKeyClass(NullWritable.class);
            keyGenJob.setOutputValueClass(Text.class);

            inputSource = outputSink + "_" + INPUTWITH_KEY;
            Path intermediatePath = new Path(inputSource);
            System.out.println("Final input path " + inputSource);
            FileOutputFormat.setOutputPath(keyGenJob, intermediatePath);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case HB2HB:
        case HB2HF:
        case HB2MF: {
            Scan scan = new Scan();
            scan.setCaching(scannerCacheSize);
            scan.setCacheBlocks(false);

            // Added filter
            if (null != filter) {
                if (filter.trim().length() > 0) {
                    int index = filter.indexOf('=');
                    scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                            filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                            filter.substring(index + 1).getBytes()));
                }
            }

            byte[] family = fm.familyName.getBytes();
            for (String name : fm.nameWithField.keySet()) {
                Field fld = fm.nameWithField.get(name);
                if (!fld.isMergedKey)
                    continue;
                scan.addColumn(family, fld.sourceName.trim().getBytes());
            }

            TableMapReduceUtil.initTableMapperJob(inputSource, // input table
                    scan, // Scan instance to control CF and attribute selection
                    KVKeyGeneratorMapperHBase.class, // mapper class
                    Text.class, // mapper output key
                    ImmutableBytesWritable.class, // mapper output value
                    keyGenJob);

            TableMapReduceUtil.initTableReducerJob(inputSource, // output table
                    KVKeyGeneratorReducerHBase.class, // reducer class
                    keyGenJob);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case MF2HB:
        case MF2HF:
        case MF2MF: {
            break;
        }
        default:
            break;
        }
    }

    /*
     * Run the job based on the job type, e.g. SF2HB, SF2MF, SF2HF, etc.
     */
    System.out.println("Sending path " + inputSource);
    runJob(jobTypeI, job, fm, inputSource, outputSink, scannerCacheSize, filter);
}