Example usage for org.apache.hadoop.mapreduce Job waitForCompletion

List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job waitForCompletion.

Prototype

public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and wait for it to finish.
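
As a quick orientation before the examples, here is a minimal sketch of the usual calling pattern, assuming an identity map/reduce pass over plain-text input (the class name MinimalJobRunner and the two-argument input/output layout are illustrative placeholders, not taken from any example below): configure a Job, submit it with waitForCompletion(true) so progress is printed while the client blocks, and turn the returned boolean into the process exit code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobRunner {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal identity job");
        job.setJarByClass(MinimalJobRunner.class);

        // Base Mapper and Reducer act as identity: records pass through unchanged.
        // With the default TextInputFormat, keys are byte offsets and values are lines.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion(true) submits the job and blocks until it finishes,
        // printing progress to the console; it returns true if the job succeeded.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}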

Usage

From source file:clustering.similarity.PreDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf(
                "usage: %s inverted_index_result_dir output_dir"
                        + " [compress_or_not] [reducer_number] [deci_number]\n",
                this.getClass().getSimpleName());
        System.exit(1);
    }
    Configuration conf = getConf();

    conf = MapReduceUtils.initConf(conf);
    conf.set("mapreduce.reduce.speculative", "false");

    // TODO: 17-4-24 calculate split number from reducer number
    conf.setInt("split.num", 8);

    if (args.length > 3) {
        conf.setInt("reducer.num", Integer.valueOf(args[3]));
    } else {
        conf.setInt("reducer.num", 29);
    }
    if (args.length > 4) {
        conf.setInt("deci.number", Integer.valueOf(args[4]));
    } else {
        conf.setInt("deci.number", 3);
    }

    Job job = Job.getInstance(conf, "pre job");
    job.setJarByClass(PreDriver.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapperClass(PreMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setPartitionerClass(PrePartitioner.class);

    job.setNumReduceTasks(conf.getInt("reducer.num", 29));
    job.setReducerClass(PreReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // set default compression
    if (args.length > 2 && args[2].equals("0")) {
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("inverted similarity pre job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file:clustering.tf_idf.WorkflowDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir " + "[gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count documents number in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;

    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();

    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));

    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);

    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);

    return job3.waitForCompletion(true) ? 0 : 1;
}

From source file:cn.chinahadoop.hive.mr.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cn.chinahadoop.practise.TestCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(TestCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cn.edu.bjut.hadoop.WcAndIk.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WcAndIk.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java

public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);

    if (fs.exists(output)) {
        fs.delete(output);
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);

    job.waitForCompletion(true);

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Generator.java

public static String generate(Path crawlPath, Configuration conf) throws Exception {
    SegmentUtil.initSegments(crawlPath, conf);
    String segmentName = SegmentUtil.createSegment(crawlPath, conf);

    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path generatePath = new Path(crawlPath, "segments/" + segmentName + "/generate");

    Job job = new Job(conf);
    job.setJobName("generate " + crawlPath.toString());
    job.setJarByClass(Generator.class);

    job.setReducerClass(GeneratorReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, generatePath);
    job.waitForCompletion(true);
    long count = job.getCounters().findCounter("generator", "count").getValue();
    System.out.println("total generate:" + count);
    if (count == 0) {
        return null;
    } else {
        return segmentName;
    }

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java

public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {

    Job job = new Job(conf);
    job.setJobName(jobName + "  " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }

    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);

}

From source file:cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.java

public static void fetch(Path crawlPath, String segmentName, Configuration conf) throws Exception {
    Path segmentPath = new Path(crawlPath, "segments/" + segmentName);
    Path generatePath = new Path(segmentPath, "generate");

    Job job = new Job(conf);
    job.setJobName("fetch " + crawlPath.toString());
    job.setJarByClass(Fetcher.class);

    job.setReducerClass(FetcherReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(FetcherOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    FileInputFormat.addInputPath(job, generatePath);
    FileOutputFormat.setOutputPath(job, segmentPath);

    job.waitForCompletion(true);
}

From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.DbUpdater.java

@Override
public int run(String[] args) throws Exception {

    Path crawldb = new Path(args[0]);
    Path segmentPath = new Path(args[1]);
    Job job = Merge.createJob(CrawlerConfiguration.create(), crawldb);
    job.setJarByClass(DbUpdater.class);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(segmentPath, "fetch"));
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job,
            new Path(segmentPath, "parse_temp"));
    job.waitForCompletion(true);
    Merge.install(crawldb);
    return 0;
}