List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
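waitForCompletion(boolean verbose) submits the job if it has not been submitted yet, waits for it to finish, and returns true only if the job succeeded; when verbose is true it also prints progress to the console while waiting. A minimal driver sketch of the canonical pattern, assuming hypothetical ExampleMapper and ExampleReducer classes:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExampleDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example job");
        job.setJarByClass(ExampleDriver.class);
        job.setMapperClass(ExampleMapper.class);   // hypothetical Mapper implementation
        job.setReducerClass(ExampleReducer.class); // hypothetical Reducer implementation
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Block until the job finishes; verbose = true streams progress
        // to the console. The boolean result reports success or failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples below show the same pattern in real projects.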
From source file:clustering.similarity.PreDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s inverted_index_result_dir output_dir"
                + " [compress_or_not] [reducer_number] [deci_number]\n",
                this.getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    conf.set("mapreduce.reduce.speculative", "false");

    // TODO: 17-4-24 calculate split number from reducer number
    conf.setInt("split.num", 8);

    if (args.length > 3) {
        conf.setInt("reducer.num", Integer.valueOf(args[3]));
    } else {
        conf.setInt("reducer.num", 29);
    }
    if (args.length > 4) {
        conf.setInt("deci.number", Integer.valueOf(args[4]));
    } else {
        conf.setInt("deci.number", 3);
    }

    Job job = Job.getInstance(conf, "pre job");
    job.setJarByClass(PreDriver.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapperClass(PreMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setPartitionerClass(PrePartitioner.class);

    job.setNumReduceTasks(conf.getInt("reducer.num", 29));
    job.setReducerClass(PreReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // set default compression
    if (args.length > 2 && args[2].equals("0")) {
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("inverted similarity pre job finished in: "
            + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}
From source file:clustering.tf_idf.WorkflowDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir "
                + "[gname_weight]\n", getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count documents number in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;

    Job preJob = docCntDriver.configJob(preJobArgs);
    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;

    Job job1 = termCntDriver.configJob(job1Args);
    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();

    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));

    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }

    Job job2 = termFreqDriver.configJob(job2Args);
    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;

    Job job3 = tf_idf_driver.configJob(job3Args);
    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);
    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);

    return job3.waitForCompletion(true) ? 0 : 1;
}
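The runJobs helper called above is not included in this snippet, so its implementation here is an assumption. A JobControl does not execute anything when jobs are added to it: it implements Runnable and has to be driven on its own thread until allFinished() reports that the whole job graph has completed. A minimal sketch of what such a helper might look like:

// Hypothetical helper, not from the original source: drives a JobControl
// on a background thread and blocks until every controlled job has finished.
private static void runJobs(JobControl jobControl) throws InterruptedException {
    Thread runner = new Thread(jobControl);
    runner.setDaemon(true);
    runner.start();
    while (!jobControl.allFinished()) {
        Thread.sleep(1000); // poll the job graph once per second
    }
    jobControl.stop();
}

Because job3 has already been submitted by the JobControl by the time runJobs returns, the final job3.waitForCompletion(true) call does not resubmit it; it only waits for completion and returns the job's success status.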
From source file:cn.chinahadoop.hive.mr.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:cn.chinahadoop.practise.TestCombiner.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(TestCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:cn.edu.bjut.hadoop.WcAndIk.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WcAndIk.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java
public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);
    if (fs.exists(output)) {
        fs.delete(output);
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);
    job.waitForCompletion(true);
}
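Note that this example discards the boolean returned by waitForCompletion, so a failed read job still ends the program normally. A small variant (not in the original source) that propagates failure to the caller:

boolean success = job.waitForCompletion(true);
if (!success) {
    // surface job failure instead of silently returning
    throw new IOException("dbreader job failed: " + job.getJobName());
}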
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Generator.java
public static String generate(Path crawlPath, Configuration conf) throws Exception {
    SegmentUtil.initSegments(crawlPath, conf);
    String segmentName = SegmentUtil.createSegment(crawlPath, conf);

    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path generatePath = new Path(crawlPath, "segments/" + segmentName + "/generate");

    Job job = new Job(conf);
    job.setJobName("generate " + crawlPath.toString());
    job.setJarByClass(Generator.class);
    job.setReducerClass(GeneratorReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, generatePath);
    job.waitForCompletion(true);

    long count = job.getCounters().findCounter("generator", "count").getValue();
    System.out.println("total generate:" + count);

    if (count == 0) {
        return null;
    } else {
        return segmentName;
    }
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {
    Job job = new Job(conf);
    job.setJobName(jobName + " " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");

    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }
    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }

    FileOutputFormat.setOutputPath(job, newdb);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.java
public static void fetch(Path crawlPath, String segmentName, Configuration conf) throws Exception {
    Path segmentPath = new Path(crawlPath, "segments/" + segmentName);
    Path generatePath = new Path(segmentPath, "generate");

    Job job = new Job(conf);
    job.setJobName("fetch " + crawlPath.toString());
    job.setJarByClass(Fetcher.class);
    job.setReducerClass(FetcherReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(FetcherOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, generatePath);
    FileOutputFormat.setOutputPath(job, segmentPath);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.DbUpdater.java
@Override
public int run(String[] args) throws Exception {
    Path crawldb = new Path(args[0]);
    Path segmentPath = new Path(args[1]);

    Job job = Merge.createJob(CrawlerConfiguration.create(), crawldb);
    job.setJarByClass(DbUpdater.class);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(segmentPath, "fetch"));
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(segmentPath, "parse_temp"));
    job.waitForCompletion(true);
    Merge.install(crawldb);
    return 0;
}
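Merge.createJob and Merge.install are not shown in this snippet. In Nutch-style crawldb layouts, an install step typically promotes the freshly merged "new" directory to "current" once the update job has completed; the sketch below is a hypothetical illustration of that rotation, not WebCollector's actual implementation:

// Hypothetical sketch of a Nutch-style "install" rotation; the real
// Merge.install may differ in details.
public static void install(Path crawldb) throws IOException {
    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    Path newdb = new Path(crawldb, "new");
    Path currentdb = new Path(crawldb, "current");
    Path olddb = new Path(crawldb, "old");
    if (fs.exists(olddb)) {
        fs.delete(olddb, true);      // drop the previous backup
    }
    if (fs.exists(currentdb)) {
        fs.rename(currentdb, olddb); // keep one backup generation
    }
    fs.rename(newdb, currentdb);     // promote the merged db
}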