List of usage examples for org.apache.hadoop.mapreduce.Job#waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
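waitForCompletion(true) submits the job, prints progress to the console while it runs (verbose = true), and blocks until the job terminates, returning true if it succeeded. Every example on this page follows the same configure-then-wait driver pattern. As a minimal, self-contained sketch of that pattern (essentially the canonical word count; the class name WaitForCompletionExample and the job name are placeholders, not from any source file listed here):

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WaitForCompletionExample {

    // Emits (word, 1) for every token in the input line.
    public static class TokenMapper extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts emitted for each word.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "wait-for-completion example");
        job.setJarByClass(WaitForCompletionExample.class);
        job.setMapperClass(TokenMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit, print progress while running (verbose = true), and block
        // until the job finishes; waitForCompletion returns true on success.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Run it as, for example: hadoop jar example.jar WaitForCompletionExample <input> <output>. Note that waitForCompletion declares IOException, InterruptedException, and ClassNotFoundException, which is why the drivers below either declare throws or wrap the call in try/catch.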
From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.PerfectX.java
License:Apache License
/** Runs this tool. */
@Override
public int run(String[] argv) throws Exception {
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    LOG.info("Tool: " + PerfectX.class.getSimpleName());
    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);
    LOG.info(" - use in-mapper combining: " + args.imc);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(PerfectX.class.getSimpleName());
    job.setJarByClass(PerfectX.class);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.WordCount.java
License:Apache License
/** Runs this tool. */
@Override
public int run(String[] argv) throws Exception {
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    LOG.info("Tool: " + WordCount.class.getSimpleName());
    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);
    LOG.info(" - use in-mapper combining: " + args.imc);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(WordCount.class.getSimpleName());
    job.setJarByClass(WordCount.class);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java
License:Apache License
/** Runs this tool. */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sourcesString = cmdline.getOptionValue(SOURCES);
    String[] sources = sourcesString.split(",");
    for (int i = 0; i < sources.length; i++) {
        sources[i] = sources[i].trim();
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - use sources: " + sourcesString);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setStrings(SOURCES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain.java
License:Open Source License
public int run(String[] args) throws IOException { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain <input> <output>"); System.exit(2);/*from www. java 2 s . c o m*/ } conf.set("mapreduce.input.fileinputformat.split.maxsize", "100"); Job job = new Job(conf, "ThreelMain"); job.setJarByClass(ThreelMain.class); job.setInputFormatClass(UnsplitableTextInputFormat.class); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(0); // job.setOutputKeyClass(LongWritable.class); //job.setOutputValueClass(Text.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.out.println("\nStarting Job ..."); final long startTime = System.currentTimeMillis(); try { if (!job.waitForCompletion(true)) { System.out.println("Job failed."); System.exit(1); } } catch (Exception e) { throw new RuntimeException(e); } finally { final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Duration is " + duration + " seconds."); } return 0; }
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java
License:Open Source License
public int run(String[] args) throws IOException { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.ConsineMain <input> <output>"); System.exit(2);//from ww w. java 2s . c om } Job job1 = new Job(conf, "ConsineMain"); job1.setJarByClass(CosineMain.class); job1.setMapperClass(AggregateReadingsMapper.class); job1.setMapOutputKeyClass(LongWritable.class); job1.setMapOutputValueClass(DoubleWritable.class); job1.setReducerClass(AggregateReadingsReducer.class); job1.setOutputKeyClass(LongWritable.class); job1.setOutputValueClass(Text.class); FileInputFormat.setInputDirRecursive(job1, true); FileInputFormat.setInputPaths(job1, new Path(otherArgs[0])); int lastIdx = otherArgs[0].lastIndexOf("/"); String tempOutput = otherArgs[0].substring(0, lastIdx) + "/temp"; FileOutputFormat.setOutputPath(job1, new Path(tempOutput)); System.out.println("\nStarting Job-1 ..."); final long startTime = System.currentTimeMillis(); try { final long startTimeJob1 = System.currentTimeMillis(); if (!job1.waitForCompletion(true)) { System.out.println("Job-1 failed."); } else { System.out.println("Duration of Job1 " + ((System.currentTimeMillis() - startTimeJob1) / 1000.0) + " seconds."); final Job job2 = new Job(conf, "ConsineMain Aggregate"); job2.setJarByClass(CosineMain.class); job2.setInputFormatClass(CartesianInputFormat.class); CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, tempOutput); CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, tempOutput); FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1])); job2.setMapperClass(CartesianProductMapper.class); job2.setMapOutputKeyClass(DoubleWritable.class); job2.setMapOutputValueClass(Text.class); job2.setSortComparatorClass(DescendingKeyComparator.class); job2.setReducerClass(CartesianProductReducer.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(DoubleWritable.class); job2.setNumReduceTasks(10); final long startTimeJob2 = System.currentTimeMillis(); System.out.println("\nStarting Job-2 ..."); if (!job2.waitForCompletion(true)) { System.out.println("Job-2 failed."); } else { System.out.println("Duration of Job2: " + ((System.currentTimeMillis() - startTimeJob2) / 1000.0) + " seconds."); } } FileSystem fs = FileSystem.get(conf); fs.delete(new Path(tempOutput), true); } catch (Exception e) { throw new RuntimeException(e); } finally { final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Total Duration: " + duration + " seconds."); } return 0; }
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java
License:Open Source License
public int run1(String[] args) throws IOException {
    if (args.length != 3) {
        System.err.println("Usage: java " + getClass().getName() + " <inputDir> <outDir> <ntasks>");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration conf = getConf();
    final Job job2 = new Job(conf, "CosineMain cartesian product");
    job2.setJarByClass(CosineMain.class);

    job2.setInputFormatClass(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, args[0]);
    FileOutputFormat.setOutputPath(job2, new Path(args[1]));

    job2.setMapperClass(CartesianProductMapper.class);
    job2.setMapOutputKeyClass(DoubleWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setSortComparatorClass(DescendingKeyComparator.class);

    job2.setReducerClass(CartesianProductReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(DoubleWritable.class);
    job2.setNumReduceTasks(Integer.parseInt(args[2]));

    System.out.println("\nStarting Job-2 ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job2.waitForCompletion(true)) {
            System.out.println("Job-2 failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.HistogramMain.java
License:Open Source License
public int run(String[] args) throws IOException { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.HistogramMain <input> <output>"); System.exit(2);/*from w w w. jav a2s.co m*/ } Job job = new Job(conf, "HistogramMain"); job.setJarByClass(HistogramMain.class); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(DoubleWritable.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.out.println("\nStarting Job ..."); final long startTime = System.currentTimeMillis(); try { if (!job.waitForCompletion(true)) { System.out.println("Job failed."); System.exit(1); } } catch (Exception e) { throw new RuntimeException(e); } finally { final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Duration is " + duration + " seconds."); } return 0; }
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.PARMain.java
License:Open Source License
public int run(String[] args) throws IOException { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.PARMain <input> <output>"); System.exit(2);/*w ww . ja v a 2 s. co m*/ } Job job = new Job(conf, "PARMain"); job.setJarByClass(PARMain.class); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(DoubleWritable.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.out.println("\nStarting Job ..."); final long startTime = System.currentTimeMillis(); try { if (!job.waitForCompletion(true)) { System.out.println("Job failed."); System.exit(1); } } catch (Exception e) { throw new RuntimeException(e); } finally { final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Duration is " + duration + " seconds."); } return 0; }
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.ThreelMain.java
License:Open Source License
public int run(String[] args) throws IOException { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.ThreelMain <input> <output>"); System.exit(2);// w w w.jav a2s.c o m } Job job = new Job(conf, "ThreelMain"); job.setJarByClass(ThreelMain.class); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(ArrayPrimitiveWritable.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.out.println("\nStarting Job ..."); final long startTime = System.currentTimeMillis(); try { if (!job.waitForCompletion(true)) { System.out.println("Job failed."); System.exit(1); } } catch (Exception e) { throw new RuntimeException(e); } finally { final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Duration is " + duration + " seconds."); } return 0; }
From source file:CalculateSentiment.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Path tempDir = new Path("wordcount-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: wordcount <in> <out> <category>");
        System.exit(2);
    }
    conf.set("category", otherArgs[2]);

    // try {
    //     String filePath = otherArgs[0];
    //     BufferedReader br = new BufferedReader(new FileReader(filePath));
    //     String line = br.readLine();
    //     conf.set("category", line);
    // } catch (Exception e) {
    //     e.printStackTrace();
    // }
    // conf.set("category", WordCount.read(otherArgs[2]));

    DistributedCache.createSymlink(conf);
    String path = "CalculateSentiment.obj";
    Path filePath = new Path(path);
    String uriWithLink = filePath.toUri().toString() + "#" + "object";
    DistributedCache.addCacheFile(new URI(uriWithLink), conf);
    // DistributedCache.addCacheFile(new URI("/CalculateSentiment.obj"), conf);

    Job job = new Job(conf, "Test");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(DoubleSumReducer.class);
    job.setReducerClass(DoubleSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}