List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
From source file:com.bah.applefox.main.plugins.imageindex.ImageLoader.java
License:Apache License
/** * run takes the comandline args as arguments (in this case from a * configuration file), creates a new job, configures it, initiates it, * waits for completion, and returns 0 if it is successful (1 if it is not) * // ww w . j a va 2s.c o m * @param args * the commandline arguments (in this case from a configuration * file) * * @return 0 if the job ran successfully and 1 if it isn't */ public int run(String[] args) throws Exception { checkedImages = args[18]; hashTable = args[17]; tagTable = args[19]; divsFile = args[20]; UserAgent = args[6]; // Create the table AccumuloUtils.setSplitSize(args[23]); AccumuloUtils.connectBatchWrite(checkedImages).close(); // Give the job a name String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis(); // Create the job and set its jar Job job = new Job(getConf(), jobName); job.setJarByClass(this.getClass()); // Set the url table to read from String urlTable = args[5]; job.setInputFormatClass(AccumuloInputFormat.class); InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable, new Authorizations()); job.setMapperClass(MapperClass.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setNumReduceTasks(Integer.parseInt(args[4])); job.setReducerClass(ReducerClass.class); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable); AccumuloUtils.setSplitSize(args[22]); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:com.bah.applefox.main.plugins.pageranking.utilities.CountURLs.java
License:Apache License
/**
 * Configures and runs the URL-counting MapReduce job: input comes from the
 * Accumulo table named {@code args[12] + "From"} and output goes to the
 * table named by {@code args[15]}.
 *
 * @param args
 *            the command-line arguments; indices 0-3, 12 and 15 are read
 * @return 0 if the job completed successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    // Job name: simple class name plus a timestamp.
    String name = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();
    Job job = new Job(getConf(), name);
    job.setJarByClass(this.getClass());
    Configuration jobConf = job.getConfiguration();

    mappedInput = args[12] + "From";

    String instanceName = args[0];
    String zooKeepers = args[1];
    String user = args[2];
    byte[] password = args[3].getBytes();

    // Input side.
    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(jobConf, instanceName, zooKeepers);
    InputFormatBase.setInputInfo(jobConf, user, password, mappedInput, new Authorizations());

    // Map and reduce stages.
    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setReducerClass(ReducerClass.class);

    // Output side.
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(jobConf, instanceName, zooKeepers);
    AccumuloOutputFormat.setOutputInfo(jobConf, user, password, true, args[15]);

    // waitForCompletion reports the same success flag as isSuccessful().
    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
From source file:com.bah.applefox.main.plugins.pageranking.utilities.DampenTable.java
License:Apache License
public int run(String[] args) throws Exception { String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis(); Job job = new Job(getConf(), jobName); job.setJarByClass(this.getClass()); tablePrefix = args[13];//from ww w . j a v a2 s .co m dampeningFactor = Double.parseDouble(args[14]); job.setInputFormatClass(AccumuloInputFormat.class); AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), tablePrefix + "New", new Authorizations()); job.setMapperClass(MapperClass.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setReducerClass(ReducerClass.class); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, tablePrefix + "New"); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:com.bah.applefox.main.plugins.pageranking.utilities.InitializePRTables.java
License:Apache License
public int run(String[] args) throws Exception { tablePrefix = args[13];//from www. j a v a2 s . c om String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis(); Job job = new Job(getConf(), jobName); job.setJarByClass(this.getClass()); job.setInputFormatClass(AccumuloInputFormat.class); InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[12] + "To", new Authorizations()); job.setMapperClass(MapperClass.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setReducerClass(ReducerClass.class); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, tablePrefix + "Old"); AccumuloUtils.connectBatchWrite(tablePrefix + "New"); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:com.bah.applefox.main.plugins.pageranking.utilities.MRPageRanking.java
License:Apache License
public int run(String[] args) throws Exception { String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis(); Job job = new Job(getConf(), jobName); job.setJarByClass(this.getClass()); tablePrefix = args[13];//from w ww. j a v a2 s . co m outboundLinks = args[15]; job.setInputFormatClass(AccumuloInputFormat.class); AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[12] + "To", new Authorizations()); job.setMapperClass(MapperClass.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setReducerClass(ReducerClass.class); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, tablePrefix + "New"); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:com.bah.applefox.main.plugins.webcrawler.WebCrawler.java
License:Apache License
/**
 * Builds the web-crawler MapReduce job from the supplied arguments, submits
 * it, blocks until it finishes and reports the outcome. The job is map-only
 * (zero reduce tasks) and uses {@code NullOutputFormat}; the Accumulo output
 * settings are still written into the job configuration.
 *
 * @param args
 *            the command-line arguments (in this case taken from a
 *            configuration file); indices 0-3, 5, 6, 12 and 24 are read
 *
 * @return 0 if the job completed successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    userAgent = args[6];

    // Job name: simple class name plus a timestamp.
    String name = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();
    Job job = new Job(getConf(), name);
    job.setJarByClass(this.getClass());
    Configuration jobConf = job.getConfiguration();

    // Publish the table names through the class fields.
    String urlTable = args[5];
    String linkTablePrefix = args[12];
    table = urlTable;
    AccumuloUtils.setSplitSize(args[24]);
    table2 = linkTablePrefix + "From";
    table3 = linkTablePrefix + "To";

    String instanceName = args[0];
    String zooKeepers = args[1];
    String user = args[2];
    byte[] password = args[3].getBytes();

    // Input side: read from the URL table.
    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(jobConf, instanceName, zooKeepers);
    InputFormatBase.setInputInfo(jobConf, user, password, urlTable, new Authorizations());

    // Map-only job.
    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setNumReduceTasks(0);

    // No framework-managed output; Accumulo connection details are still
    // stored in the configuration.
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(jobConf, instanceName, zooKeepers);
    AccumuloOutputFormat.setOutputInfo(jobConf, user, password, true, urlTable);

    // waitForCompletion reports the same success flag as isSuccessful().
    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
From source file:com.baidu.cloud.bmr.mapreduce.AccessLogAnalyzer.java
License:Open Source License
/**
 * Command-line entry point: runs the access-log analysis job over the given
 * input path and writes the result to the given output path.
 *
 * <p>Exit status: 0 if the job succeeded, 1 if the job failed or could not
 * be set up or run, -1 if the arguments are wrong.
 *
 * @param args
 *            exactly two elements: the input path and the output path
 */
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage: AccessLogAnalyzer <input path> <output path>");
        System.exit(-1);
    }
    String inputPath = args[0];
    String outputPath = args[1];
    Configuration conf = new Configuration();
    try {
        Job job = new Job(conf, "AccessLogAnalyzer");
        job.setJarByClass(AccessLogAnalyzer.class);
        job.setMapperClass(AccessLogAnalyzerMapper.class);
        job.setReducerClass(AccessLogAnalyzerReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (InterruptedException e) {
        // Restore the interrupt flag before terminating.
        Thread.currentThread().interrupt();
        System.err.println("AccessLogAnalyzer was interrupted: " + e);
        System.exit(1);
    } catch (IOException | ClassNotFoundException e) {
        // Previously swallowed silently, which made a failed run exit with
        // status 0; report the cause and signal failure instead.
        System.err.println("AccessLogAnalyzer failed: " + e);
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.bark.hadoop.lab3.PageRank.java
@Override public int run(String args[]) { String tmp = "/tmp/" + new Date().getTime(); // long timeStamp = new Date().getTime(); try {//w ww . j a v a2 s .co m /** * Job 1: Parse XML input and read title,links */ Configuration conf = new Configuration(); conf.set("xmlinput.start", "<page>"); conf.set("xmlinput.end", "</page>"); Job job = Job.getInstance(conf); job.setJarByClass(PageRank.class); // specify a mapper job.setMapperClass(RedLinkMapper.class); // specify a reducer job.setReducerClass(RedLinkReducer.class); // specify output types job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // specify input and output DIRECTORIES FileInputFormat.addInputPath(job, new Path(args[0])); job.setInputFormatClass(XmlInputFormat.class); FileOutputFormat.setOutputPath(job, new Path((args[1] + tmp + "/job1"))); job.setOutputFormatClass(TextOutputFormat.class); job.waitForCompletion(true); } catch (InterruptedException | ClassNotFoundException | IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error during mapreduce job1."); return 2; } /** * Job 2: Adjacency outGraph */ try { Configuration conf2 = new Configuration(); Job job2 = Job.getInstance(conf2); job2.setJarByClass(PageRank.class); // specify a mapper job2.setMapperClass(AdjMapper.class); // specify a reducer job2.setReducerClass(AdjReducer.class); // specify output types job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); // specify input and output DIRECTORIES FileInputFormat.addInputPath(job2, new Path((args[1] + tmp + "/job1"))); job2.setInputFormatClass(TextInputFormat.class); FileOutputFormat.setOutputPath(job2, new Path((args[1] + tmp + "/job2"))); job2.setOutputFormatClass(TextOutputFormat.class); job2.waitForCompletion(true); } catch (InterruptedException | ClassNotFoundException | IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); 
System.err.println("Error during mapreduce job2."); return 2; } /** * Job 3: PageCount */ try { Configuration conf3 = new Configuration(); /** * Change output separator to "=" instead of default \t for this job */ conf3.set("mapreduce.output.textoutputformat.separator", "="); Job job3 = Job.getInstance(conf3); job3.setJarByClass(PageRank.class); // specify a mapper job3.setMapperClass(PageCountMapper.class); // specify a reducer job3.setReducerClass(PageCountReducer.class); // specify output types job3.setOutputKeyClass(Text.class); job3.setOutputValueClass(IntWritable.class); // specify input and output DIRECTORIES FileInputFormat.addInputPath(job3, new Path((args[1] + tmp + "/job2"))); job3.setInputFormatClass(TextInputFormat.class); FileOutputFormat.setOutputPath(job3, new Path((args[1] + tmp + "/job3"))); job3.setOutputFormatClass(TextOutputFormat.class); job3.waitForCompletion(true); } catch (InterruptedException | ClassNotFoundException | IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error during mapreduce job3."); return 2; } /** * Job 4: PageRank */ for (int i = 1; i < 9; i++) { try { Configuration conf4 = new Configuration(); /** * Read number of nodes from the output of job 3 : pageCount */ Path path = new Path((args[1] + tmp + "/job3")); FileSystem fs = path.getFileSystem(conf4); RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true); int n = 0; Pattern pt = Pattern.compile("(\\d+)"); while (ri.hasNext()) { LocatedFileStatus lfs = ri.next(); if (lfs.isFile() && n == 0) { FSDataInputStream inputStream = fs.open(lfs.getPath()); BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); String s = null; while ((s = br.readLine()) != null) { Matcher mt = pt.matcher(s); if (mt.find()) { n = new Integer(mt.group(1)); break; } } } } /** * Done reading number of nodes, make it available to MapReduce * job key: N */ conf4.setInt("N", n); Job job4 = 
Job.getInstance(conf4); job4.setJarByClass(PageRank.class); // specify a mapper job4.setMapperClass(PageRankMapper.class); // specify a reducer job4.setReducerClass(PageRankReducer.class); // specify output types job4.setOutputKeyClass(Text.class); job4.setOutputValueClass(Text.class); // specify input and output DIRECTORIES if (i == 1) { FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job2"))); } else { FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job4/" + (i - 1)))); } job4.setInputFormatClass(TextInputFormat.class); FileOutputFormat.setOutputPath(job4, new Path((args[1] + tmp + "/job4/" + i))); job4.setOutputFormatClass(TextOutputFormat.class); job4.waitForCompletion(true); } catch (InterruptedException | ClassNotFoundException | IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error during mapreduce job4."); return 2; } } /** * Job 5: Sort iteration 1 and iteration 8 */ int returnCode = 0; for (int i = 0; i < 2; i++) { try { Configuration conf5 = new Configuration(); /** * Read number of nodes from the output of job 3 : pageCount */ Path path = new Path((args[1] + tmp + "/job3")); FileSystem fs = path.getFileSystem(conf5); RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true); int n = 0; Pattern pt = Pattern.compile("(\\d+)"); while (ri.hasNext()) { LocatedFileStatus lfs = ri.next(); if (lfs.isFile() && n == 0) { FSDataInputStream inputStream = fs.open(lfs.getPath()); BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); String s = null; while ((s = br.readLine()) != null) { Matcher mt = pt.matcher(s); if (mt.find()) { n = new Integer(mt.group(1)); break; } } } } /** * Done reading number of nodes, make it available to MapReduce * job key: N */ conf5.setInt("N", n); Job job5 = Job.getInstance(conf5); /** * one reducer only */ job5.setNumReduceTasks(1); job5.setSortComparatorClass(MyWritableComparator.class); 
job5.setJarByClass(PageRank.class); // specify a mapper job5.setMapperClass(SortMapper.class); job5.setMapOutputKeyClass(DoubleWritable.class); job5.setMapOutputValueClass(Text.class); // specify a reducer job5.setReducerClass(SortReducer.class); // specify output types job5.setOutputKeyClass(Text.class); job5.setOutputValueClass(DoubleWritable.class); // specify input and output DIRECTORIES int y = 7 * i + 1; FileInputFormat.addInputPath(job5, new Path((args[1] + tmp + "/job4/" + y))); job5.setInputFormatClass(TextInputFormat.class); FileOutputFormat.setOutputPath(job5, new Path((args[1] + tmp + "/job5/" + y))); job5.setOutputFormatClass(TextOutputFormat.class); returnCode = job5.waitForCompletion(true) ? 0 : 1; } catch (InterruptedException | ClassNotFoundException | IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error during mapreduce job5."); return 2; } } /** * Copy necessary output files to args[1] /** * Copy necessary output files to args[1] */ /** * Rename and copy OutLinkGraph */ try { Configuration conf = new Configuration(); Path outLinkGraph = new Path((args[1] + tmp + "/job2/part-r-00000")); FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf); Path output = new Path(args[1] + "/results/PageRank.outlink.out"); FileSystem outputFS = output.getFileSystem(conf); org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf); } catch (IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error while copying results."); return 2; } /** * Rename and copy total number of pages */ try { Configuration conf = new Configuration(); Path outLinkGraph = new Path((args[1] + tmp + "/job3/part-r-00000")); FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf); Path output = new Path(args[1] + "/results/PageRank.n.out"); FileSystem outputFS = output.getFileSystem(conf); 
org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf); } catch (IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error while copying results."); return 2; } /** * Rename and copy iteration 1 */ try { Configuration conf = new Configuration(); Path outLinkGraph = new Path((args[1] + tmp + "/job5/1/part-r-00000")); FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf); Path output = new Path(args[1] + "/results/PageRank.iter1.out"); FileSystem outputFS = output.getFileSystem(conf); org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf); } catch (IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error while copying results."); return 2; } /** * Rename and copy iteration 8 */ try { Configuration conf = new Configuration(); Path outLinkGraph = new Path((args[1] + tmp + "/job5/8/part-r-00000")); FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf); Path output = new Path(args[1] + "/results/PageRank.iter8.out"); FileSystem outputFS = output.getFileSystem(conf); org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf); } catch (IOException ex) { Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex); System.err.println("Error while copying results."); return 2; } return returnCode; }
From source file:com.basho.riak.hadoop.RiakWordCount.java
License:Apache License
public int run(String[] args) throws Exception { String[] keys = new String[10000]; for (int i = 0; i < 10000; i++) { keys[i] = String.valueOf(i + 1000); }//from w w w.ja va 2s.c o m Configuration conf = getConf(); conf = RiakConfig.setKeyLister(conf, new BucketKeyLister("wordcount")); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 11087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 12087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 13087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 14087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 15087)); conf = RiakConfig.setOutputBucket(conf, "wordcount_out"); conf = RiakConfig.setHadoopClusterSize(conf, 4); Job job = new Job(conf, "Riak-WordCount"); job.setJarByClass(RiakWordCount.class); job.setInputFormatClass(RiakInputFormat.class); job.setMapperClass(TokenCounterMapper.class); job.setReducerClass(TokenCounterReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(RiakOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(WordCountResult.class); job.setNumReduceTasks(4); job.submit(); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.benchmark.mapred.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: secondarysrot <in> <out>"); System.exit(2);// w w w . ja v a 2s . com } Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); // group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); job.setGroupingComparatorClass(FirstGroupingComparator.class); // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }