List of usage examples for org.apache.hadoop.mapreduce.Job.waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
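Before the collected examples, here is a minimal, self-contained driver showing the typical call pattern. This is a sketch, not code from any of the sources below; MinimalDriver, TokenMapper, and SumReducer are illustrative names.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {

    // Trivial word-count mapper: emits (word, 1) for every token in a line.
    public static class TokenMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts emitted for each word.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal waitForCompletion example");
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(TokenMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion(true) submits the job, prints progress while it runs
        // (the boolean argument controls verbosity), blocks until it finishes,
        // and returns true only if the job succeeded.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}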
From source file:RunPageRankBasic.java
License:Apache License
private void phase2(int i, int j, float missing, String basePath, int numNodes) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase2");
    job.setJarByClass(RunPageRankBasic.class);

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + numNodes);

    String in = basePath + "/iter" + formatter.format(j) + "t";
    String out = basePath + "/iter" + formatter.format(j);

    LOG.info("PageRank: iteration " + j + ": Phase2");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setFloat("MissingMass", (float) missing);
    job.getConfiguration().setInt("NodeCount", numNodes);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    FileSystem.get(getConf()).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
From source file:lab2_2.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem.get(conf).delete(new Path(args[1]), true);

    Job job = Job.getInstance(conf, "drive time lab 2.2");
    job.setJarByClass(lab2_2.class);
    job.setMapperClass(PartitioningMapper.class);
    job.setPartitionerClass(TypePartitioner.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(6);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:TweetCategorizer.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource(new Path("../../env_vars"));

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TweetCategorizer <in> <out>");
        System.exit(2);
    }

    // ----------------------------------------------------------
    // READ FILTER FILE
    // ----------------------------------------------------------
    // Path pt = new Path("hdfs://pathTofile");
    // Path pt = new Path("../www/hfilters.json");
    String l;
    String line = "";
    // FileSystem fs = FileSystem.get(conf);
    BufferedReader br = new BufferedReader(new FileReader("../www/json/filters.json"));
    try {
        // BufferedReader br = new BufferedReader(new FileReader(fs.open(pt)));
        while ((l = br.readLine()) != null) {
            line += l;
            // System.out.println(line);
        }
    } finally {
        // always close the BufferedReader
        br.close();
    }

    // ----------------------------------------------------------
    // PARSE JSON
    // http://stackoverflow.com/questions/6697147/json-iterate-through-jsonarray
    // http://juliusdavies.ca/json-simple-1.1.1-javadocs/org/json/simple/JSONObject.html
    // ----------------------------------------------------------
    JSONParser parser = new JSONParser();
    JSONObject jsonObject = (JSONObject) parser.parse(line);
    Set<String> filters = jsonObject.keySet();

    // inside each object there is a "name" field; get its value and add it to keyword_list
    for (String i : filters) {
        JSONObject objects = (JSONObject) jsonObject.get(i);
        String keyword = ((String) objects.get("name")).toLowerCase();
        TokenizerMapper.keyname_list.add(i);
        TokenizerMapper.keyword_list.add(keyword);
    }

    // ----------------------------------------------------------
    Job job = new Job(conf, "categorize tweets");
    job.setJarByClass(TweetCategorizer.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordLines.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 3) {
        System.err.println("Usage: wordlines <in> [<in>...] <SearchTerm> <out>");
        System.exit(2);
    }
    conf.set("searchWord", otherArgs[otherArgs.length - 2]);

    Job job = new Job(conf, "word lines");
    job.setJarByClass(WordLines.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    for (int i = 0; i < otherArgs.length - 2; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordCountSplitTest.java
License:Apache License
private final static void test(boolean use_shards, boolean use_chunks, Boolean slaveok) throws Exception {
    did_start = false;
    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.lines");
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);

    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "with_shards_and_chunks";
        else
            output_table = "with_chunks";
    } else {
        if (use_shards)
            output_table = "with_shards";
        else
            output_table = "no_splits";
    }
    if (slaveok != null) {
        output_table += "_" + slaveok;
    }
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." + output_table);
    System.out.println("Conf: " + conf);

    final Job job = new Job(conf, "word count " + output_table);
    job.setJarByClass(WordCountSplitTest.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    final long start = System.currentTimeMillis();
    System.out.println(" ----------------------- running test " + output_table + " --------------------");
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
    final long end = System.currentTimeMillis();
    final float seconds = ((float) (end - start)) / 1000;
    java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);
    System.out.println("finished run in " + nf.format(seconds) + " seconds");

    com.mongodb.Mongo m = new com.mongodb.Mongo(
            new com.mongodb.MongoURI("mongodb://localhost:30000/?slaveok=true"));
    com.mongodb.DB db = m.getDB("test");
    com.mongodb.DBCollection coll = db.getCollection(output_table);
    com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
    query.put("_id", "the");
    com.mongodb.DBCursor cur = coll.find(query);
    if (!cur.hasNext())
        System.out.println("FAILURE: could not find count of 'the'");
    else
        System.out.println("'the' count: " + cur.next());

    // if (!result)
    //     System.exit(1);
}
From source file:LoadClue.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Wrong number of arguments: " + otherArgs.length);
        System.err.println("Usage: " + NAME + " <input> <tablename>");
        System.exit(-1);
    }
    Job job = configureJob(conf, otherArgs);
    boolean result = job.waitForCompletion(true);
    System.out.println("Flushing table " + otherArgs[1]);
    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.flush(otherArgs[1]);
    System.exit(result ? 0 : 1);
}
From source file:BigramRelativeFrequency.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + BigramRelativeFrequency.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequency.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequency.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:CategoriesInvertedIndex.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Inverted Index");
    job.setJarByClass(CategoriesInvertedIndex.class);
    job.setMapperClass(CategoriesMapper.class);
    job.setReducerClass(CategoriesReducer.class);
    job.setCombinerClass(CategoriesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:First.java
License:Apache License
public int run(String[] args) throws Exception {
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 2000; i < 2012; i++) {
        String columnName = Integer.toString(i);
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "app");
        job.setJarByClass(First.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
From source file:MapReduce3.java
public static void main(String[] args) throws Exception {
    // Input file
    String dst = "hdfs://localhost:9000/data/2006a.csv";

    // Output paths
    // String dstOut = "hdfs://localhost:9000/mapreduce/result3/1";
    String dstOut = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/1";
    String outFiles = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/2";

    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    Job job = new Job(hadoopConfig);
    Job job2 = new Job(hadoopConfig);
    FileInputFormat.addInputPath(job, new Path(dst));
    FileOutputFormat.setOutputPath(job, new Path(dstOut));
    FileInputFormat.addInputPath(job2, new Path(dstOut));
    FileOutputFormat.setOutputPath(job2, new Path(outFiles));

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job, TempMapper.class, LongWritable.class, Text.class, CompositeKey_wd.class,
            IntWritable.class, map1Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job, TempReducer.class, CompositeKey_wd.class, IntWritable.class,
            CompositeKey_wd.class, IntWritable.class, reduceConf);

    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job2, TempMapper2.class, LongWritable.class, Text.class, IntWritable.class,
            CompositeKey_wd.class, map2Conf);
    JobConf map3Conf = new JobConf(false);
    ChainReducer.setReducer(job2, TempReduce2.class, IntWritable.class, CompositeKey_wd.class, Text.class,
            IntWritable.class, map3Conf);

    // JobClient.runJob(job);
    // Mapper and Reducer classes
    // job.setMapperClass(TempMapper.class);
    // job.setReducerClass(TempReducer.class);

    // Set output key/value classes
    job.setOutputKeyClass(CompositeKey_wd.class);
    job.setOutputValueClass(IntWritable.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(CompositeKey_wd.class);
    // job2.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // Run the jobs one after the other
    job.waitForCompletion(true);
    System.out.println("Finished1");

    job2.waitForCompletion(true);
    System.out.println("Finished2");
}
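The chained example above ignores the boolean returned by the first waitForCompletion call, so job2 is launched even if the first job fails. A possible variant of that final step, a sketch against the same job and job2 objects rather than code from the original source, checks the result before starting the dependent job and propagates an exit code:

    // Abort the chain if the first job did not succeed.
    if (!job.waitForCompletion(true)) {
        System.err.println("First job failed; skipping second job");
        System.exit(1);
    }
    System.out.println("Finished1");

    // Exit with 0 only if the second job also succeeds.
    System.exit(job2.waitForCompletion(true) ? 0 : 1);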