List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
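Before the collected examples, here is a minimal self-contained sketch (not taken from any of the projects below) of the typical call pattern: configure a Job, then call waitForCompletion(true) to submit it, print progress while it runs, and block until it finishes. The identity Mapper and Reducer base classes are used so the sketch compiles on its own; the command-line input and output paths are assumptions for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WaitForCompletionExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "waitForCompletion example");
        job.setJarByClass(WaitForCompletionExample.class);

        // Identity mapper and reducer: records pass through unchanged,
        // so the defaults (TextInputFormat keys/values) determine the types.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job, print progress to the console while it runs
        // (verbose = true), and block until it finishes. The return value
        // is true only if the job completed successfully.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that waitForCompletion both submits and monitors the job; the examples below follow this same pattern, differing mainly in how they configure formats, mappers, and reducers before the call.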
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void pageRankIteration(int iter, Configuration conf, Path outputDir) throws Exception {
    // This job performs an iteration of the power iteration method to
    // compute PageRank. The map task processes each block M_{i,j}, loads
    // the corresponding stripe j of the vector v_{k-1} and produces the
    // partial result of the stripe i of the vector v_k. The reduce task
    // sums all the partial results of v_k and adds the teleportation factor
    // (the combiner only sums all the partial results). See Section 5.2
    // (and 5.2.3 in particular) of Mining of Massive Datasets
    // (http://infolab.stanford.edu/~ullman/mmds.html) for details. The
    // output is written in a "vk" subdir of the output dir, where k is the
    // iteration number. MapFileOutputFormat is used to keep an array of the
    // stripes of v.

    Job job = Job.getInstance(conf, "PageRank:Iteration");
    job.setJarByClass(PageRank.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankIterationMapper.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(FloatArrayWritable.class);
    job.setCombinerClass(PageRankIterationCombiner.class);
    job.setReducerClass(PageRankIterationReducer.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(ShortWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    FileInputFormat.addInputPath(job, new Path(outputDir, "M"));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter));

    job.waitForCompletion(true);
}
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void summarizeResults(int iter, Configuration conf, Path outputDir) throws Exception {
    // This job creates a plain text file with the top N PageRanks and the
    // titles of the pages. Each map task emits the top N PageRanks it
    // receives, and the reduce task merges the partial results into the
    // global top N PageRanks. A single reducer is used in the job in order
    // to have access to all the individual top N PageRanks from the
    // mappers. The reducer looks up the titles in the index built by
    // TitleIndex. This job was designed considering that N is small.

    int topResults = Integer.parseInt(conf.get("pagerank.top_results"));

    Job job = Job.getInstance(conf, "PageRank:TopN");
    job.setJarByClass(PageRank.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankTopNMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(PageRankTopNReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(outputDir, "v" + iter));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter + "-top" + topResults));

    // A single reducer collects every mapper's partial top-N list.
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);
}
From source file:com.github.ygf.pagerank.TitleIndex.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("Usage: TitleIndex <titles-sorted.txt> <output-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path titlesFile = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    Configuration conf = getConf();

    // Do not create _SUCCESS files. MapFileOutputFormat.getReaders calls
    // try to read the _SUCCESS as another MapFile dir.
    conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false");

    // This job creates a MapFile of the titles indexed by the page id.
    // UnsplittableTextInputFormat is used to ensure that the same map task
    // gets all the lines in the titlesFile and it can count the line
    // numbers. The number of reduce tasks is set to 0.

    Job job = Job.getInstance(conf, "TitleIndex");
    job.setJarByClass(InLinks.class);

    job.setInputFormatClass(UnsplittableTextInputFormat.class);
    job.setMapperClass(TitleIndexMapper.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, titlesFile);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setNumReduceTasks(0);

    job.waitForCompletion(true);

    return 0;
}
From source file:com.goldsaxfoundation.bigdata.Module5.SimpleMapReduce.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(SimpleMapReduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}
From source file:com.google.cloud.bigtable.hbase.TestImport.java
License:Open Source License
@Test
@Category(KnownGap.class)
public void testMapReduce() throws IOException, ClassNotFoundException, InterruptedException {
    Admin admin = getConnection().getAdmin();
    admin.disableTable(TABLE_NAME);
    admin.deleteTable(TABLE_NAME);
    IntegrationTests.createTable(TABLE_NAME);

    // Put a value.
    byte[] rowKey = dataHelper.randomData("testrow-");
    byte[] qual = dataHelper.randomData("testQualifier-");
    byte[] value = dataHelper.randomData("testValue-");

    try (Table oldTable = getConnection().getTable(TABLE_NAME)) {
        Put put = new Put(rowKey);
        put.addColumn(COLUMN_FAMILY, qual, value);
        oldTable.put(put);

        // Assert the value is there.
        Get get = new Get(rowKey);
        Result result = oldTable.get(get);
        List<Cell> cells = result.listCells();
        Assert.assertEquals(1, cells.size());
        Assert.assertArrayEquals(CellUtil.cloneValue(cells.get(0)), value);
    }

    // Run the export.
    Configuration conf = getConnection().getConfiguration();
    //conf.set("fs.defaultFS", "file:///");
    FileSystem dfs = IntegrationTests.getMiniCluster().getFileSystem();
    String tempDir = "hdfs://" + dfs.getCanonicalServiceName() + "/tmp/backup";
    String[] args = new String[] { TABLE_NAME.getNameAsString(), tempDir };
    Job job = Export.createSubmittableJob(conf, args);
    // So it looks for jars in the local FS, not HDFS.
    job.getConfiguration().set("fs.defaultFS", "file:///");
    Assert.assertTrue(job.waitForCompletion(true));

    // Create new table.
    TableName newTableName = IntegrationTests.newTestTableName();
    try (Table newTable = getConnection().getTable(newTableName)) {
        // Change for method in IntegrationTests
        HColumnDescriptor hcd = new HColumnDescriptor(IntegrationTests.COLUMN_FAMILY);
        HTableDescriptor htd = new HTableDescriptor(newTableName);
        htd.addFamily(hcd);
        admin.createTable(htd);

        // Run the import.
        args = new String[] { newTableName.getNameAsString(), tempDir };
        job = Import.createSubmittableJob(conf, args);
        job.getConfiguration().set("fs.defaultFS", "file:///");
        Assert.assertTrue(job.waitForCompletion(true));

        // Assert the value is there.
        Get get = new Get(rowKey);
        Result result = newTable.get(get);
        List<Cell> cells = result.listCells();
        Assert.assertEquals(1, cells.size());
        Assert.assertArrayEquals(CellUtil.cloneValue(cells.get(0)), value);
    } finally {
        admin.disableTable(newTableName);
        admin.deleteTable(newTableName);
    }
}
From source file:com.google.cloud.bigtable.mapreduce.Export.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws java.lang.Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    if (conf.get(BigtableOptionsFactory.PROJECT_ID_KEY) == null) {
        usage("Must specify the property " + BigtableOptionsFactory.PROJECT_ID_KEY);
        System.exit(-1);
    }
    if (conf.get(BigtableOptionsFactory.INSTANCE_ID_KEY) == null) {
        usage("Must specify the property " + BigtableOptionsFactory.INSTANCE_ID_KEY);
        System.exit(-1);
    }
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.google.cloud.bigtable.mapreduce.Import.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        return -1;
    }

    String inputVersionString = System.getProperty(ResultSerialization.IMPORT_FORMAT_VER);
    if (inputVersionString != null) {
        getConf().set(ResultSerialization.IMPORT_FORMAT_VER, inputVersionString);
    }
    Job job = createSubmittableJob(getConf(), otherArgs);
    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.gsinnovations.howdah.Driver.java
License:Apache License
public static void job(Path input, Path output, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    Job job = new Job(conf);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(TikaMapper.class);
    //job.setCombinerClass(KMeansCombiner.class);
    //job.setReducerClass(KMeansReducer.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(Driver.class);

    HadoopUtil.overwriteOutput(output);
    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void generatePairs(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    path = out;

    Job job;
    Path input, output;
    input = new Path(in);
    output = new Path(path + "/CSMRPairs");

    job = new Job(conf);
    job.setJobName("CSMR Pairs Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CSMRMapper.class);
    job.setReducerClass(CSMRReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorArrayWritable.class);

    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job;
    job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));

    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Exit with 0 on success and 1 on failure (the original source
    // inverted these exit codes).
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}