List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
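Before the collected examples, here is a minimal self-contained sketch (not taken from any of the projects below) of the typical call pattern: configure a Job, then call waitForCompletion(true) to submit it, print progress while it runs, and block until it finishes. The identity Mapper and Reducer base classes are used so the sketch compiles on its own; the command-line input and output paths are assumptions for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WaitForCompletionExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "waitForCompletion example");
        job.setJarByClass(WaitForCompletionExample.class);

        // Identity mapper and reducer: records pass through unchanged,
        // so the defaults (TextInputFormat keys/values) determine the types.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job, print progress to the console while it runs
        // (verbose = true), and block until it finishes. The return value
        // is true only if the job completed successfully.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that waitForCompletion both submits and monitors the job; the examples below follow this same pattern, differing mainly in how they configure formats, mappers, and reducers before the call.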
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void pageRankIteration(int iter, Configuration conf, Path outputDir) throws Exception {
    // This job performs an iteration of the power iteration method to
    // compute PageRank. The map task processes each block M_{i,j}, loads
    // the corresponding stripe j of the vector v_{k-1} and produces the
    // partial result of the stripe i of the vector v_k. The reduce task
    // sums all the partial results of v_k and adds the teleportation factor
    // (the combiner only sums all the partial results). See Section 5.2
    // (and 5.2.3 in particular) of Mining of Massive Datasets
    // (http://infolab.stanford.edu/~ullman/mmds.html) for details. The
    // output is written in a "vk" subdir of the output dir, where k is the
    // iteration number. MapFileOutputFormat is used to keep an array of the
    // stripes of v.

    Job job = Job.getInstance(conf, "PageRank:Iteration");
    job.setJarByClass(PageRank.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankIterationMapper.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(FloatArrayWritable.class);
    job.setCombinerClass(PageRankIterationCombiner.class);
    job.setReducerClass(PageRankIterationReducer.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(ShortWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    FileInputFormat.addInputPath(job, new Path(outputDir, "M"));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter));

    job.waitForCompletion(true);
}
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void summarizeResults(int iter, Configuration conf, Path outputDir) throws Exception {
    // This job creates a plain text file with the top N PageRanks and the
    // titles of the pages. Each map task emits the top N PageRanks it
    // receives, and the reduce task merges the partial results into the
    // global top N PageRanks. A single reducer is used in the job in order
    // to have access to all the individual top N PageRanks from the
    // mappers. The reducer looks up the titles in the index built by
    // TitleIndex. This job was designed considering that N is small.

    int topResults = Integer.parseInt(conf.get("pagerank.top_results"));

    Job job = Job.getInstance(conf, "PageRank:TopN");
    job.setJarByClass(PageRank.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankTopNMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(PageRankTopNReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(outputDir, "v" + iter));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter + "-top" + topResults));

    // A single reducer collects every mapper's partial top-N list.
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);
}
From source file:com.github.ygf.pagerank.TitleIndex.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("Usage: TitleIndex <titles-sorted.txt> <output-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path titlesFile = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    Configuration conf = getConf();

    // Do not create _SUCCESS files. MapFileOutputFormat.getReaders calls
    // try to read the _SUCCESS as another MapFile dir.
    conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false");

    // This job creates a MapFile of the titles indexed by the page id.
    // UnsplittableTextInputFormat is used to ensure that the same map task
    // gets all the lines in the titlesFile and it can count the line
    // numbers. The number of reduce tasks is set to 0.

    Job job = Job.getInstance(conf, "TitleIndex");
    job.setJarByClass(InLinks.class);

    job.setInputFormatClass(UnsplittableTextInputFormat.class);
    job.setMapperClass(TitleIndexMapper.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, titlesFile);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setNumReduceTasks(0);

    job.waitForCompletion(true);

    return 0;
}
From source file:com.goldsaxfoundation.bigdata.Module5.SimpleMapReduce.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(SimpleMapReduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}
From source file:com.google.cloud.bigtable.hbase.TestImport.java
License:Open Source License
@Test
@Category(KnownGap.class)
public void testMapReduce() throws IOException, ClassNotFoundException, InterruptedException {
    Admin admin = getConnection().getAdmin();
    admin.disableTable(TABLE_NAME);
    admin.deleteTable(TABLE_NAME);
    IntegrationTests.createTable(TABLE_NAME);

    // Put a value.
    byte[] rowKey = dataHelper.randomData("testrow-");
    byte[] qual = dataHelper.randomData("testQualifier-");
    byte[] value = dataHelper.randomData("testValue-");

    try (Table oldTable = getConnection().getTable(TABLE_NAME)) {
        Put put = new Put(rowKey);
        put.addColumn(COLUMN_FAMILY, qual, value);
        oldTable.put(put);

        // Assert the value is there.
        Get get = new Get(rowKey);
        Result result = oldTable.get(get);
        List<Cell> cells = result.listCells();
        Assert.assertEquals(1, cells.size());
        Assert.assertArrayEquals(CellUtil.cloneValue(cells.get(0)), value);
    }

    // Run the export.
    Configuration conf = getConnection().getConfiguration();
    //conf.set("fs.defaultFS", "file:///");
    FileSystem dfs = IntegrationTests.getMiniCluster().getFileSystem();
    String tempDir = "hdfs://" + dfs.getCanonicalServiceName() + "/tmp/backup";
    String[] args = new String[] { TABLE_NAME.getNameAsString(), tempDir };
    Job job = Export.createSubmittableJob(conf, args);
    // So it looks for jars in the local FS, not HDFS.
    job.getConfiguration().set("fs.defaultFS", "file:///");
    Assert.assertTrue(job.waitForCompletion(true));

    // Create new table.
    TableName newTableName = IntegrationTests.newTestTableName();
    try (Table newTable = getConnection().getTable(newTableName)) {
        // Change for method in IntegrationTests
        HColumnDescriptor hcd = new HColumnDescriptor(IntegrationTests.COLUMN_FAMILY);
        HTableDescriptor htd = new HTableDescriptor(newTableName);
        htd.addFamily(hcd);
        admin.createTable(htd);

        // Run the import.
        args = new String[] { newTableName.getNameAsString(), tempDir };
        job = Import.createSubmittableJob(conf, args);
        job.getConfiguration().set("fs.defaultFS", "file:///");
        Assert.assertTrue(job.waitForCompletion(true));

        // Assert the value is there.
        Get get = new Get(rowKey);
        Result result = newTable.get(get);
        List<Cell> cells = result.listCells();
        Assert.assertEquals(1, cells.size());
        Assert.assertArrayEquals(CellUtil.cloneValue(cells.get(0)), value);
    } finally {
        admin.disableTable(newTableName);
        admin.deleteTable(newTableName);
    }
}
From source file:com.google.cloud.bigtable.mapreduce.Export.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws java.lang.Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    if (conf.get(BigtableOptionsFactory.PROJECT_ID_KEY) == null) {
        usage("Must specify the property " + BigtableOptionsFactory.PROJECT_ID_KEY);
        System.exit(-1);
    }
    if (conf.get(BigtableOptionsFactory.INSTANCE_ID_KEY) == null) {
        usage("Must specify the property " + BigtableOptionsFactory.INSTANCE_ID_KEY);
        System.exit(-1);
    }
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.google.cloud.bigtable.mapreduce.Import.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        return -1;
    }

    String inputVersionString = System.getProperty(ResultSerialization.IMPORT_FORMAT_VER);
    if (inputVersionString != null) {
        getConf().set(ResultSerialization.IMPORT_FORMAT_VER, inputVersionString);
    }
    Job job = createSubmittableJob(getConf(), otherArgs);
    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.gsinnovations.howdah.Driver.java
License:Apache License
public static void job(Path input, Path output, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    Job job = new Job(conf);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(TikaMapper.class);
    //job.setCombinerClass(KMeansCombiner.class);
    //job.setReducerClass(KMeansReducer.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(Driver.class);

    HadoopUtil.overwriteOutput(output);
    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void generatePairs(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    path = out;

    Job job;
    Path input, output;
    input = new Path(in);
    output = new Path(path + "/CSMRPairs");

    job = new Job(conf);
    job.setJobName("CSMR Pairs Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CSMRMapper.class);
    job.setReducerClass(CSMRReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorArrayWritable.class);

    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job;
    job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));

    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Exit with 0 on success and 1 on failure (the original source
    // inverted these exit codes).
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}