List of usage examples for org.apache.hadoop.mapreduce.Job.waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
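Before the collected examples, here is a minimal, self-contained driver showing the typical call pattern. This is a sketch, not code from any of the sources below; MinimalDriver, TokenMapper, and SumReducer are illustrative names.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {

    // Trivial word-count mapper: emits (word, 1) for every token in a line.
    public static class TokenMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts emitted for each word.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal waitForCompletion example");
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(TokenMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion(true) submits the job, prints progress while it runs
        // (the boolean argument controls verbosity), blocks until it finishes,
        // and returns true only if the job succeeded.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}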
From source file:RunPageRankBasic.java
License:Apache License
private void phase2(int i, int j, float missing, String basePath, int numNodes) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase2");
    job.setJarByClass(RunPageRankBasic.class);

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + numNodes);

    String in = basePath + "/iter" + formatter.format(j) + "t";
    String out = basePath + "/iter" + formatter.format(j);

    LOG.info("PageRank: iteration " + j + ": Phase2");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setFloat("MissingMass", (float) missing);
    job.getConfiguration().setInt("NodeCount", numNodes);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    FileSystem.get(getConf()).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
From source file:lab2_2.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem.get(conf).delete(new Path(args[1]), true);

    Job job = Job.getInstance(conf, "drive time lab 2.2");
    job.setJarByClass(lab2_2.class);
    job.setMapperClass(PartitioningMapper.class);
    job.setPartitionerClass(TypePartitioner.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(6);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:TweetCategorizer.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource(new Path("../../env_vars"));

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TweetCategorizer <in> <out>");
        System.exit(2);
    }

    // ----------------------------------------------------------
    // READ FILTER FILE
    // ----------------------------------------------------------
    // Path pt = new Path("hdfs://pathTofile");
    // Path pt = new Path("../www/hfilters.json");
    String l;
    String line = "";
    // FileSystem fs = FileSystem.get(conf);
    BufferedReader br = new BufferedReader(new FileReader("../www/json/filters.json"));
    try {
        // BufferedReader br = new BufferedReader(new FileReader(fs.open(pt)));
        while ((l = br.readLine()) != null) {
            line += l;
            // System.out.println(line);
        }
    } finally {
        // always close the BufferedReader
        br.close();
    }

    // ----------------------------------------------------------
    // PARSE JSON
    // http://stackoverflow.com/questions/6697147/json-iterate-through-jsonarray
    // http://juliusdavies.ca/json-simple-1.1.1-javadocs/org/json/simple/JSONObject.html
    // ----------------------------------------------------------
    JSONParser parser = new JSONParser();
    JSONObject jsonObject = (JSONObject) parser.parse(line);
    Set<String> filters = jsonObject.keySet();

    // inside each object there is a "name" field; get its value and add it to keyword_list
    for (String i : filters) {
        JSONObject objects = (JSONObject) jsonObject.get(i);
        String keyword = ((String) objects.get("name")).toLowerCase();
        TokenizerMapper.keyname_list.add(i);
        TokenizerMapper.keyword_list.add(keyword);
    }

    // ----------------------------------------------------------
    Job job = new Job(conf, "categorize tweets");
    job.setJarByClass(TweetCategorizer.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordLines.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 3) {
        System.err.println("Usage: wordlines <in> [<in>...] <SearchTerm> <out>");
        System.exit(2);
    }
    conf.set("searchWord", otherArgs[otherArgs.length - 2]);

    Job job = new Job(conf, "word lines");
    job.setJarByClass(WordLines.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    for (int i = 0; i < otherArgs.length - 2; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordCountSplitTest.java
License:Apache License
private final static void test(boolean use_shards, boolean use_chunks, Boolean slaveok) throws Exception {
    did_start = false;
    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.lines");
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);

    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "with_shards_and_chunks";
        else
            output_table = "with_chunks";
    } else {
        if (use_shards)
            output_table = "with_shards";
        else
            output_table = "no_splits";
    }
    if (slaveok != null) {
        output_table += "_" + slaveok;
    }
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." + output_table);
    System.out.println("Conf: " + conf);

    final Job job = new Job(conf, "word count " + output_table);
    job.setJarByClass(WordCountSplitTest.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    final long start = System.currentTimeMillis();
    System.out.println(" ----------------------- running test " + output_table + " --------------------");
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
    final long end = System.currentTimeMillis();
    final float seconds = ((float) (end - start)) / 1000;
    java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);
    System.out.println("finished run in " + nf.format(seconds) + " seconds");

    com.mongodb.Mongo m = new com.mongodb.Mongo(
            new com.mongodb.MongoURI("mongodb://localhost:30000/?slaveok=true"));
    com.mongodb.DB db = m.getDB("test");
    com.mongodb.DBCollection coll = db.getCollection(output_table);
    com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
    query.put("_id", "the");
    com.mongodb.DBCursor cur = coll.find(query);
    if (!cur.hasNext())
        System.out.println("FAILURE: could not find count of 'the'");
    else
        System.out.println("'the' count: " + cur.next());

    // if (!result)
    //     System.exit(1);
}
From source file:LoadClue.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Wrong number of arguments: " + otherArgs.length);
        System.err.println("Usage: " + NAME + " <input> <tablename>");
        System.exit(-1);
    }
    Job job = configureJob(conf, otherArgs);
    boolean result = job.waitForCompletion(true);
    System.out.println("Flushing table " + otherArgs[1]);
    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.flush(otherArgs[1]);
    System.exit(result ? 0 : 1);
}
From source file:BigramRelativeFrequency.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + BigramRelativeFrequency.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequency.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequency.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:CategoriesInvertedIndex.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Inverted Index");
    job.setJarByClass(CategoriesInvertedIndex.class);
    job.setMapperClass(CategoriesMapper.class);
    job.setReducerClass(CategoriesReducer.class);
    job.setCombinerClass(CategoriesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:First.java
License:Apache License
public int run(String[] args) throws Exception {
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 2000; i < 2012; i++) {
        String columnName = Integer.toString(i);
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "app");
        job.setJarByClass(First.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
From source file:MapReduce3.java
public static void main(String[] args) throws Exception {
    // Input file
    String dst = "hdfs://localhost:9000/data/2006a.csv";

    // Output paths
    // String dstOut = "hdfs://localhost:9000/mapreduce/result3/1";
    String dstOut = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/1";
    String outFiles = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/2";

    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    Job job = new Job(hadoopConfig);
    Job job2 = new Job(hadoopConfig);
    FileInputFormat.addInputPath(job, new Path(dst));
    FileOutputFormat.setOutputPath(job, new Path(dstOut));
    FileInputFormat.addInputPath(job2, new Path(dstOut));
    FileOutputFormat.setOutputPath(job2, new Path(outFiles));

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job, TempMapper.class, LongWritable.class, Text.class, CompositeKey_wd.class,
            IntWritable.class, map1Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job, TempReducer.class, CompositeKey_wd.class, IntWritable.class,
            CompositeKey_wd.class, IntWritable.class, reduceConf);

    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job2, TempMapper2.class, LongWritable.class, Text.class, IntWritable.class,
            CompositeKey_wd.class, map2Conf);
    JobConf map3Conf = new JobConf(false);
    ChainReducer.setReducer(job2, TempReduce2.class, IntWritable.class, CompositeKey_wd.class, Text.class,
            IntWritable.class, map3Conf);

    // JobClient.runJob(job);
    // Mapper and Reducer classes
    // job.setMapperClass(TempMapper.class);
    // job.setReducerClass(TempReducer.class);

    // Set output key/value classes
    job.setOutputKeyClass(CompositeKey_wd.class);
    job.setOutputValueClass(IntWritable.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(CompositeKey_wd.class);
    // job2.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // Run the jobs one after the other
    job.waitForCompletion(true);
    System.out.println("Finished1");

    job2.waitForCompletion(true);
    System.out.println("Finished2");
}
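The chained example above ignores the boolean returned by the first waitForCompletion call, so job2 is launched even if the first job fails. A possible variant of that final step, a sketch against the same job and job2 objects rather than code from the original source, checks the result before starting the dependent job and propagates an exit code:

    // Abort the chain if the first job did not succeed.
    if (!job.waitForCompletion(true)) {
        System.err.println("First job failed; skipping second job");
        System.exit(1);
    }
    System.out.println("Finished1");

    // Exit with 0 only if the second job also succeeds.
    System.exit(job2.waitForCompletion(true) ? 0 : 1);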