Example usage for org.apache.hadoop.mapreduce Job waitForCompletion

List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job waitForCompletion.

Prototype

public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException 

Source Link

Document

Submit the job to the cluster and wait for it to finish.

Usage

From source file:basic.PartitionGraph.java

License:Apache License

/**
 * Runs this tool./*from  w  w w.j av a2  s  . co  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:bb.BranchAndBound.java

License:Apache License

public static void main(String[] args) throws Exception {
    /*Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {/*from w  w w .  j  a v  a  2  s .co  m*/
       System.err.println("Usage: branchandbound <input> <output>");
       System.exit(2);
    }
    Job job = new Job(conf, "branch and bound");
    job.setJarByClass(BranchAndBound.class);
    job.setMapperClass(BBMapper.class);
    //      job.setCombinerClass(IntSumReducer.class);
    //      job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);*/
    int n;
    String[] inputargs = new GenericOptionsParser(new Configuration(), args).getRemainingArgs();
    if (inputargs.length != 2) {
        System.err.println("Usage: branchandbound <data directory> <n>");
        System.exit(2);
    }
    n = Integer.parseInt(inputargs[1]);
    String dataDir = inputargs[0];
    String prev_output = dataDir + "/input";
    /*      for( int i = 1 ; i <= n ; i++ ) {
             for( int j = 0 ; j < 2 ; j++ ) {
    String input = prev_output ;
    String output = inputargs[1] + "/iteration" + i + "_" + j ;
    Job job = getJob(input, output, i, j) ;
    job.waitForCompletion(true) ; // if failed ????
    prev_output = output;
             }
          }
    */
    //prev_output = dataDir + "/output" + "/iteration" + 17;
    long totalNodes = 0;
    long searchedNodes = 0;
    long cutbyDEE = 0;
    int mapTotal = 768;
    for (int i = 0; i <= n; i++) {
        iterRound = i;
        String input = prev_output;
        String output = dataDir + "/output" + "/iteration" + i;
        Job job = getJob(input, output, dataDir, i);
        if (i == n) {
            numReduceTasks = 1;
        }
        //job.setNumMapTasks(200);
        if (numOutput > mapTotal) {
            FileInputFormat.setMaxInputSplitSize(job, 10 * (8 * n + 10) + numOutput * (8 * n + 10) / 3000);
            FileInputFormat.setMinInputSplitSize(job, Math.max((8 * n + 10), numOutput * (8 * n + 10) / 5000));
        } else {
            FileInputFormat.setMaxInputSplitSize(job, (8 * n + 10));
        }
        /*
        if( i == 0 ) {
        job.setNumReduceTasks(1);
        } else {
        job.setNumReduceTasks(0);
        }
         */
        job.setNumReduceTasks(0);
        job.waitForCompletion(true); // if failed ????
        prev_output = output;
        Counters counters = job.getCounters();
        Counter counter = counters.findCounter("MyCounter", "Map Output Counter");
        numOutput = counter.getValue();
        totalNodes += numOutput;
        cutbyDEE += counters.findCounter("MyCounter", "Cut By DEE").getValue();
        searchedNodes += totalNodes + cutbyDEE + counters.findCounter("MyCounter", "Cut By Bound").getValue();
        System.out.println(numOutput + " " + (8 * n + 10) + " " + (numOutput * (8 * n + 10) / 768));
    }
    System.out.println("searchedNodes " + searchedNodes);
    System.out.println(totalNodes);
    System.out.println("cut by dee " + cutbyDEE);
}

From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License:Apache License

private static void runJob(Job job, String jobName)
        throws ClassNotFoundException, IOException, InterruptedException {
    long start = System.currentTimeMillis();
    job.waitForCompletion(true);
    long end = System.currentTimeMillis();
    System.out.println("Job " + jobName + " took " + (end - start) / 1000 + "s");
}

From source file:be.ugent.intec.halvade.MapReduceRunner.java

License:Open Source License

protected int runTimedJob(Job job, String jobname)
        throws IOException, InterruptedException, ClassNotFoundException {
    if (halvadeOpts.dryRun)
        return 0;
    Logger.DEBUG("Started " + jobname);
    Timer timer = new Timer();
    timer.start();// w w  w .  j a v a2s  .com
    int ret = job.waitForCompletion(true) ? 0 : 1;
    timer.stop();
    Logger.DEBUG("Finished " + jobname + " [runtime: " + timer.getFormattedElapsedTime() + "]");
    return ret;
}

From source file:biglayer.AutoCoder.java

License:Apache License

/**
 * Runs this tool./*from   www . j  a  v  a 2  s  . com*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
    }*/

    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/MNIST-mingled-key/part*";
    String outputPath = "shangfu/layeroutput";

    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();

    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    job0.setNumReduceTasks(reduceTasks);

    job0.getConfiguration().setInt("layer_ind", 0);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "_0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);

    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(ModelNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(ModelNode.class);

    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "_0");
    FileSystem.get(getConf()).delete(outputDir, true);
    long startTime = System.currentTimeMillis();
    long codeStart = System.currentTimeMillis();
    double codeTimeSum = 0;
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) {
        Job job1 = Job.getInstance(conf);
        job1.setJobName(AutoCoder.class.getSimpleName());
        job1.setJarByClass(AutoCoder.class);
        job1.setNumReduceTasks(reduceTasks);
        job1.getConfiguration().setInt("layer_ind", iterations);
        FileInputFormat.setInputPaths(job1, new Path(outputPath + "_" + (iterations - 1)));
        FileOutputFormat.setOutputPath(job1, new Path(outputPath + "_" + iterations + "_train"));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + (iterations - 1));
        LOG.info(" - output path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - number of reducers: " + reduceTasks);

        job1.setInputFormatClass(SequenceFileInputFormat.class);
        job1.setOutputFormatClass(SequenceFileOutputFormat.class);

        job1.setMapOutputKeyClass(PairOfInts.class);
        job1.setMapOutputValueClass(ModelNode.class);
        job1.setOutputKeyClass(PairOfInts.class);
        job1.setOutputValueClass(ModelNode.class);

        job1.setMapperClass(MyMapper.class);
        job1.setReducerClass(MyReducer_Train.class);
        job1.setPartitionerClass(MyPartitioner.class);
        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations + "_train");
        FileSystem.get(getConf()).delete(outputDir, true);
        startTime = System.currentTimeMillis();
        job1.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

        Job job2 = Job.getInstance(conf);
        job2.setJobName(AutoCoder.class.getSimpleName());
        job2.setJarByClass(AutoCoder.class);
        job2.setNumReduceTasks(reduceTasks);
        job2.getConfiguration().setInt("layer_ind", iterations);
        FileInputFormat.setInputPaths(job2, new Path(outputPath + "_" + (iterations + "_train")));
        FileOutputFormat.setOutputPath(job2, new Path(outputPath + "_" + iterations));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - output path: " + outputPath + "_" + iterations);
        LOG.info(" - number of reducers: " + reduceTasks);

        job2.setInputFormatClass(SequenceFileInputFormat.class);
        job2.setOutputFormatClass(SequenceFileOutputFormat.class);

        job2.setMapOutputKeyClass(PairOfInts.class);
        job2.setMapOutputValueClass(ModelNode.class);
        job2.setOutputKeyClass(PairOfInts.class);
        job2.setOutputValueClass(ModelNode.class);

        job2.setMapperClass(MyMapper.class);
        job2.setReducerClass(MyReducer_GenData.class);
        job2.setPartitionerClass(MyPartitioner.class);
        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations);
        FileSystem.get(getConf()).delete(outputDir, true);
        startTime = System.currentTimeMillis();
        job2.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    }

    LOG.info(" - input path: " + outputPath + "_" + GlobalUtil.NUM_LAYER);
    LOG.info(" - output path: " + outputPath);
    reduceTasks = 1;
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job_super = Job.getInstance(conf);
    job_super.setJobName(AutoCoder.class.getSimpleName());
    job_super.setJarByClass(AutoCoder.class);
    job_super.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job_super, new Path(outputPath + "_" + GlobalUtil.NUM_LAYER));
    FileOutputFormat.setOutputPath(job_super, new Path(outputPath));

    job_super.setInputFormatClass(SequenceFileInputFormat.class);
    job_super.setOutputFormatClass(SequenceFileOutputFormat.class);

    job_super.setMapOutputKeyClass(PairOfInts.class);
    job_super.setMapOutputValueClass(ModelNode.class);
    job_super.setOutputKeyClass(NullWritable.class);
    job_super.setOutputValueClass(NullWritable.class);

    job_super.setMapperClass(MyMapper_Super.class);
    job_super.setReducerClass(MyReducer_Super.class);
    job_super.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job_super.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    Log.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds,  " + codeTimeSum
            + " seconds.");
    //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks);

    return 0;
}

From source file:bigmodel.AutoCoderLocal.java

License:Apache License

/**
 * Runs this tool.//  w w  w .j  a va  2s.c  om
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT) + "/part-r-00000";
    String outputPath = cmdline.getOptionValue(OUTPUT);
    String dataPath = cmdline.getOptionValue(INPUT) + "/common";
    //String inputPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/part*";
    //String outputPath = "output";
    //String dataPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/common";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoderLocal.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    initialParameters(conf);

    conf.set("dataPath", dataPath);

    Job job = Job.getInstance(conf);
    job.setJobName(AutoCoderLocal.class.getSimpleName());
    job.setJarByClass(AutoCoderLocal.class);
    // set the path of the information of k clusters in this iteration
    job.getConfiguration().set("sidepath", inputPath + "/side_output");
    job.setNumReduceTasks(reduceTasks);

    dataShuffle();

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setMinInputSplitSize(job, 1000 * 1024 * 1024);
    FileInputFormat.setMaxInputSplitSize(job, 1000 * 1024 * 1024);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ModelNode.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SuperModel.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks);

    return 0;
}

From source file:bigsidemodel.AutoCoder.java

License:Apache License

/**
 * Runs this tool.//from   w ww  .ja va 2s . c  om
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
    }*/

    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/best5-mingled-key-56x56/part*";
    String outputPath = "shangfu/bigoutput";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath + "0");
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job0.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);

    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(DataNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(DataNode.class);

    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "0");
    FileSystem.get(getConf()).delete(outputDir, true);

    long codeStart = System.currentTimeMillis();
    double jobTimeSum = 0;

    long startTime = System.currentTimeMillis();
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    //======= Job 1
    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + outputPath + "0");
    LOG.info(" - output path: " + outputPath + "1");
    LOG.info(" - number of reducers: " + 1);
    int nModel = reduceTasks;
    reduceTasks = 1;

    Job job1 = Job.getInstance(conf);
    job1.setJobName(AutoCoder.class.getSimpleName());
    job1.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job1.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job1, new Path(outputPath + "0"));
    FileOutputFormat.setOutputPath(job1, new Path(outputPath + "1"));

    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    job1.setMapOutputKeyClass(PairOfInts.class);
    job1.setMapOutputValueClass(DataNode.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(NullWritable.class);

    job1.setMapperClass(MyMapper1.class);
    job1.setReducerClass(MyReducer1.class);
    job1.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath + "1");
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;
    LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds,  " + jobTimeSum
            + " seconds.");

    return 0;
}

From source file:binningbycategories.BinningbyCategories.java

/**
 * @param args the command line arguments
 * @throws java.lang.Exception//w w w  .ja  v a  2  s . c  o  m
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Binning");
    job.setJarByClass(BinningbyCategories.class);
    job.setMapperClass(YouTubeBinMapper.class);
    job.setNumReduceTasks(0);

    TextInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Configure the MultipleOutputs by adding an output called "bins"
    // With the proper output format and mapper key/value pairs
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);

    // Enable the counters for the job
    // If there is a significant number of different named outputs, this
    // should be disabled
    MultipleOutputs.setCountersEnabled(job, true);

    System.exit(job.waitForCompletion(true) ? 0 : 2);
}

From source file:BinningByState.Driver.java

public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "BinningByState");
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    job.setJarByClass(Driver.class);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);//www.  j a  v  a2 s  . c o m
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:boostingPL.driver.AdaBoostPLDriver.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    int status = commandAnalysis(args);
    if (status != 0) {
        return status;
    }/*w  w  w  .j a  v a 2 s.  co m*/

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJobName("AdaBoostPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " "
            + numLinesPerMap + " " + numIterations);
    job.setJarByClass(AdaBoostPLDriver.class);

    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);

    if (runModel.equals("train")) {
        job.setMapperClass(AdaBoostPLMapper.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, modelPath);
    } else {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    }

    Configuration conf = job.getConfiguration();
    conf.set("BoostingPL.boostingName", "AdaBoost");
    conf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    conf.set("BoostingPL.modelPath", modelPath.toString());
    if (metadataPath == null) {
        conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata");
    } else {
        conf.set("BoostingPL.metadata", metadataPath.toString());
    }
    if (outputFolder != null) {
        conf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    return job.waitForCompletion(true) == true ? 0 : -1;
}