Example usage for org.apache.hadoop.mapreduce Job setJarByClass

List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce Job setJarByClass taken from open-source projects.

Prototype

public void setJarByClass(Class<?> cls) 

Source Link

Document

Set the Jar by finding where a given class came from.

Usage

From source file:be.ugent.intec.halvade.MapReduceRunner.java

License:Open Source License

/**
 * Configures and launches the main Halvade MapReduce job.
 *
 * <p>Job resources and the pipeline label appended to the job name are chosen from
 * {@code jobType}. The job writes to {@code tmpOutDir}; if that directory already exists the
 * JVM is terminated with exit status -2.
 *
 * @param halvadeConf configuration the job is created from
 * @param tmpOutDir   output directory for this job
 * @param jobType     job type, compared against {@code HalvadeResourceManager.RNA_SHMEM_PASS2}
 *                    and {@code HalvadeResourceManager.DNA}
 * @return whatever {@code runTimedJob} returns for the configured job
 */
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    final boolean rnaPass2 = jobType == HalvadeResourceManager.RNA_SHMEM_PASS2;
    final boolean dna = !rnaPass2 && jobType == HalvadeResourceManager.DNA;

    // Resources and pipeline label are only set for the two job types handled here.
    String pipeline = "";
    if (rnaPass2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
    }
    if (rnaPass2 || dna) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = rnaPass2 ? RNA_PASS2 : DNA;
    }

    // Refuse to run when the output directory is already there.
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outputFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    final Path outputPath = new Path(tmpOutDir);
    if (outputFs.exists(outputPath)) {
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput) {
        setHeaderFile(halvadeOpts.in, halvadeConf);
    }

    Job job = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, job);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Mapper and reducer depend on the pipeline.
    if (rnaPass2) {
        job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (dna) {
        job.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }

    job.setMapOutputKeyClass(ChromosomeRegion.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setInputFormatClass(HalvadeTextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    if (!halvadeOpts.mergeBam) {
        job.setPartitionerClass(ChrRgPartitioner.class);
        job.setSortComparatorClass(ChrRgSortComparator.class);
        job.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        job.setOutputValueClass(VariantContextWritable.class);
    } else {
        job.setSortComparatorClass(SimpleChrRegionComparator.class);
        job.setOutputValueClass(SAMRecordWritable.class);
    }

    // justAlign takes precedence over mergeBam when choosing the number of reduce tasks.
    final int reducerCount;
    if (halvadeOpts.justAlign) {
        reducerCount = 0;
    } else if (halvadeOpts.mergeBam) {
        job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        reducerCount = 1;
    } else {
        reducerCount = halvadeOpts.reduces;
    }
    job.setNumReduceTasks(reducerCount);

    // BAM input replaces the mapper and input format selected above.
    if (halvadeOpts.useBamInput) {
        job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        job.setInputFormatClass(BAMInputFormat.class);
    }

    return runTimedJob(job, "Halvade Job");
}

From source file:be.ugent.intec.halvade.MapReduceRunner.java

License:Open Source License

/**
 * Configures and launches the single-reducer job that combines the per-task outputs of a
 * previous Halvade job.
 *
 * <p>With {@code featureCount} set, files ending in ".count" are combined using the HTSeq
 * mapper/reducer; otherwise files ending in ".vcf" are combined using the VCF mapper/reducer.
 * If {@code mergeOutDir} already exists the JVM is terminated with exit status -2.
 *
 * @param halvadeOutDir directory holding the files to combine
 * @param mergeOutDir   output directory of the combine job
 * @param featureCount  selects the feature-count variant instead of the VCF variant
 * @return whatever {@code runTimedJob} returns for the configured job
 */
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    if (!halvadeOpts.out.endsWith("/")) {
        halvadeOpts.out = halvadeOpts.out + "/";
    }
    HalvadeConf.setInputDir(conf, halvadeOutDir);
    HalvadeConf.setOutDir(conf, mergeOutDir);

    // Refuse to run when the output directory is already there.
    FileSystem mergeFs = FileSystem.get(new URI(mergeOutDir), conf);
    final Path mergePath = new Path(mergeOutDir);
    if (mergeFs.exists(mergePath)) {
        Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }

    HalvadeConf.setReportAllVariant(conf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, conf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job job = Job.getInstance(conf, "HalvadeCombineVCF");
    job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);

    final String inputSuffix;
    final String jobLabel;
    if (featureCount) {
        inputSuffix = ".count";
        jobLabel = "featureCounts";
    } else {
        inputSuffix = ".vcf";
        jobLabel = "VCF";
    }
    addInputFiles(halvadeOutDir, conf, job, inputSuffix);
    FileOutputFormat.setOutputPath(job, mergePath);

    // Variant-specific mapper, reducer, input format and key/value types.
    if (featureCount) {
        job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class);
        job.setOutputValueClass(LongWritable.class);
    } else {
        job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(VariantContextWritable.class);
        job.setInputFormatClass(VCFInputFormat.class);
        job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
        job.setOutputValueClass(VariantContextWritable.class);
    }
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);

    return runTimedJob(job, jobLabel + " Combine Job");
}

From source file:biglayer.AutoCoder.java

License:Apache License

/**
 * Runs this tool: a chain of MapReduce jobs in which each job reads the previous job's output.
 *
 * <p>Job 0 reads the text input. For every layer index {@code 1..GlobalUtil.NUM_LAYER} a job
 * using {@code MyReducer_Train} and then a job using {@code MyReducer_GenData} are run. A final
 * single-reducer job using {@code MyMapper_Super}/{@code MyReducer_Super} reads the last
 * layer's output. The chain stops at the first job that does not complete successfully.
 *
 * @param args command-line arguments; only the reducer-count option is read at present
 * @return 0 when every job succeeded, -1 when the command line cannot be parsed, 1 when a job
 *         failed
 * @throws Exception if a job cannot be configured, submitted or waited for
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    // NOTE(review): the INPUT and OUTPUT options are registered above but never read; both
    // paths are hard-coded here.
    String inputPath = "qiwang321/MNIST-mingled-key/part*";
    String outputPath = "shangfu/layeroutput";

    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    // Shared settings, written before the first job is created from conf.
    Configuration conf = getConf();
    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    // Job 0: the only job that reads text input.
    Job job0 = prepareJob(conf, reduceTasks, inputPath, outputPath + "_0");
    job0.getConfiguration().setInt("layer_ind", 0);
    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);

    long codeStart = System.currentTimeMillis();
    double codeTimeSum = 0;
    double seconds = runTimed(job0);
    if (Double.isNaN(seconds)) {
        return 1;
    }
    codeTimeSum += seconds;

    for (int layer = 1; layer <= GlobalUtil.NUM_LAYER; layer++) {
        String previousOutput = outputPath + "_" + (layer - 1);
        String trainOutput = outputPath + "_" + layer + "_train";
        String layerOutput = outputPath + "_" + layer;

        Job trainJob = prepareJob(conf, reduceTasks, previousOutput, trainOutput);
        trainJob.getConfiguration().setInt("layer_ind", layer);
        trainJob.setMapperClass(MyMapper.class);
        trainJob.setReducerClass(MyReducer_Train.class);
        seconds = runTimed(trainJob);
        if (Double.isNaN(seconds)) {
            return 1;
        }
        codeTimeSum += seconds;

        Job genDataJob = prepareJob(conf, reduceTasks, trainOutput, layerOutput);
        genDataJob.getConfiguration().setInt("layer_ind", layer);
        genDataJob.setMapperClass(MyMapper.class);
        genDataJob.setReducerClass(MyReducer_GenData.class);
        seconds = runTimed(genDataJob);
        if (Double.isNaN(seconds)) {
            return 1;
        }
        codeTimeSum += seconds;
    }

    // Final job: one reducer, NullWritable output, reads the last layer's output.
    Job superJob = prepareJob(conf, 1, outputPath + "_" + GlobalUtil.NUM_LAYER, outputPath);
    superJob.setOutputKeyClass(NullWritable.class);
    superJob.setOutputValueClass(NullWritable.class);
    superJob.setMapperClass(MyMapper_Super.class);
    superJob.setReducerClass(MyReducer_Super.class);
    seconds = runTimed(superJob);
    if (Double.isNaN(seconds)) {
        return 1;
    }
    codeTimeSum += seconds;

    LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds,  " + codeTimeSum
            + " seconds.");

    return 0;
}

/**
 * Creates a job carrying the settings shared by every job of the chain and removes any
 * existing output directory. Callers override the input format, output types, mapper and
 * reducer where they differ from these defaults.
 */
private Job prepareJob(Configuration conf, int reduceTasks, String input, String output) throws Exception {
    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + input);
    LOG.info(" - output path: " + output);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(AutoCoder.class.getSimpleName());
    job.setJarByClass(AutoCoder.class);
    job.setNumReduceTasks(reduceTasks);

    Path outputDir = new Path(output);
    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(PairOfInts.class);
    job.setMapOutputValueClass(ModelNode.class);
    job.setOutputKeyClass(PairOfInts.class);
    job.setOutputValueClass(ModelNode.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(outputDir, true);
    return job;
}

/**
 * Runs the job to completion and logs its wall-clock duration.
 *
 * @return the duration in seconds, or {@code Double.NaN} if the job did not succeed
 */
private double runTimed(Job job) throws Exception {
    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    // Sample the clock once so the logged and the accumulated durations agree.
    double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
    LOG.info("Job Finished in " + seconds + " seconds");
    return succeeded ? seconds : Double.NaN;
}

From source file:bigmodel.AutoCoderLocal.java

License:Apache License

/**
 * Runs this tool: parses the command line, configures one MapReduce job and waits for it.
 *
 * <p>The job reads {@code <input>/part-r-00000} as text and writes sequence files to
 * {@code <output>}, which is deleted first if it exists. {@code <input>/common} is published to
 * the job configuration under the key "dataPath".
 *
 * @param args command-line arguments; the input and output options are required, the
 *             reducer-count option defaults to 1
 * @return 0 when the job succeeded, -1 when the command line is invalid, 1 when the job failed
 * @throws Exception if the job cannot be configured, submitted or waited for
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT) + "/part-r-00000";
    String outputPath = cmdline.getOptionValue(OUTPUT);
    String dataPath = cmdline.getOptionValue(INPUT) + "/common";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoderLocal.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    initialParameters(conf);

    conf.set("dataPath", dataPath);

    Job job = Job.getInstance(conf);
    job.setJobName(AutoCoderLocal.class.getSimpleName());
    job.setJarByClass(AutoCoderLocal.class);
    // NOTE(review): inputPath already ends in "/part-r-00000", so this side path sits below
    // that file name — confirm this is intended.
    job.getConfiguration().set("sidepath", inputPath + "/side_output");
    job.setNumReduceTasks(reduceTasks);

    dataShuffle();

    // Min and max split size are pinned to the same value; computed as long to stay clear of
    // int overflow if the size is ever raised.
    final long splitSize = 1000L * 1024 * 1024;
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setMinInputSplitSize(job, splitSize);
    FileInputFormat.setMaxInputSplitSize(job, splitSize);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ModelNode.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SuperModel.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Report a failed job to the caller instead of always claiming success.
    return succeeded ? 0 : 1;
}

From source file:bigsidemodel.AutoCoder.java

License:Apache License

/**
 * Runs this tool./*from  w w  w . j  av  a  2 s.c o m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
    }*/

    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/best5-mingled-key-56x56/part*";
    String outputPath = "shangfu/bigoutput";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath + "0");
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job0.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);

    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(DataNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(DataNode.class);

    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "0");
    FileSystem.get(getConf()).delete(outputDir, true);

    long codeStart = System.currentTimeMillis();
    double jobTimeSum = 0;

    long startTime = System.currentTimeMillis();
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    //======= Job 1
    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + outputPath + "0");
    LOG.info(" - output path: " + outputPath + "1");
    LOG.info(" - number of reducers: " + 1);
    int nModel = reduceTasks;
    reduceTasks = 1;

    Job job1 = Job.getInstance(conf);
    job1.setJobName(AutoCoder.class.getSimpleName());
    job1.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job1.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job1, new Path(outputPath + "0"));
    FileOutputFormat.setOutputPath(job1, new Path(outputPath + "1"));

    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    job1.setMapOutputKeyClass(PairOfInts.class);
    job1.setMapOutputValueClass(DataNode.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(NullWritable.class);

    job1.setMapperClass(MyMapper1.class);
    job1.setReducerClass(MyReducer1.class);
    job1.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath + "1");
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;
    LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds,  " + jobTimeSum
            + " seconds.");

    return 0;
}

From source file:binningbycategories.BinningbyCategories.java

/**
 * @param args the command line arguments
 * @throws java.lang.Exception/* ww w  . j  a  v  a2  s . c  om*/
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Binning");
    job.setJarByClass(BinningbyCategories.class);
    job.setMapperClass(YouTubeBinMapper.class);
    job.setNumReduceTasks(0);

    TextInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Configure the MultipleOutputs by adding an output called "bins"
    // With the proper output format and mapper key/value pairs
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);

    // Enable the counters for the job
    // If there is a significant number of different named outputs, this
    // should be disabled
    MultipleOutputs.setCountersEnabled(job, true);

    System.exit(job.waitForCompletion(true) ? 0 : 2);
}

From source file:BinningByState.Driver.java

/**
 * Configures and runs the "BinningByState" job with zero reduce tasks and a MultipleOutputs
 * named output called "bins", then exits with status 0 on success or 1 on failure.
 *
 * @param args input path followed by output path
 */
public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration configuration = new Configuration();
    final Job binningJob = Job.getInstance(configuration, "BinningByState");

    // Named output "bins": text format, Text keys, NullWritable values, counters enabled.
    MultipleOutputs.addNamedOutput(binningJob, "bins", TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.setCountersEnabled(binningJob, true);

    binningJob.setJarByClass(Driver.class);
    binningJob.setMapperClass(BinningMapper.class);
    binningJob.setMapOutputKeyClass(Text.class);
    binningJob.setMapOutputValueClass(NullWritable.class);
    // No reduce tasks are requested for this job.
    binningJob.setNumReduceTasks(0);

    final Path inputPath = new Path(args[0]);
    FileInputFormat.addInputPath(binningJob, inputPath);
    final Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(binningJob, outputPath);

    final boolean succeeded = binningJob.waitForCompletion(true);
    int exitCode = 1;
    if (succeeded) {
        exitCode = 0;
    }
    System.exit(exitCode);
}

From source file:boostingPL.driver.AdaBoostPLDriver.java

License:Open Source License

/**
 * Configures and runs the AdaBoostPL job in the mode selected by {@code runModel}.
 *
 * <p>In "train" mode the job has only a mapper and writes sequence files to {@code modelPath};
 * in any other mode it uses the test mapper and reducer with {@code NullOutputFormat}.
 *
 * @param args command-line arguments, handed to {@code commandAnalysis}
 * @return the non-zero status of {@code commandAnalysis} if there is one, otherwise 0 when the
 *         job succeeded and -1 when it did not
 */
@Override
public int run(String[] args) throws Exception {
    final int parseStatus = commandAnalysis(args);
    if (parseStatus != 0) {
        return parseStatus;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    final String jobName = "AdaBoostPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString()
            + " " + numLinesPerMap + " " + numIterations;
    job.setJobName(jobName);
    job.setJarByClass(AdaBoostPLDriver.class);

    // Input is split every numLinesPerMap lines.
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);

    final boolean training = runModel.equals("train");
    if (!training) {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    } else {
        job.setMapperClass(AdaBoostPLMapper.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, modelPath);
    }

    // Settings published to the tasks through the job configuration.
    Configuration jobConf = job.getConfiguration();
    jobConf.set("BoostingPL.boostingName", "AdaBoost");
    jobConf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    jobConf.set("BoostingPL.modelPath", modelPath.toString());
    // Without an explicit metadata path, fall back to "<dataPath>.metadata".
    final String metadataLocation = (metadataPath != null) ? metadataPath.toString()
            : dataPath.toString() + ".metadata";
    jobConf.set("BoostingPL.metadata", metadataLocation);
    if (outputFolder != null) {
        jobConf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    if (job.waitForCompletion(true)) {
        return 0;
    }
    return -1;
}

From source file:boostingPL.driver.SAMMEPLDriver.java

License:Open Source License

/**
 * Configures and runs the SAMMEPL job in the mode selected by {@code runModel}.
 *
 * <p>In "train" mode any existing {@code modelPath} is deleted and the job writes sequence
 * files there. In any other mode the job uses the test mapper and reducer with
 * {@code NullOutputFormat} and leaves {@code modelPath} untouched.
 *
 * @param args command-line arguments, handed to {@code commandAnalysis}
 * @return the non-zero status of {@code commandAnalysis} if there is one, otherwise 0 when the
 *         job succeeded and -1 when it did not
 * @throws Exception if the job cannot be configured, submitted or waited for
 */
@Override
public int run(String[] args) throws Exception {
    int status = commandAnalysis(args);
    if (status != 0) {
        return status;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJobName("SAMMEPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " "
            + numLinesPerMap + " " + numIterations);
    job.setJarByClass(SAMMEPLDriver.class);

    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);

    if (runModel.equals("train")) {
        // Only training writes to modelPath, so only training may clear it. Clearing it in the
        // other mode would remove the path published below as BoostingPL.modelPath —
        // presumably the model the test mapper loads (confirm against AdaBoostPLTestMapper).
        FileSystem fs = modelPath.getFileSystem(getConf());
        if (fs.exists(modelPath)) {
            fs.delete(modelPath, true);
        }
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, modelPath);

        job.setMapperClass(AdaBoostPLMapper.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);
    } else {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    }

    // Settings published to the tasks through the job configuration.
    Configuration conf = job.getConfiguration();
    conf.set("BoostingPL.boostingName", "SAMME");
    conf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    conf.set("BoostingPL.modelPath", modelPath.toString());
    if (metadataPath == null) {
        // Without an explicit metadata path, fall back to "<dataPath>.metadata".
        conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata");
    } else {
        conf.set("BoostingPL.metadata", metadataPath.toString());
    }
    if (outputFolder != null) {
        conf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java

/**
 * Configures and runs the "nqueens-gensolutions" job, which has no reduce tasks.
 *
 * @param args {@code args[0]} is parsed as the number of queens
 * @return 0 when the job completed successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    // Configuration obtained from getConf()
    final Configuration toolConf = getConf();

    // Create the job from that configuration
    final Job solutionsJob = new Job(toolConf, "nqueens-gensolutions");
    solutionsJob.setJarByClass(GenerateSolutions.class);

    // this job has no reduce tasks
    solutionsJob.setNumReduceTasks(0);

    final int boardSize = Integer.parseInt(args[0]);
    this.setWorkingFolder(boardSize, solutionsJob);

    solutionsJob.setMapperClass(br.com.lassal.nqueens.grid.mapreduce.NQueenPartialShotMapper.class);

    // Submit the job and wait until it is complete
    if (solutionsJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}