Example usage for org.apache.hadoop.mapreduce.Job setNumReduceTasks

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#setNumReduceTasks from open-source projects.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Document

Set the number of reduce tasks for the job.
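
Before the project examples under Usage, here is a minimal, self-contained driver sketch (not taken from any of the source files below) showing the two most common ways setNumReduceTasks is used: requesting a fixed number of reduce tasks, or passing 0 to run a map-only job. It relies only on stock Hadoop classes (TokenCounterMapper, IntSumReducer); the class name, paths, and the reducer count of 4 are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class SetNumReduceTasksExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "token count");

        job.setJarByClass(SetNumReduceTasksExample.class);
        job.setMapperClass(TokenCounterMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Request 4 reduce tasks. This must be called before the job is
        // submitted; afterwards it throws IllegalStateException.
        job.setNumReduceTasks(4);

        // Passing 0 instead would make this a map-only job: the reducer
        // (and combiner) would be skipped and mapper output written
        // directly to the output path.
        // job.setNumReduceTasks(0);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}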

Usage

From source file: DAAL.SVD.java

License: Open Source License

This example chains two jobs: the step1/step2 job keeps the default number of reduce tasks, while the step3 job calls setNumReduceTasks(0) so it runs map-only.

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "SVD Job (step1 and step2)");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path("/Hadoop/SVD/step2"));

    job.setMapperClass(SVDStep1Mapper.class);
    job.setReducerClass(SVDStep2Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);

    job.setJarByClass(SVD.class);

    job.waitForCompletion(true);

    Job job1 = new Job(conf, "SVD Job (step3)");

    FileInputFormat.setInputPaths(job1, new Path("/Hadoop/SVD/step2"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    job1.setMapperClass(SVDStep3Mapper.class);
    job1.setNumReduceTasks(0);

    job1.setInputFormatClass(SequenceFileInputFormat.class);

    job1.setJarByClass(SVD.class);

    return job1.waitForCompletion(true) ? 0 : 1;

}

From source file: DataCubeRefresh.Grep.java

License: Apache License

This grep-and-sort pipeline leaves the grep job's reducer count at the default and calls setNumReduceTasks(1) on the sort job so that all results flow to a single output stream.

/**
 * Run function.
 * @param args arguments
 * @return error code
 * @throws Exception if an exception occurs
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inUrl> <outUrl> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Job grepJob = new Job(getConf());
    Job sortJob = new Job(getConf());

    String tempStreamTag = UUID.randomUUID().toString();

    try {
        grepJob.setJobName("grep-search");

        TextHStreamingInputFormat.addInputStream(grepJob, 1000, 600, -1, "", false, args[0]);
        HStreamingJobConf.setIsStreamingJob(grepJob, true);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.getConfiguration().set("mapred.mapper.regex", args[2]);
        if (args.length == 4)
            grepJob.getConfiguration().set("mapred.mapper.regex.group", args[3]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        grepJob.setInputFormatClass(TextHStreamingInputFormat.class);
        grepJob.setOutputFormatClass(TextHStreamingOutputFormat.class);
        HStreamingOutputFormat.setOutputStreamTag(grepJob, tempStreamTag);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.setJobName("grep-search");
        grepJob.setJarByClass(this.getClass());

        grepJob.submit();

        sortJob.setJobName("grep-sort");
        sortJob.setInputFormatClass(TextHStreamingInputFormat.class);
        HStreamingJobConf.setIsStreamingJob(sortJob, true);

        // add previous stream partition/reducer 0 as input. 
        HStreamingInputFormat.addInputStreamTag(sortJob, tempStreamTag, 0);

        sortJob.setMapperClass(InverseTextMapper.class);
        sortJob.setNumReduceTasks(1); // single output stream
        sortJob.setOutputFormatClass(TextHStreamingOutputFormat.class);
        TextHStreamingOutputFormat.setOutputPath(sortJob, args[1]);
        sortJob.setSortComparatorClass( // sort by decreasing fre
                LongWritable.DecreasingComparator.class);
        sortJob.setJarByClass(this.getClass());
        sortJob.submit();

        return sortJob.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
        try {
            grepJob.killJob();
        } catch (Exception e1) {
            // ignore
        }
        try {
            sortJob.killJob();
        } catch (Exception e2) {
            // ignore
        }
    }
    return 0;
}

From source file: de.bankmark.bigbench.queries.q18.MRlinearRegression.java

License: Apache License

Here the reducer count is taken from the local constant NUMBER_REDUCERS (set to 1) and passed to setNumReduceTasks.

@Override
public int run(String[] args) throws Exception {

    int NUMBER_REDUCERS = 1;
    Job job = Job.getInstance(getConf());

    job.setJarByClass(MRlinearRegression.class);
    if (args.length != 2) {
        usage(job);
        return 2;
    }
    System.out.println("input:");
    job.setJobName(MRlinearRegression.class.getSimpleName() + "::" + args[0] + "->" + args[1]);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    System.out.println("Input: " + input + "  out -> " + output);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(MRlinearRegression.LRmapper.class);
    job.setReducerClass(MRlinearRegression.LRreducer.class);
    job.setNumReduceTasks(NUMBER_REDUCERS);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleArrayWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: de.bankmark.bigbench.queries.q28.ToSequenceFile.java

License: Apache License

This conversion job calls setNumReduceTasks(0) to run map-only, so the Reducer configured via setReducerClass is never executed and mapper output is written straight to the SequenceFile output.

@Override
public int run(String[] args) throws Exception {

    Job job = Job.getInstance(getConf());

    job.setJarByClass(ToSequenceFile.class);
    if (args.length != 2) {
        usage(job);
        return 2;
    }
    System.out.println("input:");
    job.setJobName(ToSequenceFile.class.getSimpleName() + "::" + args[0] + "->" + args[1]);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    System.out.println("Input: " + input + "  out -> " + output);
    FileInputFormat.addInputPath(job, input);
    SequenceFileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(Reducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: de.gesundkrank.wikipedia.hadoop.util.RepackToMapFile.java

License: Open Source License

This repacking job calls setNumReduceTasks(1) so that a single reducer writes the MapFile output.

public int run(String basePath, String outputPath, boolean checkNew, boolean skipRedirect) throws Exception {
    Configuration configuration = getConf();
    configuration.setBoolean("skipRedirect", skipRedirect);

    LOGGER.info("Tool name: " + getClass().getSimpleName());

    Job job = Job.getInstance(configuration, getClass().getSimpleName());
    job.setJarByClass(getClass());

    job.setMapperClass(WikiMapper.class);
    job.setInputFormatClass(WikiInputFormat.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(WikiRevisionWritable.class);

    WikiDumpLoader wikiDumpLoader = new WikiDumpLoader(checkNew);
    wikiDumpLoader.addWikiDump(job, basePath);

    MapFileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: de.hpi.fgis.hdrs.mapreduce.examples.PredicateAnalysis.java

License: Apache License

This analysis job calls setNumReduceTasks(0) and writes mapper output directly to a SequenceFile.

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(PredicateAnalysis.class);
    job.setJobName("Predicate Analysis");

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    //job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TripleInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, "POS");
    TripleInputFormat.setPattern(job, Triple.newPattern(null, args[1], null));
    TripleInputFormat.setAggregationLevel2(job);

    SequenceFileOutputFormat.setOutputPath(job, new Path(args[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file: de.hpi.fgis.hdrs.mapreduce.examples.TripleCount.java

License: Apache License

This counting job uses a single reducer via setNumReduceTasks(1) to aggregate the per-mapper counts.

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(TripleCount.class);
    job.setJobName("TripleCount");

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(1);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    int argc = 0;

    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);
    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s))
            s = null;
        if ("*".equals(p))
            p = null;
        if ("*".equals(o))
            o = null;
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    } else {
        TextOutputFormat.setOutputPath(job, new Path(args[argc]));
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file: de.hpi.fgis.hdrs.mapreduce.IndexLoader.java

License: Apache License

The index loader is a map-only job: setNumReduceTasks(0) disables the reduce phase and the mapper writes triples directly to the target indexes.

@Override
public int run(String[] args) throws Exception {
    if (3 != args.length) {
        System.out.println(
                "Usage: IndexLoader <StoreAddres> <SourceIndex> " + "<TargetIndex1>[,<TargetIndex2>...]");
        return 0;
    }

    Job job = new Job(getConf());
    job.setJarByClass(IndexLoader.class);
    job.setJobName("HDRS Index Loader");

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TripleOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TripleOutputFormat.class);

    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TripleOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, args[1]);

    TripleOutputFormat.setStoreAddress(job, args[0]);
    TripleOutputFormat.setOutputIndexes(job, args[2]);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file: de.l3s.common.features.hadoop.TimeSeriesJob.java

License: Apache License

Here the reducer count is parsed from a command-line option and passed to setNumReduceTasks, falling back to a default when the option is absent or malformed.

@Override
public int run(String[] args) throws Exception {
    Options opts = new Options();

    Option jnameOpt = OptionBuilder.withArgName("job-name").hasArg(true).withDescription("Timeseries analysis")
            .create(JOB_NAME);

    Option inputOpt = OptionBuilder.withArgName("input-path").hasArg(true)
            .withDescription("Timeseries file path (required)").create(INPUT_OPT);

    Option outputOpt = OptionBuilder.withArgName("output-path").hasArg(true)
            .withDescription("output file path (required)").create(OUTPUT_OPT);

    Option reduceOpt = OptionBuilder.withArgName("reduce-no").hasArg(true)
            .withDescription("number of reducer nodes").create(REDUCE_NO);

    Option rmOpt = OptionBuilder.withArgName("remove-out").hasArg(false)
            .withDescription("remove the output then create again before writing files onto it")
            .create(REMOVE_OUTPUT);

    Option cOpt = OptionBuilder.withArgName("compress-option").hasArg(true)
            .withDescription("compression option").create(COMPRESS_OPT);

    opts.addOption(jnameOpt);
    opts.addOption(inputOpt);
    opts.addOption(reduceOpt);
    opts.addOption(outputOpt);
    opts.addOption(rmOpt);
    opts.addOption(cOpt);
    CommandLine cl;
    CommandLineParser parser = new GnuParser();
    try {
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.println("Error parsing command line: " + e.getMessage());
        return -1;
    }

    if (!cl.hasOption(INPUT_OPT) || !cl.hasOption(OUTPUT_OPT)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(getClass().getName(), opts);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    int reduceNo = DEFAULT_REDUCER_NO;
    if (cl.hasOption(REDUCE_NO)) {
        try {
            reduceNo = Integer.parseInt(cl.getOptionValue(REDUCE_NO));
        } catch (NumberFormatException e) {
            System.err.println("Error parsing reducer number: " + e.getMessage());
        }
    }

    String jobName = "Distributed timeseries [R] correlation";
    if (cl.hasOption(JOB_NAME)) {
        jobName = cl.getOptionValue(JOB_NAME);
        jobName = jobName.replace('-', ' ');
    }

    if (cl.hasOption(REMOVE_OUTPUT)) {
        // removal of an existing output directory is not implemented here
    }

    String input = cl.getOptionValue(INPUT_OPT);
    String output = cl.getOptionValue(OUTPUT_OPT);

    Configuration conf = getConf();
    //DistributedCache.createSymlink(conf); 
    //DistributedCache.addCacheFile(new URI("hdfs://master.hadoop:8020/user/nguyen/lib/"), conf);
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(TimeSeriesJob.class);
    job.setMapperClass(TimeSeriesMapper.class);
    job.setReducerClass(TimeSeriesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Timeseries.class);

    job.setNumReduceTasks(reduceNo);
    job.setInputFormatClass(WholeFileInputFormat.class);
    WholeFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: de.l3s.content.timex.extracting.ClueWeb09Timex.java

License: Apache License

This extraction job writes directly into HBase through TableOutputFormat and calls setNumReduceTasks(0), so only mappers run.

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("input").hasArg().withDescription("input path").create(INPUT_OPTION));

    options.addOption(
            OptionBuilder.withArgName("output").hasArg().withDescription("output path").create(OUTPUT_OPTION));

    options.addOption(OptionBuilder.withArgName("column").hasArg()
            .withDescription("column to store row data into (must exist)").create(COLUMN));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    cmdline = parser.parse(options, args);

    if (!cmdline.hasOption(INPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    if (!cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);

    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    //      String column = cmdline.getOptionValue(COLUMN);

    LOG.info("Tool name: " + ClueWeb09Timex.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    //      LOG.info(" - column: " + column);

    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node05.ib,node03.ib,node04.ib");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    conf.set("hbase.master", "master.ib");

    //      conf.set("conf.column", column);

    long milliSeconds = 10000 * 60 * 60; //x10 default
    conf.setLong("mapred.task.timeout", milliSeconds);

    Job job = Job.getInstance(conf, ClueWeb09Timex.class.getSimpleName()
            + " time-confident extraction + annotation + HBase import: " + input);

    //Configuration conf = new Configuration();
    //Job job = Job.getInstance(conf, "web pages count");
    job.setJarByClass(ClueWeb09Timex.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(ClueWeb09InputFormat.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, output);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    job.setMapperClass(TMapper.class);
    //job.setReducerClass(IntSumReducer.class);
    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(input));
    //FileOutputFormat.setOutputPath(job, new Path(output));
    job.waitForCompletion(true);

    return 0;
}