List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
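The combiner must be a Reducer whose input and output key/value types match the map output types, and it has to be registered before the job is submitted (a call on an already submitted job throws IllegalStateException). A minimal driver sketch is shown below; the TokenizerMapper, IntSumReducer, and MyDriver names are assumed WordCount-style placeholders, not classes from this page.

    // Minimal sketch: register the combiner before submitting the job.
    // TokenizerMapper, IntSumReducer, and MyDriver are assumed placeholder classes.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "combiner example");
    job.setJarByClass(MyDriver.class);          // hypothetical driver class
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);  // must be set before job submission
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);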
From source file:gr.ntua.h2rdf.sampler.TotalOrderPrep.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    Job sample_job = new Job();
    // Remember the real input format so the sampling input format can use
    // it under the hood
    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);
    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");

    // Base the sample size on the number of reduce tasks that will be used
    // by the real job, but only use 1 reducer for this job (maps output very little)
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file: the input file for the real job's
    // TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);
    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));
    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent
    // destruction of keys.
    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");
    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    return sample_job;
}
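The Combiner class registered above is defined elsewhere in the h2rdf sources and is not shown on this page. Given the comment about keeping it an identity reducer so keys are not destroyed, an identity-style combiner for this job might look roughly like the sketch below; the class body is an assumption, not the actual h2rdf code, and it relies on the usual org.apache.hadoop.mapreduce.Reducer and org.apache.hadoop.hbase.io.ImmutableBytesWritable imports.

    // Hypothetical identity-style combiner: forwards each map output pair unchanged,
    // so the sampler's keys survive intact. The real h2rdf Combiner may differ.
    public static class Combiner
            extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
                            ImmutableBytesWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(ImmutableBytesWritable key, Iterable<ImmutableBytesWritable> values,
                              Context context) throws IOException, InterruptedException {
            for (ImmutableBytesWritable value : values) {
                context.write(key, value);
            }
        }
    }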
From source file:gws.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/ucas/bigdata/hadoop-2.6.2/etc/hadoop/core-site.xml"));
    conf.addResource(new Path("/home/ucas/bigdata/hadoop-2.6.2/etc/hadoop/hdfs-site.xml"));
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // add the input paths as given by command line
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    // add the output path as given by the command line
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
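Because this job declares Text as the final output value class while the map output values are IntWritable, the reducer cannot double as the combiner, so a dedicated IntSumCombiner is registered instead. Its body is not shown on this page; a typical int-summing combiner would look roughly like the following sketch (the class body is an assumption, not the gws source).

    // Hypothetical combiner that pre-aggregates counts on the map side.
    // Its input and output types must both match the map output types (Text, IntWritable).
    public static class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }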
From source file:hadoop.examples.DBCountPageView.java
License:Apache License
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }
    initialize(driverClassName, url);
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}
From source file:hadoop.examples.mapreduce.WordCountV1.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.print("Usage: wordcountV1 <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountV1.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.examples.mapreduce.WordCountV2.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    List<String> other_args = new ArrayList<String>();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    for (int i = 0; i < args.length; i++) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else if ("-D".equals(args[i])) {
            String[] arr = args[++i].split("=");
            conf.setBoolean(arr[0], Boolean.valueOf(arr[1]));
        } else {
            other_args.add(args[i]);
        }
    }

    Job job = new Job(conf);
    job.setJarByClass(WordCountV2.class);
    job.setJobName("word count version 2");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(TokenizeMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:hadoop.examples.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.out.println("hello 4");
    System.out.print("gegin");
    System.out.print("gegin2");
    String hdfspath = "hdfs://10.2.12.93:9000/user/root/";
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(hdfspath + "input"));
    FileOutputFormat.setOutputPath(job, new Path(hdfspath + "output7"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.examples.WordMean.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmean <in> <out>"); return 0; }//from ww w. j a va2 s . c o m Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word mean"); job.setJarByClass(WordMean.class); job.setMapperClass(WordMeanMapper.class); job.setCombinerClass(WordMeanReducer.class); job.setReducerClass(WordMeanReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); mean = readAndCalcMean(outputpath, conf); return (result ? 0 : 1); }
From source file:hadoop.examples.WordMedian.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmedian <in> <out>"); return 0; }// w w w .ja v a 2s . c o m setConf(new Configuration()); Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word median"); job.setJarByClass(WordMedian.class); job.setMapperClass(WordMedianMapper.class); job.setCombinerClass(WordMedianReducer.class); job.setReducerClass(WordMedianReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean result = job.waitForCompletion(true); // Wait for JOB 1 -- get middle value to check for Median long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()) .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue(); int medianIndex1 = (int) Math.ceil((totalWords / 2.0)); int medianIndex2 = (int) Math.floor((totalWords / 2.0)); median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf); return (result ? 0 : 1); }
From source file:hadoop.examples.WordStandardDeviation.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordstddev <in> <out>"); return 0; }// w w w .j a va 2 s. c o m Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word stddev"); job.setJarByClass(WordStandardDeviation.class); job.setMapperClass(WordStandardDeviationMapper.class); job.setCombinerClass(WordStandardDeviationReducer.class); job.setReducerClass(WordStandardDeviationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); // read output and calculate standard deviation stddev = readAndCalcStdDev(outputpath, conf); return (result ? 0 : 1); }
From source file:hadoop.TestingDriver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    String input = conf.get("gc.TestingDriver.input");
    String output = conf.get("gc.TestingDriver.output");
    String jobname = conf.get("gc.TestingDriver.name");
    String dataset = conf.get("gc.TestingDriver.dataset");
    if (input == null || output == null || dataset == null || jobname == null) {
        System.out.println(" Incorrect parameters ");
        System.exit(0);
    }
    conf = addPathToDC(conf, conf.get("gc.TestingDriver.dataset") + "*");

    Job job = new Job(conf);
    job.setJarByClass(TestingDriverMapper.class);
    job.setJobName(jobname);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PIFArray.class);
    job.setMapperClass(TestingDriverMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PIFArray.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setCombinerClass(TestingDriverReducer.class);
    job.setReducerClass(TestingDriverReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    System.out.println(" Input dir = " + conf.get("gc.TestingDriver.input"));
    System.out.println(" Output dir = " + conf.get("gc.TestingDriver.output"));
    System.out.println(" Testing Input = " + conf.get("gc.TestingDriver.dataset"));
    System.out.println(" Name = " + conf.get("gc.TestingDriver.name"));

    if (job.waitForCompletion(true) == false) {
        System.err.println(" Job " + jobname + " Failed (miserably)");
        System.exit(2);
    }
    return 0;
}