Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job.
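
The combiner must be a Reducer whose input and output key/value types both match the map output types, because the framework may run it zero, one, or several times on partial map output before the shuffle. Below is a minimal sketch, not taken from any of the examples that follow; the class and type names (SumCombiner, Text, IntWritable) are illustrative assumptions:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical combiner: input and output types are identical, so the
// framework can apply it any number of times without changing the final result.
public class SumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}

// Registering it on a job (job setup elided):
//     job.setCombinerClass(SumCombiner.class);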

Usage

From source file: gr.ntua.h2rdf.sampler.TotalOrderPrep.java

License: Open Source License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {

    Job sample_job = new Job();

    // Remember the real input format so the sampling input format can use
    // it under the hood

    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);

    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");
    // Base the sample size on the number of reduce tasks that will be used
    // by the real job, but only use 1 reducer for this job (maps output very
    // little)
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file: the input file for the real job's
    // TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);

    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));
    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent
    // destruction of keys.

    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");
    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    return sample_job;

}

From source file: gws.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/ucas/bigdata/hadoop-2.6.2/etc/hadoop/core-site.xml"));
    conf.addResource(new Path("/home/ucas/bigdata/hadoop-2.6.2/etc/hadoop/hdfs-site.xml"));
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");

    job.setJarByClass(WordCount.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // add the input paths as given by command line
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }

    // add the output path as given by the command line
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
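
Note the design choice above: unlike most examples on this page, this job registers a dedicated IntSumCombiner rather than reusing its reducer as the combiner. A combiner's output is fed back into the shuffle, so it must keep emitting the map output types (here Text and IntWritable); since the job's final output value class is Text, the reducer presumably emits a different value type and cannot double as the combiner.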

From source file: hadoop.examples.DBCountPageView.java

License: Apache License

public int run(String[] args) throws Exception {

    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);
    Configuration conf = getConf();

    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);

    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);

    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}
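
This job reuses LongSumReducer as its combiner; assuming it is Hadoop's library class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer (the import is not shown here), it sums LongWritable values and re-emits them under the same key, so its input and output types both match the map output (Text, LongWritable) and it is safe to run as a combiner.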

From source file: hadoop.examples.mapreduce.WordCountV1.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.print("Usage: wordcountV1 <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountV1.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: hadoop.examples.mapreduce.WordCountV2.java

License: Open Source License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    List<String> other_args = new ArrayList<String>();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    for (int i = 0; i < args.length; i++) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else if ("-D".equals(args[i])) {
            String[] arr = args[++i].split("=");
            conf.setBoolean(arr[0], Boolean.valueOf(arr[1]));
        } else
            other_args.add(args[i]);
    }

    Job job = new Job(conf);
    job.setJarByClass(WordCountV2.class);
    job.setJobName("word count version 2");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TokenizeMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: hadoop.examples.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {

    System.out.println("hello 4");
    System.out.print("gegin");
    System.out.print("gegin2");

    String hdfspath = "hdfs://10.2.12.93:9000/user/root/";
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(hdfspath + "input"));
    FileOutputFormat.setOutputPath(job, new Path(hdfspath + "output7"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: hadoop.examples.WordMean.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);

    return (result ? 0 : 1);
}

From source file: hadoop.examples.WordMedian.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }

    setConf(new Configuration());
    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median

    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}

From source file: hadoop.examples.WordStandardDeviation.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);

    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);

    return (result ? 0 : 1);
}

From source file: hadoop.TestingDriver.java

License: Open Source License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String input = conf.get("gc.TestingDriver.input");
    String output = conf.get("gc.TestingDriver.output");
    String jobname = conf.get("gc.TestingDriver.name");
    String dataset = conf.get("gc.TestingDriver.dataset");

    if (input == null || output == null || dataset == null || jobname == null) {
        System.out.println(" Incorrect parameters ");
        System.exit(0);
    }

    conf = addPathToDC(conf, conf.get("gc.TestingDriver.dataset") + "*");

    Job job = new Job(conf);
    job.setJarByClass(TestingDriverMapper.class);
    job.setJobName(jobname);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PIFArray.class);

    job.setMapperClass(TestingDriverMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PIFArray.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setCombinerClass(TestingDriverReducer.class);
    job.setReducerClass(TestingDriverReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, input);

    FileOutputFormat.setOutputPath(job, new Path(output));

    System.out.println(" Input dir = " + conf.get("gc.TestingDriver.input"));
    System.out.println(" Output dir = " + conf.get("gc.TestingDriver.output"));
    System.out.println(" Testing Input = " + conf.get("gc.TestingDriver.dataset"));
    System.out.println(" Name = " + conf.get("gc.TestingDriver.name"));

    if (job.waitForCompletion(true) == false) {
        System.err.println(" Job " + jobname + " Failed (miserably)");
        System.exit(2);
    }

    return 0;
}