Example usage for org.apache.hadoop.mapreduce Job Job

Introduction

On this page you can find example usage for the org.apache.hadoop.mapreduce Job constructor.

Prototype

Job(Configuration conf, String jobName) throws IOException
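
The examples below all use this constructor. As a quick orientation, here is a minimal, hypothetical driver sketch (the class name, job name, and elided configuration steps are placeholders, not taken from any example below); note that Hadoop 2.x deprecates this constructor in favor of Job.getInstance(Configuration, String):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // construct the job with a configuration and a human-readable name
        Job job = new Job(conf, "minimal example");
        // on Hadoop 2.x and later, prefer the factory method:
        // Job job = Job.getInstance(conf, "minimal example");
        job.setJarByClass(MinimalDriver.class);
        // ... set mapper, reducer, key/value types, and input/output paths ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}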

Usage

From source file:com.benchmark.mapred.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.benchmark.mapred.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            System.err.println("Usage: wordcount <numReduces> <in> <out>");
            System.exit(2);
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.err.println("Usage: wordcount <numReduces> <in> <out>");
            System.exit(2);
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        System.err.println("Usage: wordcount <numReduces> <in> <out>");
        System.exit(2);
    }

    FileInputFormat.addInputPath(job, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    Date startIteration = new Date();
    boolean jobSucceeded = job.waitForCompletion(true);
    Date endIteration = new Date();
    System.out.println(
            "The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000 + " seconds.");
    System.exit(jobSucceeded ? 0 : 1);
}

From source file:com.bigdog.hadoop.mapreduce.combine.WordCountCombineApp.java

public void combine() throws Exception {
    Configuration conf = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
    final Path outPath = new Path(OUT_PATH);
    if (fileSystem.exists(outPath)) {
        fileSystem.delete(outPath, true);
    }

    final Job job = new Job(conf, WordCountCombineApp.class.getSimpleName());
    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // specify the input format (optional; TextInputFormat is the default)
    //job.setInputFormatClass(TextInputFormat.class);

    //1.2 map
    job.setMapperClass(MyMapper.class);
    // map output <k2,v2> types (may be omitted when they match the final <k3,v3> types)
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    //job.setPartitionerClass(HashPartitioner.class);
    // number of reduce tasks
    //job.setNumReduceTasks(1);

    // 1.4 TODO sorting and grouping

    // 1.5 combiner
    job.setCombinerClass(MyCombiner.class);

    //2.2 reduce
    job.setReducerClass(MyReducer.class);
    // reduce output <k3,v3> types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, outPath);
    // specify the output format (optional; TextOutputFormat is the default)
    //job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}

From source file:com.bigdog.hadoop.mapreduce.counter.WordCountCounterApp.java

public void CustomerCounter() throws Exception {
    Configuration conf = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
    final Path outPath = new Path(OUT_PATH);
    if (fileSystem.exists(outPath)) {
        fileSystem.delete(outPath, true);
    }

    final Job job = new Job(conf, WordCountCounterApp.class.getSimpleName());
    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // specify the input format (optional; TextInputFormat is the default)
    //job.setInputFormatClass(TextInputFormat.class);

    //1.2 map
    job.setMapperClass(MyMapper.class);
    // map output <k2,v2> types (may be omitted when they match the final <k3,v3> types)
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    //job.setPartitionerClass(HashPartitioner.class);
    // number of reduce tasks
    //job.setNumReduceTasks(1);

    // 1.4 TODO sorting and grouping

    // 1.5 TODO combiner

    //2.2 reduce
    job.setReducerClass(MyReducer.class);
    // reduce output <k3,v3> types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, outPath);
    // specify the output format (optional; TextOutputFormat is the default)
    //job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}

From source file:com.bigdog.hadoop.mapreduce.group.GroupApp.java

public void group() throws Exception {
    final Configuration configuration = new Configuration();

    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), configuration);
    if (fileSystem.exists(new Path(OUT_PATH))) {
        fileSystem.delete(new Path(OUT_PATH), true);
    }

    final Job job = new Job(configuration, GroupApp.class.getSimpleName());

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // specify the input format
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 Mapper
    job.setMapperClass(MyMapper.class);
    //<k2,v2>
    job.setMapOutputKeyClass(NewK2.class);
    job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    job.setPartitionerClass(HashPartitioner.class);
    job.setNumReduceTasks(1);

    // 1.4 sorting and grouping
    job.setGroupingComparatorClass(MyGroupingComparator.class);
    // 1.5 TODO combiner

    //2.2 reduce
    job.setReducerClass(MyReducer.class);
    //<k3,v3>
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    // specify the output format
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}

From source file:com.bigdog.hadoop.mapreduce.partition.KpiApp.java

public void kpi() throws Exception {
    final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName());

    job.setJarByClass(KpiApp.class);

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // specify the input format
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 Mapper
    job.setMapperClass(MyMapper.class);
    //<k2,v2>
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(KpiWritable.class);

    // 1.3 partitioning
    job.setPartitionerClass(KpiPartitioner.class);
    job.setNumReduceTasks(2);

    // 1.4 TODO sorting and grouping
    // 1.5 TODO combiner
    //2.2 reduce
    job.setReducerClass(MyReducer.class);
    //<k3,v3>
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(KpiWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    // specify the output format
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}

From source file:com.binbo.wordcount.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class); // Set the combiner
    job.setPartitionerClass(WordPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.bizosys.oneline.maintenance.Import.java

License:Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Importer.class);
    System.out.println("Input Dir:" + inputDir);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Importer.class);
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
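
A caller would then submit the returned job, for instance like this (a hypothetical sketch, since the original driver code is not shown here; HBaseConfiguration is an assumption, reasonable because the job writes to an HBase table via TableMapReduceUtil):

Configuration conf = HBaseConfiguration.create(); // assumed HBase-aware configuration
Job job = createSubmittableJob(conf, new String[] { "myTable", "/import/input" }); // hypothetical table name and input dir
System.exit(job.waitForCompletion(true) ? 0 : 1);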

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License:Apache License

/**
 * Run the aggregation Job to aggregate the different TopK patterns and group each Pattern by the features
 * present in it and thus calculate the final Top K frequent Patterns for each feature
 */
public static void startAggregating(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {

    conf.set(PFP_PARAMETERS, params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");

    Path input = new Path(params.get(OUTPUT), FPGROWTH);
    Job job = new Job(conf, "PFP Aggregator Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TopKStringPatterns.class);

    FileInputFormat.addInputPath(job, input);
    Path outPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
    FileOutputFormat.setOutputPath(job, outPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(AggregatorMapper.class);
    job.setCombinerClass(AggregatorReducer.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    HadoopUtil.delete(conf, outPath);
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License:Apache License

/**
 * Count the frequencies of various features in parallel using Map/Reduce
 */
public static void startParallelCounting(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());

    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");

    String input = params.get(INPUT);
    Job job = new Job(conf, "Parallel Counting Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);
    FileOutputFormat.setOutputPath(job, outPath);

    HadoopUtil.delete(conf, outPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelCountingMapper.class);
    job.setCombinerClass(ParallelCountingReducer.class);
    job.setReducerClass(ParallelCountingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

}
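
The two stages above are typically driven in sequence: parallel counting first, then (after the FP-Growth step over grouped shards, which is not shown here) the aggregation pass. A hedged driver sketch; the key strings "input" and "output" are assumptions standing in for the INPUT and OUTPUT constants referenced above, and the paths are placeholders:

Parameters params = new Parameters();       // org.apache.mahout.common.Parameters
params.set("input", "/data/transactions");  // assumed key behind INPUT
params.set("output", "/data/patterns");     // assumed key behind OUTPUT
Configuration conf = new Configuration();
startParallelCounting(params, conf);        // first pass: count feature frequencies
// ... FP-Growth stage over the sharded transactions (not shown above) ...
startAggregating(params, conf);             // final pass: aggregate TopK patterns per feature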