Example usage for the org.apache.hadoop.mapreduce.Job constructor

List of usage examples for org.apache.hadoop.mapreduce Job Job

Introduction

This page collects usage examples for the constructor of org.apache.hadoop.mapreduce.Job.

Prototype

Job(JobStatus status, JobConf conf) throws IOException 

Source Link

Usage

From source file:com.talis.labs.pagerank.mapreduce.CountPages.java

License:Apache License

/**
 * Configures and runs the "CountPages" MapReduce job.
 *
 * <p>Any existing output directory is removed (recursively) before the job is
 * submitted. The job uses {@code CountPagesMapper}, with
 * {@code CountPagesReducer} acting as both combiner and reducer, and a single
 * reduce task.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return {@code -1} if the argument count is wrong, {@code 0} if the job
 *         completes successfully, {@code 1} otherwise
 * @throws Exception if the output path cannot be deleted or the job fails to run
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CountPages <input path> <output path>");
        return -1;
    }

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself rather than using the
    // default one, so a fully qualified output URI on a non-default file system
    // is deleted correctly instead of being rejected as belonging to the wrong FS.
    output.getFileSystem(getConf()).delete(output, true);

    Job job = new Job(getConf(), "CountPages");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CountPagesMapper.class);
    job.setCombinerClass(CountPagesReducer.class);
    job.setReducerClass(CountPagesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.DanglingPages.java

License:Apache License

/**
 * Configures and runs the "DanglingPages" MapReduce job.
 *
 * <p>Any existing output directory is removed (recursively) before the job is
 * submitted. The job uses {@code DanglingPagesMapper}, with
 * {@code DanglingPagesReducer} acting as both combiner and reducer, and a
 * single reduce task.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return {@code -1} if the argument count is wrong, {@code 0} if the job
 *         completes successfully, {@code 1} otherwise
 * @throws Exception if the output path cannot be deleted or the job fails to run
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: DanglingPages <input path> <output path>");
        return -1;
    }

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself rather than using the
    // default one, so a fully qualified output URI on a non-default file system
    // is deleted correctly instead of being rejected as belonging to the wrong FS.
    output.getFileSystem(getConf()).delete(output, true);

    Job job = new Job(getConf(), "DanglingPages");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(DanglingPagesMapper.class);
    job.setCombinerClass(DanglingPagesReducer.class);
    job.setReducerClass(DanglingPagesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.InitializePageRanks.java

License:Apache License

/**
 * Configures and runs the "InitializePageRanks" MapReduce job.
 *
 * <p>The number of pages is stored in the configuration under
 * {@code pagerank.count} before the job is constructed from that
 * configuration. Any existing output directory is removed (recursively)
 * before the job is submitted. Only a mapper
 * ({@code InitializePageRanksMapper}) is set explicitly.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output
 *             path, {@code args[2]} the number of pages
 * @return {@code -1} if the argument count is wrong, {@code 0} if the job
 *         completes successfully, {@code 1} otherwise
 * @throws Exception if the output path cannot be deleted or the job fails to run
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: InitializePageRanks <input path> <output path> <number of pages>");
        return -1;
    }

    Configuration conf = getConf();
    conf.set("pagerank.count", args[2]);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself rather than using the
    // default one, so a fully qualified output URI on a non-default file system
    // is deleted correctly instead of being rejected as belonging to the wrong FS.
    output.getFileSystem(conf).delete(output, true);

    Job job = new Job(conf, "InitializePageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(InitializePageRanksMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.SortPageRanks.java

License:Apache License

/**
 * Configures and runs the "SortPageRanks" MapReduce job.
 *
 * <p>Any existing output directory is removed (recursively) before the job is
 * submitted. Map output is keyed by {@code DoubleWritable} and ordered with
 * {@code DoubleWritableDecreasingComparator}; a single reduce task is used.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return {@code -1} if the argument count is wrong, {@code 0} if the job
 *         completes successfully, {@code 1} otherwise
 * @throws Exception if the output path cannot be deleted or the job fails to run
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: SortPageRanks <input path> <output path>");
        return -1;
    }

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself rather than using the
    // default one, so a fully qualified output URI on a non-default file system
    // is deleted correctly instead of being rejected as belonging to the wrong FS.
    output.getFileSystem(getConf()).delete(output, true);

    Job job = new Job(getConf(), "SortPageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(SortPageRanksMapper.class);
    job.setReducerClass(Reducer.class); // i.e. identity reducer
    job.setSortComparatorClass(DoubleWritableDecreasingComparator.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);
    // NOTE(review): map output keys are DoubleWritable and the reducer is the
    // base Reducer, yet the declared job output key class is Text - confirm
    // this mismatch is intended (it presumably only matters for output formats
    // that check the declared key class).
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(1); // TODO: inefficient, use InputSampler with v0.20.x

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.UpdatePageRanks.java

License:Apache License

/**
 * Configures and runs the "UpdatePageRanks" MapReduce job.
 *
 * <p>The number of pages and the dangling pages contribution are stored in the
 * configuration under {@code pagerank.count} and {@code pagerank.dangling}
 * before the job is constructed from that configuration. Any existing output
 * directory is removed (recursively) before the job is submitted.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output
 *             path, {@code args[2]} the number of pages, {@code args[3]} the
 *             dangling pages contribution
 * @return {@code -1} if the argument count is wrong, {@code 0} if the job
 *         completes successfully, {@code 1} otherwise
 * @throws Exception if the output path cannot be deleted or the job fails to run
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println(
                "Usage: UpdatePageRanks <input path> <output path> <number of pages> <dangling pages contribution>");
        return -1;
    }

    Configuration conf = getConf();
    conf.set("pagerank.count", args[2]);
    conf.set("pagerank.dangling", args[3]);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself rather than using the
    // default one, so a fully qualified output URI on a non-default file system
    // is deleted correctly instead of being rejected as belonging to the wrong FS.
    output.getFileSystem(conf).delete(output, true);

    Job job = new Job(conf, "UpdatePageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(UpdatePageRanksMapper.class);
    job.setReducerClass(UpdatePageRanksReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.mapreduce.dicenc.FirstDriver.java

License:Apache License

/**
 * Builds and executes the MapReduce job named "first".
 *
 * <p>Wires {@code FirstMapper} and {@code FirstReducer} together, reading from
 * the first argument and writing {@code Text}/{@code IntWritable} pairs to the
 * second.
 *
 * @param args exactly two entries: input path, then output path
 * @return {@code -1} on a usage error, {@code 0} when the job succeeds,
 *         {@code 1} when it does not
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final boolean badArgCount = args.length != 2;
    if (badArgCount) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final Job firstJob = new Job(getConf(), "first");
    firstJob.setJarByClass(getClass());

    // Input and output locations come straight from the command line.
    final Path source = new Path(args[0]);
    FileInputFormat.addInputPath(firstJob, source);
    final Path target = new Path(args[1]);
    FileOutputFormat.setOutputPath(firstJob, target);

    firstJob.setMapperClass(FirstMapper.class);
    firstJob.setReducerClass(FirstReducer.class);

    firstJob.setOutputKeyClass(Text.class);
    firstJob.setOutputValueClass(IntWritable.class);

    // Block until the job finishes, then translate the outcome to an exit code.
    final boolean succeeded = firstJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:com.talis.mapreduce.dicenc.SecondDriver.java

License:Apache License

/**
 * Builds and executes the MapReduce job named "second".
 *
 * <p>Wires {@code SecondMapper} and {@code SecondReducer} together with
 * {@code Text} output keys and values, and registers an additional named
 * output called "dict" that uses {@code TextOutputFormat}.
 *
 * @param args exactly two entries: input path, then output path
 * @return {@code -1} on a usage error, {@code 0} when the job succeeds,
 *         {@code 1} when it does not
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final boolean badArgCount = args.length != 2;
    if (badArgCount) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final Job secondJob = new Job(getConf(), "second");
    secondJob.setJarByClass(getClass());

    // Input and output locations come straight from the command line.
    final Path source = new Path(args[0]);
    FileInputFormat.addInputPath(secondJob, source);
    final Path target = new Path(args[1]);
    FileOutputFormat.setOutputPath(secondJob, target);

    secondJob.setMapperClass(SecondMapper.class);
    secondJob.setReducerClass(SecondReducer.class);

    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(Text.class);

    // Extra named output alongside the job's regular output.
    MultipleOutputs.addNamedOutput(secondJob, "dict", TextOutputFormat.class, Text.class, Text.class);

    // Block until the job finishes, then translate the outcome to an exit code.
    final boolean succeeded = secondJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:com.talis.mapreduce.dicenc.ThirdDriver.java

License:Apache License

/**
 * Builds and executes the MapReduce job named "third".
 *
 * <p>Wires {@code ThirdMapper} and {@code ThirdReducer} together with
 * {@code Text} output keys and values. Input files are selected through
 * {@code DataPathFilter}.
 *
 * @param args exactly two entries: input path, then output path
 * @return {@code -1} on a usage error, {@code 0} when the job succeeds,
 *         {@code 1} when it does not
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final boolean badArgCount = args.length != 2;
    if (badArgCount) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final Job thirdJob = new Job(getConf(), "third");
    thirdJob.setJarByClass(getClass());

    // Input (with its path filter) and output locations.
    final Path source = new Path(args[0]);
    FileInputFormat.addInputPath(thirdJob, source);
    FileInputFormat.setInputPathFilter(thirdJob, DataPathFilter.class);
    final Path target = new Path(args[1]);
    FileOutputFormat.setOutputPath(thirdJob, target);

    thirdJob.setMapperClass(ThirdMapper.class);
    thirdJob.setReducerClass(ThirdReducer.class);

    thirdJob.setOutputKeyClass(Text.class);
    thirdJob.setOutputValueClass(Text.class);

    // Block until the job finishes, then translate the outcome to an exit code.
    final boolean succeeded = thirdJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:com.talis.mapreduce.lib.input.TestDriver.java

License:Apache License

/**
 * Builds and executes the MapReduce job named "test".
 *
 * <p>Reads input through {@code NQuadsInputFormat}, runs {@code TestMapper}
 * with the number of reduce tasks set to zero, and declares
 * {@code LongWritable}/{@code Text} as the output key/value classes.
 *
 * @param args exactly two entries: input path, then output path
 * @return {@code -1} on a usage error, {@code 0} when the job succeeds,
 *         {@code 1} when it does not
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final boolean badArgCount = args.length != 2;
    if (badArgCount) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final Job testJob = new Job(getConf(), "test");
    testJob.setJarByClass(getClass());

    testJob.setInputFormatClass(NQuadsInputFormat.class);

    // Input and output locations come straight from the command line.
    final Path source = new Path(args[0]);
    FileInputFormat.addInputPath(testJob, source);
    final Path target = new Path(args[1]);
    FileOutputFormat.setOutputPath(testJob, target);

    // Mapper only: no reduce tasks are scheduled.
    testJob.setMapperClass(TestMapper.class);
    testJob.setNumReduceTasks(0);

    testJob.setOutputKeyClass(LongWritable.class);
    testJob.setOutputValueClass(Text.class);

    // Block until the job finishes, then translate the outcome to an exit code.
    final boolean succeeded = testJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:com.talis.mapreduce.wordcount.newapi.WordCount.java

License:Apache License

/**
 * Builds and executes the word-count MapReduce job, named after this class's
 * simple name.
 *
 * <p>Uses {@code WordCountMapper}, with {@code WordCountReducer} serving as
 * both combiner and reducer, and emits {@code Text}/{@code IntWritable} pairs.
 *
 * @param args exactly two entries: input path, then output path
 * @return {@code -1} on a usage error, {@code 0} when the job succeeds,
 *         {@code 1} when it does not
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final boolean badArgCount = args.length != 2;
    if (badArgCount) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final String jobName = getClass().getSimpleName();
    final Job wordCountJob = new Job(getConf(), jobName);
    wordCountJob.setJarByClass(getClass());

    // No partitioner is set explicitly, so the job keeps its default one.
    wordCountJob.setMapperClass(WordCountMapper.class);
    wordCountJob.setCombinerClass(WordCountReducer.class);
    wordCountJob.setReducerClass(WordCountReducer.class);

    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    // Input and output locations come straight from the command line.
    final Path source = new Path(args[0]);
    FileInputFormat.addInputPath(wordCountJob, source);
    final Path target = new Path(args[1]);
    FileOutputFormat.setOutputPath(wordCountJob, target);

    // Block until the job finishes, then translate the outcome to an exit code.
    final boolean succeeded = wordCountJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}