List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
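All of the snippets below call new Job(conf, jobName), which maps to this constructor; in current Hadoop releases it is deprecated in favor of the Job.getInstance(Configuration, String) factory. The following is a minimal driver sketch using the non-deprecated factory; the class name WordCountDriver is illustrative, and TokenizerMapper/IntSumReducer are the word-count classes that appear in the WordCount examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(...) replaces the deprecated new Job(conf, jobName) used in the examples below
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenizerMapper.class);   // mapper/reducer classes borrowed from the WordCount examples
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}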
From source file:com.neu.cs6240.Xml2csvComments.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvPosts <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csvPosts.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(CommentsMapper.class);
    job.setReducerClass(CommentsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(10);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.neu.cs6240.Xml2csvPosts.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvPosts <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csvPosts.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(PostsMapper.class);
    job.setReducerClass(PostsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(15);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.neusoft.hbase.test.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.nistfortunetellers.cleaning.NISTClean.java
License:Apache License
/** Runs a Job that is Text in and Out, and TextInput in and out, too! */
@SuppressWarnings({ "deprecation", "rawtypes" })
static void runTextJob(String jobName, Configuration jobConfig, String inputPath, String outputPath,
        Class<? extends Mapper> mapper, Class<? extends Reducer> reducer) {
    try {
        Job genericJob = new Job(jobConfig, jobName);
        // DEBUG
        //genericJob.setNumReduceTasks(0);
        // END DEBUG
        genericJob.setJarByClass(NISTClean.class);
        genericJob.setOutputKeyClass(Text.class);
        genericJob.setOutputValueClass(Text.class);
        genericJob.setMapperClass(mapper);
        genericJob.setReducerClass(reducer);
        genericJob.setInputFormatClass(TextInputFormat.class);
        genericJob.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(genericJob, new Path(inputPath));
        FileOutputFormat.setOutputPath(genericJob, new Path(outputPath));
        genericJob.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file:com.nnapz.hbaseexplorer.mr.TableStats.java
License:Apache License
/**
 * M/R Job setup. No reduce.
 *
 * @param conf      a suitable hadoop+hbase configuration
 * @param tableName the table we want to get stats from
 * @return the Job object, to be started
 * @throws java.io.IOException any hadoop IO problem
 */
public static Job createSubmittableJob(Configuration conf, String tableName) throws IOException {
    Job job = new Job(conf, NAME + "_" + tableName);
    if (job.getJar() == null) {
        job.setJarByClass(TableStats.class); // otherwise set in conf already
    }
    Scan scan = new Scan();
    scan.setMaxVersions(10000); // todo fixme
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCountMapper.class, Text.class, Result.class, job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
}
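createSubmittableJob only builds the Job; the caller still has to submit it. A minimal, hypothetical driver sketch (HBaseConfiguration.create() and the table-name argument are assumptions, not part of the original class) might look like:

// Hypothetical driver: build the per-table stats job and block until it finishes.
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();   // assumed HBase-aware configuration
    Job job = createSubmittableJob(conf, args[0]);       // args[0] = HBase table name
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}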
From source file:com.paperbook.test.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    //     System.err.println("Usage: wordcount <in> <out>");
    //     System.exit(2);
    // }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // The path is a local path on the local file system, not HDFS
    FileInputFormat.addInputPath(job, new Path("resources/words.txt"));
    FileOutputFormat.setOutputPath(job, new Path("output"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.phantom.hadoop.examples.BaileyBorweinPlouffe.java
License:Apache License
/** Create and setup a job */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:com.phantom.hadoop.examples.pi.DistSum.java
License:Apache License
/** Create a job */
private Job createJob(String name, Summation sigma) throws IOException {
    final Job job = new Job(getConf(), parameters.remoteDir + "/" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(DistSum.class);
    jobconf.setInt(N_PARTS, parameters.nParts);
    SummationWritable.write(sigma, DistSum.class, jobconf);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:com.phantom.hadoop.examples.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.phantom.hadoop.examples.WordMean.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}