Example usage for org.apache.hadoop.mapreduce Job Job

List of usage examples for org.apache.hadoop.mapreduce Job Job

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce Job constructor.

Prototype

Job(Configuration conf, String jobName) throws IOException 
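Below is a minimal sketch of how this constructor is typically wired up; the driver, mapper, and reducer class names are placeholders (not taken from the examples below), and the snippet assumes the usual imports from org.apache.hadoop.io, org.apache.hadoop.mapreduce, and org.apache.hadoop.mapreduce.lib.input/output. Note that in Hadoop 2.x and later this constructor is deprecated in favor of Job.getInstance(Configuration, String).

Configuration conf = new Configuration();
Job job = new Job(conf, "example job");            // constructor covered on this page
// Job job = Job.getInstance(conf, "example job"); // preferred, non-deprecated form
job.setJarByClass(ExampleDriver.class);            // placeholder driver class
job.setMapperClass(ExampleMapper.class);           // placeholder Mapper implementation
job.setReducerClass(ExampleReducer.class);         // placeholder Reducer implementation
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path("input"));
FileOutputFormat.setOutputPath(job, new Path("output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);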

Usage

From source file:com.siwind.routingloop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }

    // Delete any existing output directory so the job can be re-run without failing
    Path outputpath = new Path(otherArgs[otherArgs.length - 1]);
    FileSystem.get(conf).delete(outputpath, true);

    FileOutputFormat.setOutputPath(job, outputpath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.skp.experiment.cf.als.hadoop.UploloadToHbaseTableJob.java

License:Apache License

/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}

From source file:com.sohu.rdc.inf.cdn.offline.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setInputFormatClass(LzoTextInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.splicemachine.mrio.api.core.SMInputFormatIT.java

License:Apache License

@Test
public void testSparkIntegrationWithInputFormat() throws IOException {
    config.set(MRConstants.SPLICE_TABLE_NAME, tableWatcherA.toString());
    Job job = new Job(config, "Test Scan");
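    // The table name set on config above (SPLICE_TABLE_NAME) reaches SMInputFormat via job.getConfiguration()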
    JavaPairRDD<RowLocation, ExecRow> table = sparkWatcher.jsc.newAPIHadoopRDD(job.getConfiguration(),
            SMInputFormat.class, RowLocation.class, ExecRow.class);
    List<Tuple2<RowLocation, ExecRow>> data = table.collect();
    int i = 0;
    for (Tuple2<RowLocation, ExecRow> tuple : data) {
        i++;
        Assert.assertNotNull(tuple._1());
        Assert.assertNotNull(tuple._2());
    }
    Assert.assertEquals("Incorrect Results Returned", 2, i);
}

From source file:com.splicemachine.mrio.api.core.SMInputFormatIT.java

License:Apache License

@Test
public void testCountOverMultipleRegionsInSpark() throws IOException {
    config.set(MRConstants.SPLICE_TABLE_NAME, tableWatcherB.toString());
    Job job = new Job(config, "Test Scan");
    JavaPairRDD<RowLocation, ExecRow> table = sparkWatcher.jsc.newAPIHadoopRDD(job.getConfiguration(),
            SMInputFormat.class, RowLocation.class, ExecRow.class);
    List<Tuple2<RowLocation, ExecRow>> data = table.collect();
    int i = 0;
    for (Tuple2<RowLocation, ExecRow> tuple : data) {
        i++;
        Assert.assertNotNull(tuple._1());
        Assert.assertNotNull(tuple._2());
    }
    Assert.assertEquals("Incorrect Results Returned", 10000, i);
}

From source file:com.splunk.shuttl.integration.hadoop.hbase.CSVJobFactory.java

License:Apache License

/**
 * @return the hadoopConfiguration
 * @throws IOException
 */
public static Job getConfiguredJob(String[] arguments) throws IOException {

    Configuration jobConfiguration = new Configuration(true);
    // Load hbase-site.xml
    HBaseConfiguration.addHbaseResources(jobConfiguration);

    jobConfiguration.set("fs.default.name", arguments[0]);
    jobConfiguration.set("mapred.job.tracker", arguments[1]);
    jobConfiguration.set(JobConfigurationConstants.FILENAME, arguments[2]);
    jobConfiguration.set(JobConfigurationConstants.OUTPUT_PATH, arguments[3]);
    jobConfiguration.set(JobConfigurationConstants.TABLE_NAME, arguments[4]);

    jobConfiguration.set(JobConfigurationConstants.COLUMN_FAMILY, "d");

    Job job = new Job(jobConfiguration, "BucketToHbase");
    job.setJarByClass(CSVMapper.class);

    job.setMapperClass(CSVMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    job.setInputFormatClass(TextInputFormat.class);

    return job;
}

From source file:com.springsource.insight.plugin.hadoop.WordCount.java

License:Open Source License

public int run(String[] args) throws Exception {
    String INPUT = "src/test/resources";
    String OUTPUT = "target/out";

    Configuration conf = new Configuration();
    File targetFolder = FileUtil.detectTargetFolder(getClass());
    if (targetFolder == null) {
        throw new IllegalStateException("Cannot detect target folder");
    }
    File tempFolder = new File(targetFolder, "temp");
    conf.set("hadoop.tmp.dir", tempFolder.getAbsolutePath());

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileUtils.deleteDirectory(new File(OUTPUT)); // delete old output data
    FileInputFormat.addInputPath(job, new Path(INPUT));
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT));

    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:com.stride.cartrek.core.hbase.RowKeyDistributorTestBase.java

License:Apache License

private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = testingUtility.getConfiguration();
    Job job = new Job(conf, "testMapReduceInternal()-Job");
    job.setJarByClass(this.getClass());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat which was set in
    // TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}

From source file:com.talis.labs.pagerank.mapreduce.CheckConvergence.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CheckConvergence <input path> <output path>");
        return -1;
    }

    FileSystem.get(getConf()).delete(new Path(args[1]), true);

    Job job = new Job(getConf(), "CheckConvergence");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(CheckConvergenceMapper.class);
    job.setCombinerClass(CheckConvergenceReducer.class);
    job.setReducerClass(CheckConvergenceReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.CheckingData.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CheckingData <input path> <output path>");
        return -1;
    }

    FileSystem.get(getConf()).delete(new Path(args[1]), true);

    Job job = new Job(getConf(), "CheckingData");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(CheckingDataMapper.class);
    job.setReducerClass(CheckingDataReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}