Example usage for org.apache.hadoop.mapreduce Job Job

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.Job constructor, collected from open-source projects.

Prototype

Job(Configuration conf, String jobName) throws IOException
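
Before the project examples below, here is a minimal, self-contained sketch of how this constructor is typically invoked. It mirrors the pattern used throughout the examples on this page; the class name JobConstructorSketch, the job name "example-job", and the command-line input/output paths are placeholders rather than code from any of the listed source files. Note that in Hadoop 2.x and later this constructor is deprecated in favor of Job.getInstance(conf, jobName).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobConstructorSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Create the job from a Configuration and a human-readable job name.
        Job job = new Job(conf, "example-job");
        job.setJarByClass(JobConstructorSketch.class);
        // Identity mapper/reducer defaults; TextInputFormat yields (LongWritable, Text) pairs.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // Placeholder HDFS paths supplied on the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}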

Usage

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob3(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.FinalResultBuilder");
    job.setJarByClass(Runner.class);

    job.setMapperClass(FinalResultBuilder._Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(VLongWritable.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file:com.renren.hadoop.oiv.tools.MoveData.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("Usage: MoveData <data_from_dir> <data_to_dir> <job_out_dir> [<black_list_file>]");
        System.exit(2);
    }
    String blackListFile = null;
    if (otherArgs.length == 4) {
        blackListFile = otherArgs[3];
    } else {
        System.err.println("Warn: black_list_file param is not given");
    }

    String dataFromDir = otherArgs[0];
    conf.set(DATA_TO_DIR, otherArgs[1]);
    // Guard against a null value: Configuration.set rejects null values.
    if (blackListFile != null) {
        conf.set(BlackListManager.BLACK_LIST_FILE, blackListFile);
    }
    String jobOutDir = otherArgs[2];

    Job job = new Job(conf, "clearData");
    job.setJarByClass(MoveData.class);
    job.setMapperClass(MoveDataMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(dataFromDir));
    FileOutputFormat.setOutputPath(job, new Path(jobOutDir));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.rockstor.compact.CompactDataTool.java

License:Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(CompactDataTool.class);

    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);

    job.setMapperClass(CompactDataMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK");
    return job;
}

From source file:com.rockstor.compact.GenGarbageIndexTool.java

License:Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);

    job.setJarByClass(GenGarbageIndexTool.class);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(GarbageChunkDB.TAB_NAME, scan, GarbageChunkMapper.class,
            ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

    TableMapReduceUtil.setScannerCaching(job, batchSize);
    job.setReducerClass(GarbageChunkReduce.class);
    job.setPartitionerClass(GarbageChunkPartition.class);
    job.setCombinerClass(GarbageChunkCombine.class);

    job.setNumReduceTasks(Compactor.getInstance().getReduceNum());
    job.setOutputFormatClass(NullOutputFormat.class);

    LOG.info("init job " + NAME + " finished!");
    return job;
}

From source file:com.rockstor.compact.RecoveryTool.java

License:Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(RecoveryTool.class);

    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);

    job.setMapperClass(RecoveryMapper.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK!");
    return job;
}

From source file:com.sanjay.mapreduce.SiCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(5);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.sematext.hbase.hut.RollbackUpdatesMrJob.java

License:Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];

    conf.set("mapred.map.tasks.speculative.execution", "false");

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(RollbackUpdatesMapper.class);
    // TODO: Allow passing filter and subset of rows/columns.
    Scan s = new Scan();
    // Optional arguments.
    long startTime = args.length > 1 ? Long.parseLong(args[1]) : 0L;
    long endTime = args.length > 2 ? Long.parseLong(args[2]) : Long.MAX_VALUE;

    // TODO: consider using scan.setTimeRange() for limiting scanned data range. It may
    //       not be good way to do if tss are artificial in HutPuts though
    //    s.setTimeRange(startTime, endTime);
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MIN_TIME_ATTR,
            String.valueOf(startTime));
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MAX_TIME_ATTR,
            String.valueOf(endTime));

    s.setFilter(new HutWriteTimeRowsFilter(endTime, startTime));

    // TODO: allow user change using job params
    s.setCaching(512);
    s.setCacheBlocks(false);

    LOG.info("Using scan: " + s.toString());

    // TODO: allow better limiting of data to be fetched
    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
        s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }

    LOG.info("starttime (inclusive): " + startTime + " (" + new Date(startTime) + ")"
            + ", endtime (inclusive): " + endTime + " (" + new Date(endTime) + ")");

    TableMapReduceUtil.initTableMapperJob(tableName, s, RollbackUpdatesMapper.class, null, null, job);
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    // No reducers.  Just write straight to output files.
    job.setNumReduceTasks(0);
    return job;
}
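
For completeness, here is a short, hypothetical driver sketch showing how a setup method like the one above is usually wired into a main method. This driver is an assumption for illustration only and is not part of the RollbackUpdatesMrJob source; it reuses GenericOptionsParser (seen in other examples on this page) and HBase's HBaseConfiguration.create().

public static void main(String[] args) throws Exception {
    // Hypothetical driver: build an HBase-aware configuration, parse generic Hadoop options,
    // delegate job setup to createSubmittableJob, and block until the job finishes.
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}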

From source file:com.sematext.hbase.wd.RowKeyDistributorTestBase.java

License:Apache License

private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = testingUtility.getConfiguration();
    Job job = new Job(conf, "testMapReduceInternal()-Job");
    job.setJarByClass(this.getClass());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat which was set in TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}

From source file:com.shopping.hbase.mapreduce.Import.java

License:Apache License

/**
 * Job configuration.
 */
protected Job configureJob(Configuration conf, String inputPathName, String tableName) throws IOException {
    Path inputPath = new Path(inputPathName);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Importer.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}

From source file:com.shopping.hbase.sample.mapreduce.SampleUploader.java

License:Apache License

/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}