Example usage for org.apache.hadoop.mapreduce Job getNumReduceTasks

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job.getNumReduceTasks().

Prototype

public int getNumReduceTasks() 

Document

Gets the configured number of reduce tasks for this job; defaults to 1 if the value has not been explicitly set.
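
A minimal standalone sketch of how this accessor is typically paired with setNumReduceTasks, assuming a default local Configuration; the job name and reducer counts below are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class NumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "num-reduce-tasks-demo");

        // Before any explicit configuration, getNumReduceTasks() reports the framework default (1).
        System.out.println("Default reducers: " + job.getNumReduceTasks());

        // Configure four reducers, then read the value back.
        job.setNumReduceTasks(4);
        System.out.println("Configured reducers: " + job.getNumReduceTasks());

        // A common pattern seen in the examples below: treat the job as map-only
        // when no reduce tasks are configured.
        job.setNumReduceTasks(0);
        boolean isMapOnly = job.getNumReduceTasks() == 0;
        System.out.println("Map-only job: " + isMapOnly);
    }
}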

Usage

From source file: com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.java

License: Apache License

public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(AggregationPhaseJob.class);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));
    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(AggregationMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reduce config
    job.setReducerClass(AggregationReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, avroSchema);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
    LOGGER.info("Num Reducers : {}", numReducers);
    if (StringUtils.isNotBlank(numReducers)) {
        job.setNumReduceTasks(Integer.valueOf(numReducers));
        LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
    }

    job.waitForCompletion(true);

    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    if (counter.getValue() == 0) {
        throw new IllegalStateException("No input records in " + inputPathDir);
    }
    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

    for (String metric : thirdeyeConfig.getMetricNames()) {
        counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
        LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    }

    return job;
}

From source file: com.linkedin.whiteelephant.mapreduce.MyAvroMultipleOutputs.java

License: Apache License

private TaskAttemptContext getContext(String nameOutput) throws IOException {

    TaskAttemptContext taskContext = taskContexts.get(nameOutput);

    if (taskContext != null) {
        return taskContext;
    }

    // The following trick leverages the instantiation of a record writer via
    // the job thus supporting arbitrary output formats.
    context.getConfiguration().set("avro.mo.config.namedOutput", nameOutput);
    Job job = new Job(context.getConfiguration());
    job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
    Schema keySchema = keySchemas.get(nameOutput + "_KEYSCHEMA");
    Schema valSchema = valSchemas.get(nameOutput + "_VALSCHEMA");

    boolean isMaponly = job.getNumReduceTasks() == 0;

    if (keySchema != null) {
        if (isMaponly)
            AvroJob.setMapOutputKeySchema(job, keySchema);
        else
            AvroJob.setOutputKeySchema(job, keySchema);
    }
    if (valSchema != null) {
        if (isMaponly)
            AvroJob.setMapOutputValueSchema(job, valSchema);
        else
            AvroJob.setOutputValueSchema(job, valSchema);
    }
    taskContext = new TaskAttemptContext(job.getConfiguration(), context.getTaskAttemptID());

    taskContexts.put(nameOutput, taskContext);

    return taskContext;
}

From source file: com.moz.fiji.mapreduce.output.TestFileMapReduceJobOutput.java

License: Apache License

@Test
public void testConfigure() throws ClassNotFoundException, IOException {
    final Path filePath = new Path("foo/bar");
    final int numSplits = 42;
    final Class<? extends OutputFormat> outputFormatClass = TextOutputFormat.class;
    FileMapReduceJobOutput jobOutput = new ConcreteFileMapReduceJobOutput(filePath, numSplits,
            outputFormatClass);

    Job job = new Job();
    jobOutput.configure(job);

    // The output format class should be set in the job configuration.
    assertEquals(outputFormatClass, job.getOutputFormatClass());
    // The file output path should be set in the job configuration.
    assert (FileOutputFormat.getOutputPath(job).toString().endsWith(filePath.toString()));
    // The number of reduce tasks should be set to the number of splits.
    assertEquals(numSplits, job.getNumReduceTasks());
}

From source file: com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java

License: Apache License

@Test
public void testBuildWithHFileOutput() throws Exception {
    final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(NoopBulkImporter.class).withOutput(MapReduceJobOutputs
                    .newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(NoopBulkImporter.class,
            job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());
}

From source file: com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java

License: Apache License

@Test
public void testBuildWithKeyValueStore() throws Exception {
    final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(KVStoreBulkImporter.class).withOutput(MapReduceJobOutputs
                    .newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    // Verify that everything else is what we expected as in the previous test
    // (except the bulk importer class name)...
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(KVStoreBulkImporter.class,
            job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());

    // KeyValueStore-specific checks here.
    final Configuration confOut = job.getConfiguration();
    assertEquals(1, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("foostore", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}

From source file: com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License: Apache License

@Test
public void testGatherToHFile() throws Exception {
    final FijiMapReduceJob gatherJob = FijiGatherJobBuilder.create().withConf(getConf())
            .withInputTable(mTable.getURI()).withGatherer(GatherToHFile.class).withOutput(MapReduceJobOutputs
                    .newHFileMapReduceJobOutput(mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(GatherToHFile.class.getName(), conf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}

From source file: com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License: Apache License

@Test
public void testGatherReducerToHFile() throws Exception {
    final FijiMapReduceJob gatherJob = FijiGatherJobBuilder.create().withConf(getConf())
            .withInputTable(mTable.getURI()).withGatherer(SimpleGatherer.class)
            .withReducer(ReducerToHFile.class).withOutput(MapReduceJobOutputs
                    .newHFileMapReduceJobOutput(mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(SimpleGatherer.class.getName(), conf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(ReducerToHFile.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(SequenceFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}

From source file: com.moz.fiji.mapreduce.TestFijiMapReduceJobBuilder.java

License: Apache License

@Test
public void testBuild() throws Exception {
    final FijiMapReduceJob job = FijiMapReduceJobBuilder.create().withConf(mConf)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
            .withMapper(MyMapper.class).withReducer(MyReducer.class)
            .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
            .build();

    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}

From source file: com.moz.fiji.mapreduce.TestFijiMapReduceJobBuilder.java

License: Apache License

@Test
public void testBuildWithXmlKVStores() throws Exception {
    // Test that we can override default configuration KeyValueStores from an XML file.
    final InputStream xmlStores = Resources.openSystemResource("com.moz.fiji/mapreduce/test-kvstores.xml");

    // This file needs to exist before we build the job, or else
    // we can't build the job; it's referenced by a key-value store that checks
    // for its presence.
    final File tmpFile = new File("/tmp/foo.seq");
    if (tmpFile.createNewFile()) {
        // We created this temp file, we're responsible for deleting it.
        tmpFile.deleteOnExit();
    }

    LOG.info("Building job...");
    final FijiMapReduceJob job = FijiMapReduceJobBuilder.create().withConf(mConf)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
            .withMapper(MyMapper.class).withReducer(MyReducer.class)
            .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
            .withStoreBindings(xmlStores).build();

    xmlStores.close();

    LOG.info("Verifying job configuration...");
    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    // We override mapperMap with a SeqFileKeyValueStore.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(SeqFileKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}

From source file: com.moz.fiji.mapreduce.TestFijiProduceJobBuilder.java

License: Apache License

@Test
public void testBuildWithHFileOutput() throws ClassNotFoundException, IOException {

    final FijiMapReduceJob produceJob = FijiProduceJobBuilder.create().withConf(getConf())
            .withInputTable(mTable.getURI()).withProducer(MyProducer.class)
            .withOutput(
                    MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(), new Path("foo/bar"), 10))
            .build();

    // Verify that the MR Job was configured correctly.
    final Job job = produceJob.getHadoopJob();
    assertEquals(HBaseFijiTableInputFormat.class, job.getInputFormatClass());
    assertEquals(ProduceMapper.class, job.getMapperClass());
    assertEquals(MyProducer.class, job.getConfiguration().getClass(FijiConfKeys.FIJI_PRODUCER_CLASS, null));
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
}