Example usage for org.apache.hadoop.mapreduce Job getMapperClass

List of usage examples for org.apache.hadoop.mapreduce.Job#getMapperClass()

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#getMapperClass().

Prototype

@SuppressWarnings("unchecked")
public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException 

Source Link

Document

Get the Mapper class for the job.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputsTest.java

License:Apache License

/**
 * Verifies that inputs registered through {@code MultipleInputs.addInput} can be read back from the
 * job configuration via {@code MultipleInputs.getInputMap}, first with one input and then with two.
 */
@Test
public void testConfigurations() throws IOException, ClassNotFoundException {
    Job job = Job.getInstance();

    // first input: registered with whatever mapper class the job currently reports
    String firstName = "inputName1";
    String firstFormat = TextInputFormat.class.getName();
    Map<String, String> firstConfigs = ImmutableMap.of("key1", "val1", "key2", "val2");
    MultipleInputs.addInput(job, firstName, firstFormat, firstConfigs, job.getMapperClass());

    Map<String, MultipleInputs.MapperInput> inputs = MultipleInputs.getInputMap(job.getConfiguration());

    Assert.assertEquals(1, inputs.size());
    Assert.assertEquals(firstName, Iterables.getOnlyElement(inputs.keySet()));

    MultipleInputs.MapperInput soleInput = Iterables.getOnlyElement(inputs.values());
    Assert.assertEquals(firstFormat, soleInput.getInputFormatClassName());
    Assert.assertEquals(firstConfigs, soleInput.getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(), soleInput.getMapperClassName());

    // adding an input is expected to switch the job's input format to the delegating one
    Assert.assertEquals(DelegatingInputFormat.class, job.getInputFormatClass());

    // second input: registered with an explicit mapper class
    String secondName = "inputName2";
    String secondFormat = TextInputFormat.class.getName();
    Map<String, String> secondConfigs = ImmutableMap.of(
            "some_key1", "some_val1",
            "some_key2", "some_val2");
    MultipleInputs.addInput(job, secondName, secondFormat, secondConfigs, CustomMapper.class);

    inputs = MultipleInputs.getInputMap(job.getConfiguration());

    Assert.assertEquals(2, inputs.size());

    // the first input must be unchanged by the second registration
    MultipleInputs.MapperInput firstInput = inputs.get(firstName);
    Assert.assertEquals(firstFormat, firstInput.getInputFormatClassName());
    Assert.assertEquals(firstConfigs, firstInput.getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(), firstInput.getMapperClassName());

    MultipleInputs.MapperInput secondInput = inputs.get(secondName);
    Assert.assertEquals(secondFormat, secondInput.getInputFormatClassName());
    Assert.assertEquals(secondConfigs, secondInput.getInputFormatConfiguration());
    Assert.assertEquals(CustomMapper.class,
            job.getConfiguration().getClassByName(secondInput.getMapperClassName()));
}

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Sets the configurations used for inputs.
 * Multiple mappers could be defined, so we first check that their output types are consistent.
 *
 * @return the TypeToken for one of the mappers (doesn't matter which one, since we check that all of their output
 * key/value types are consistent. Returns null if the mapper class was not configured directly on the job and the
 * job's mapper class is to be used./*from w w  w  . j a v a2s  .  co  m*/
 * @throws IllegalArgumentException if any of the configured mapper output types are inconsistent.
 */
@Nullable
private TypeToken<Mapper> setInputsIfNeeded(Job job) throws IOException, ClassNotFoundException {
    Class<? extends Mapper> jobMapperClass = job.getMapperClass();

    Class<? extends Mapper> firstMapperClass = null;
    Map.Entry<Class, Class> firstMapperOutputTypes = null;

    for (Map.Entry<String, MapperInput> mapperInputEntry : context.getMapperInputs().entrySet()) {
        MapperInput mapperInput = mapperInputEntry.getValue();
        InputFormatProvider provider = mapperInput.getInputFormatProvider();
        Map<String, String> inputFormatConfiguration = new HashMap<>(provider.getInputFormatConfiguration());

        // default to what is configured on the job, if user didn't specify a mapper for an input
        Class<? extends Mapper> mapperClass = mapperInput.getMapper() == null ? jobMapperClass
                : mapperInput.getMapper();

        // check output key/value type consistency, except for the first input
        if (firstMapperClass == null) {
            firstMapperClass = mapperClass;
            firstMapperOutputTypes = getMapperOutputKeyValueTypes(mapperClass);
        } else {
            assertConsistentTypes(firstMapperClass, firstMapperOutputTypes, mapperClass);
        }

        // A bit hacky for stream.
        if (provider instanceof StreamInputFormatProvider) {
            // pass in mapperInput.getMapper() instead of mapperClass, because mapperClass defaults to the Identity Mapper
            setDecoderForStream((StreamInputFormatProvider) provider, job, inputFormatConfiguration,
                    mapperInput.getMapper());
        }

        MultipleInputs.addInput(job, mapperInputEntry.getKey(), provider.getInputFormatClassName(),
                inputFormatConfiguration, mapperClass);
    }

    // if firstMapperClass is null, then, user is not going through our APIs to add input; leave the job's input format
    // to user and simply return the mapper output types of the mapper configured on the job.
    // if firstMapperClass == jobMapperClass, return null if the user didn't configure the mapper class explicitly
    if (firstMapperClass == null || firstMapperClass == jobMapperClass) {
        return resolveClass(job.getConfiguration(), MRJobConfig.MAP_CLASS_ATTR, Mapper.class);
    }
    return resolveClass(firstMapperClass, Mapper.class);
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

/**
 * Runs one map task per input split of the job, sequentially, in the calling thread.
 *
 * <p>For each split a fresh mapper and output format are instantiated, the task is set up on the
 * output committer, the mapper is run, and the task is committed. If anything fails before the
 * commit completes, the task is aborted instead.</p>
 *
 * @param job the job to run the map phase of
 * @param sorter receives the mapper output when non-null; when null the mapper writes through the
 *     job's output format
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int serial = 1;
    for (InputSplit split : input.getSplits(job)) {
        TaskAttemptID attemptId = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            String message = MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), attemptId, split.getLength());
            LOG.debug(message);
        }
        TaskAttemptContext taskContext = newTaskAttemptContext(conf, attemptId);
        // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
        boolean committed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split,
                newTaskAttemptContext(conf, attemptId))) {
            RecordWriter<?, ?> writer;
            if (sorter == null) {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, attemptId));
            } else {
                writer = new ShuffleWriter(sorter);
            }
            try {
                Mapper.Context mapperContext = newMapperContext(conf, attemptId, reader, writer, committer, split);
                reader.initialize(split, mapperContext);
                mapper.run(mapperContext);
            } finally {
                // the writer is closed whether or not the mapper succeeded
                writer.close(newTaskAttemptContext(conf, attemptId));
            }
            doCommitTask(taskContext, committer);
            committed = true;
        } finally {
            if (!committed) {
                doAbortTask(taskContext, committer);
            }
        }
    }
}

From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java

License:Apache License

/**
 * Configures a job through the builder three times (single queue input, two queue inputs with
 * lazy sequence-file output, and with S3 credentials) and checks the resulting job settings
 * after each configuration.
 */
@Test
public void testConfigureWholeJob() throws Exception {
    // base configuration
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    // extended configuration
    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    Job job = builder.configureJob(conf);

    // with a single queue input, the mapper passed to addQueueInput is expected on the job itself
    assertEquals("job_name", job.getJobName());
    assertEquals(Text.class, job.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, job.getMapOutputValueClass());
    assertEquals(MockReducer.class, job.getReducerClass());
    assertEquals(MockMapper.class, job.getMapperClass());
    assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, job.getNumReduceTasks());
    assertEquals(MockPartitioner.class, job.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration()));
    assertEquals(TextOutputFormat.class, job.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, job.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString());

    // reconfigure the same builder: no explicit name, lazy sequence-file output, and a second queue input
    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    job = builder.configureJob(conf);
    // with two queue inputs, the multi-input format and the delegating mapper are expected
    assertEquals(LazyOutputFormat.class, job.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass());
    assertEquals(DelegatingMapper.class, job.getMapperClass());
    assertEquals(BytesWritable.class, job.getOutputKeyClass());
    assertEquals(BytesWritable.class, job.getOutputValueClass());
    // an output path and a job name must still be present although neither was given explicitly here
    assertNotNull(SequenceFileOutputFormat.getOutputPath(job));
    assertNotNull(job.getJobName());

    // use s3
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");
    job = builder.configureJob(conf);

    // the credentials are expected under both the s3n and the s3 configuration keys
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}

From source file:com.cloudera.castagna.logparser.Utils.java

License:Apache License

/**
 * Writes a one-line summary of the job's pipeline (input format, mapper, map output types,
 * reducer and reduce task count, output types, output format) followed by its first input path
 * and its output path, all at debug level.
 *
 * @param job the job to describe
 * @param log the logger that receives the debug messages
 * @throws ClassNotFoundException if one of the classes configured on the job cannot be loaded
 */
public static void log(Job job, Logger log) throws ClassNotFoundException {
    log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}",
            new Object[] { job.getInputFormatClass().getSimpleName(), job.getMapperClass().getSimpleName(),
                    job.getMapOutputKeyClass().getSimpleName(), job.getMapOutputValueClass().getSimpleName(),
                    job.getReducerClass().getSimpleName(), job.getNumReduceTasks(),
                    job.getOutputKeyClass().getSimpleName(), job.getOutputValueClass().getSimpleName(),
                    job.getOutputFormatClass().getSimpleName() });
    Path[] inputs = FileInputFormat.getInputPaths(job);
    Path output = FileOutputFormat.getOutputPath(job);
    // A job without any configured input path yields an empty array; a logging helper must not
    // fail with ArrayIndexOutOfBoundsException in that case.
    Object firstInput = inputs.length > 0 ? inputs[0] : "<none>";
    log.debug("input: {}", firstInput);
    log.debug("output: {}", output);
}

From source file:com.example.Driver.java

License:Open Source License

/**
 * Configures and runs the word-count job, logging each setting as it is applied.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output directory
 * @return 0 if the job completed successfully; 1 if fewer than two arguments were supplied or the
 *     job failed
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    // The map output key/value classes are deliberately left unset here; the values logged below
    // are whatever the job reports in that case.
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    // When you are not running any reducer, use instead:
    //      job.setNumReduceTasks(0);
    //      logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // waitForCompletion reports whether the job succeeded; propagate a failure as a non-zero
    // status instead of always claiming success.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java

License:Apache License

@Test
public void testBuildWithHFileOutput() throws Exception {
    final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(NoopBulkImporter.class).withOutput(MapReduceJobOutputs
                    .newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10))
            .build();/*from  www  .jav a  2 s  . c om*/

    final Job job = mrjob.getHadoopJob();
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(NoopBulkImporter.class,
            job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());
}

From source file:com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java

License:Apache License

@Test
public void testBuildWithKeyValueStore() throws Exception {
    final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(KVStoreBulkImporter.class).withOutput(MapReduceJobOutputs
                    .newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10))
            .build();//from ww w.j  a  va 2s  .  c o  m

    final Job job = mrjob.getHadoopJob();
    // Verify that everything else is what we expected as in the previous test
    // (except the bulk importer class name)...
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(KVStoreBulkImporter.class,
            job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());

    // KeyValueStore-specific checks here.
    final Configuration confOut = job.getConfiguration();
    assertEquals(1, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("foostore", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}

From source file:com.moz.fiji.mapreduce.TestFijiMapReduceJobBuilder.java

License:Apache License

@Test
public void testBuild() throws Exception {
    final FijiMapReduceJob job = FijiMapReduceJobBuilder.create().withConf(mConf)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
            .withMapper(MyMapper.class).withReducer(MyReducer.class)
            .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
            .build();//from w  w w  .j  av a  2 s .  c o m

    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}

From source file:com.moz.fiji.mapreduce.TestFijiMapReduceJobBuilder.java

License:Apache License

@Test
public void testBuildWithXmlKVStores() throws Exception {
    // Test that we can override default configuration KeyValueStores from an XML file.
    final InputStream xmlStores = Resources.openSystemResource("com.moz.fiji/mapreduce/test-kvstores.xml");

    // This file needs to exist before we build the job, or else
    // we can't build the job; it's referenced by a key-value store that checks
    // for its presence.
    final File tmpFile = new File("/tmp/foo.seq");
    if (tmpFile.createNewFile()) {
        // We created this temp file, we're responsible for deleting it.
        tmpFile.deleteOnExit();//from   ww  w.j av a  2  s . c om
    }

    LOG.info("Building job...");
    final FijiMapReduceJob job = FijiMapReduceJobBuilder.create().withConf(mConf)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
            .withMapper(MyMapper.class).withReducer(MyReducer.class)
            .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
            .withStoreBindings(xmlStores).build();

    xmlStores.close();

    LOG.info("Verifying job configuration...");
    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    // We override mapperMap with a SeqFileKeyValueStore.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(SeqFileKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}