List of usage examples for org.apache.hadoop.mapreduce Job getNumReduceTasks
public int getNumReduceTasks()
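Job.getNumReduceTasks() returns the number of reduce tasks currently configured for the job; Hadoop defaults this to 1 when nothing has been set explicitly. Before the source-file examples below, here is a minimal standalone sketch (not taken from any of them) showing the typical pattern of setting a reducer count on a Job and reading it back before submission. The paths, job name, and the commented-out mapper/reducer class names are placeholders for illustration only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "num-reduce-tasks-example");
        job.setJarByClass(NumReduceTasksExample.class);

        // Hypothetical mapper/reducer classes would be set here, e.g.:
        // job.setMapperClass(MyMapper.class);
        // job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path("/path/to/my/input"));
        FileOutputFormat.setOutputPath(job, new Path("/path/to/my/output"));

        // Request 16 reducers, then verify the setting before submitting.
        job.setNumReduceTasks(16);
        System.out.println("Configured reduce tasks: " + job.getNumReduceTasks()); // prints 16

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}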
From source file:org.kiji.mapreduce.TestKijiMapReduceJobBuilder.java
License:Apache License
@Test
public void testBuildWithXmlKVStores() throws Exception {
  // Test that we can override default configuration KeyValueStores from an XML file.
  final InputStream xmlStores = Resources.openSystemResource("org/kiji/mapreduce/test-kvstores.xml");

  // This file needs to exist before we build the job, or else we can't build the job;
  // it's referenced by a key-value store that checks for its presence.
  final File tmpFile = new File("/tmp/foo.seq");
  if (tmpFile.createNewFile()) {
    // We created this temp file, we're responsible for deleting it.
    tmpFile.deleteOnExit();
  }

  LOG.info("Building job...");
  final KijiMapReduceJob job = KijiMapReduceJobBuilder.create()
      .withConf(mConf)
      .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
      .withMapper(MyMapper.class)
      .withReducer(MyReducer.class)
      .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
      .withStoreBindings(xmlStores)
      .build();

  xmlStores.close();

  LOG.info("Verifying job configuration...");
  final Job hadoopJob = job.getHadoopJob();
  assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
  assertEquals(MyMapper.class, hadoopJob.getMapperClass());
  assertEquals(MyReducer.class, hadoopJob.getReducerClass());
  assertEquals(16, hadoopJob.getNumReduceTasks());
  assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

  // KeyValueStore-specific checks here.
  // We override mapperMap with a SeqFileKeyValueStore.
  Configuration confOut = hadoopJob.getConfiguration();
  assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
  assertEquals(SeqFileKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("mapperMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_NAME));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("reducerMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:org.kiji.mapreduce.TestKijiProduceJobBuilder.java
License:Apache License
@Test
public void testBuildWithHFileOutput() throws ClassNotFoundException, IOException {
  final MapReduceJob produceJob = KijiProduceJobBuilder.create()
      .withConf(getConf())
      .withInputTable(mTable)
      .withProducer(MyProducer.class)
      .withOutput(new HFileMapReduceJobOutput(mTable, new Path("foo/bar"), 10))
      .build();

  // Verify that the MR Job was configured correctly.
  final Job job = produceJob.getHadoopJob();
  assertEquals(KijiTableInputFormat.class, job.getInputFormatClass());
  assertEquals(ProduceMapper.class, job.getMapperClass());
  assertEquals(MyProducer.class, job.getConfiguration().getClass(KijiConfKeys.KIJI_PRODUCER_CLASS, null));
  assertEquals(10, job.getNumReduceTasks());
  assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
}
From source file:org.kiji.mapreduce.TestKijiTransformJobBuilder.java
License:Apache License
@Test
public void testBuild() throws Exception {
  final MapReduceJob job = KijiTransformJobBuilder.create()
      .withConf(mConf)
      .withInput(new TextMapReduceJobInput(new Path("/path/to/my/input")))
      .withMapper(MyMapper.class)
      .withReducer(MyReducer.class)
      .withOutput(new TextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
      .build();

  final Job hadoopJob = job.getHadoopJob();
  assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
  assertEquals(MyMapper.class, hadoopJob.getMapperClass());
  assertEquals(MyReducer.class, hadoopJob.getReducerClass());
  assertEquals(16, hadoopJob.getNumReduceTasks());
  assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

  // KeyValueStore-specific checks here.
  Configuration confOut = hadoopJob.getConfiguration();
  assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("mapperMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_NAME));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("reducerMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:org.kiji.mapreduce.TestKijiTransformJobBuilder.java
License:Apache License
@Test
public void testBuildWithXmlKVStores() throws Exception {
  // Test that we can override default configuration KeyValueStores from an XML file.
  final InputStream xmlStores = Resources.openSystemResource("org/kiji/mapreduce/test-kvstores.xml");

  // This file needs to exist before we build the job, or else we can't build the job;
  // it's referenced by a key-value store that checks for its presence.
  final File tmpFile = new File("/tmp/foo.seq");
  if (tmpFile.createNewFile()) {
    // We created this temp file, we're responsible for deleting it.
    tmpFile.deleteOnExit();
  }

  LOG.info("Building job...");
  final MapReduceJob job = KijiTransformJobBuilder.create()
      .withConf(mConf)
      .withInput(new TextMapReduceJobInput(new Path("/path/to/my/input")))
      .withMapper(MyMapper.class)
      .withReducer(MyReducer.class)
      .withOutput(new TextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
      .withStoreBindings(xmlStores)
      .build();

  xmlStores.close();

  LOG.info("Verifying job configuration...");
  final Job hadoopJob = job.getHadoopJob();
  assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
  assertEquals(MyMapper.class, hadoopJob.getMapperClass());
  assertEquals(MyReducer.class, hadoopJob.getReducerClass());
  assertEquals(16, hadoopJob.getNumReduceTasks());
  assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

  // KeyValueStore-specific checks here.
  // We override mapperMap with a SeqFileKeyValueStore.
  Configuration confOut = hadoopJob.getConfiguration();
  assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
  assertEquals(SeqFileKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("mapperMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_NAME));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("reducerMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitioner.java
License:Apache License
public static Path setup(final Job job, final SplitGenerator splitGenerator) throws IOException {
  // don't set up a partitioner in local mode
  if (HadoopUtils.isLocal(job.getConfiguration())) {
    // make sure we have at least 1 reducer...
    if (job.getNumReduceTasks() < 1) {
      job.setNumReduceTasks(1);
    }
    return null;
  }

  PartitionerSplit splits = new PartitionerSplit();
  splits.generateSplits(splitGenerator);

  // create a split file in the hadoop tmp directory;
  // this is copied into the job's output directory upon job completion
  final int uniquePrefixLen = 5;
  Path splitFile = new Path(HadoopFileUtils.getTempDir(job.getConfiguration()),
      HadoopUtils.createRandomString(uniquePrefixLen) + "_" + PartitionerSplit.SPLIT_FILE);

  splits.writeSplits(splitFile);

  job.setNumReduceTasks(splits.length());
  job.setPartitionerClass(TileIdPartitioner.class);

  setSplitFile(splitFile.toString(), job);

  return splitFile;
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void setupPartitionerLocal() throws IOException {
  Job job = Job.getInstance(conf, testName.getMethodName());

  FileSystem fs = HadoopFileUtils.getFileSystem(conf);

  SplitGenerator sg = new TestGenerator();
  Path splitfile = TileIdPartitioner.setup(job, sg);

  Assert.assertEquals("Bad number of reducers", 1, job.getNumReduceTasks());
  Assert.assertNull("Splitfile should be null", splitfile);
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void setupPartitioner() throws IOException {
  conf = HadoopUtils.createConfiguration();
  Job job = Job.getInstance(conf, testName.getMethodName());
  conf = job.getConfiguration();

  FileSystem fs = HadoopFileUtils.getFileSystem(conf);

  SplitGenerator sg = new TestGenerator();
  Path splitfile = TileIdPartitioner.setup(job, sg);

  Assert.assertTrue("Reducers should be greater than zero", job.getNumReduceTasks() > 0);
  Assert.assertNotNull("Splitfile should not be null", splitfile);

  Assert.assertEquals("TileIdPartitioner.splitFile not set", splitfile.toString(),
      conf.get("TileIdPartitioner.splitFile", null));
  Assert.assertTrue("TileIdPartitioner.useDistributedCache should be set",
      conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

  URI files[] = job.getCacheFiles();
  Assert.assertEquals("Cache files should have 1 file", 1, files.length);
  Assert.assertEquals("Cache file name wrong", splitfile.toString(), files[0].toString());

  Assert.assertTrue("Partition Splits directory does not exist", fs.exists(splitfile));
  Assert.assertTrue("Partition Splits file does not exist",
      fs.exists(new Path(splitfile, PartitionerSplit.SPLIT_FILE)));

  Splits splits = new PartitionerSplit();
  splits.readSplits(splitfile);

  PartitionerSplit.PartitionerSplitInfo[] si =
      (PartitionerSplit.PartitionerSplitInfo[]) splits.getSplits();

  Assert.assertEquals("Splits length not correct", generated.length, si.length);
  for (int i = 0; i < generated.length; i++) {
    Assert.assertEquals("Splits entry not correct", generated[i].longValue(), si[i].getTileId());
  }

  fs.delete(splitfile, true);
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void getPartition() throws IOException {
  conf = HadoopUtils.createConfiguration();
  Job job = Job.getInstance(conf, testName.getMethodName());
  conf = job.getConfiguration();

  FileSystem fs = HadoopFileUtils.getFileSystem(conf);

  SplitGenerator sg = new TestGenerator();
  Path splitfile = TileIdPartitioner.setup(job, sg);

  Assert.assertTrue("Reducers should be greater than zero", job.getNumReduceTasks() > 0);
  Assert.assertNotNull("Splitfile should not be null", splitfile);

  TileIdPartitioner<TileIdWritable, RasterWritable> partitioner =
      new TileIdPartitioner<TileIdWritable, RasterWritable>();
  partitioner.setConf(conf);

  TileIdWritable key = new TileIdWritable();
  RasterWritable value = new RasterWritable();

  int partitions = generated.length;
  int partition;

  Random rand = new Random();
  for (int i = 0; i < 1000; i++) {
    long test = rand.nextInt(generated[generated.length - 1].intValue());
    long testPartition = findSplit(generated, test);

    key.set(test);

    // test both ways of getting a partition
    partition = partitioner.getPartition(key, value, partitions);
    Assert.assertEquals("Splits entry not correct", testPartition, partition);

    partition = partitioner.getPartition(key);
    Assert.assertEquals("Splits entry not correct", testPartition, partition);
  }

  fs.delete(splitfile, true);
}
From source file:Qn3.TotalOrderPartitioner.java
License:Apache License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
  try {
    this.conf = conf;
    String parts = getPartitionFile(conf);
    final Path partFile = new Path(parts);
    final FileSystem fs = (DEFAULT_PATH.equals(parts))
        ? FileSystem.getLocal(conf) // assume in DistributedCache
        : partFile.getFileSystem(conf);

    Job job = new Job(conf);
    // Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
    Class<K> keyClass = (Class<K>) LongWritable.class;
    K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
    LOG.info("Number of Reducer " + job.getNumReduceTasks());
    if (splitPoints.length != job.getNumReduceTasks() - 1) {
      throw new IOException("Wrong number of partitions in keyset");
    }

    /* RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator(); */
    RawComparator<K> comparator = (RawComparator<K>) WritableComparator.get(LongWritable.class);
    for (int i = 0; i < splitPoints.length - 1; ++i) {
      if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
        throw new IOException("Split points are out of order");
      }
    }

    boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
    if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
      partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
          // Now that blocks of identical splitless trie nodes are
          // represented reentrantly, and we develop a leaf for any trie
          // node with only one split point, the only reason for a depth
          // limit is to refute stack overflow or bloat in the pathological
          // case where the split points are long and mostly look like bytes
          // iii...iixii...iii . Therefore, we make the default depth
          // limit large but not huge.
          conf.getInt(MAX_TRIE_DEPTH, 200));
    } else {
      partitions = new BinarySearchNode(splitPoints, comparator);
    }
  } catch (IOException e) {
    throw new IllegalArgumentException("Can't read partitions file", e);
  }
}
From source file:sampler.TotalOrderPartitioner.java
License:Open Source License
/** * Read in the partition file and build indexing data structures. * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and * <tt>total.order.partitioner.natural.order</tt> is not false, a trie * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes * will be built. Otherwise, keys will be located using a binary search of * the partition keyset using the {@link org.apache.hadoop.io.RawComparator} * defined for this job. The input file must be sorted with the same * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys. *//*from w w w .j a v a 2 s . co m*/ @SuppressWarnings("unchecked") // keytype from conf not static public void setConf(Configuration conf) { try { this.conf = conf; String parts = getPartitionFile(conf); final Path partFile = new Path(parts); final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache : partFile.getFileSystem(conf); Job job = new Job(conf); Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass(); K[] splitPoints = readPartitions(fs, partFile, keyClass, conf); if (splitPoints.length != job.getNumReduceTasks() - 1) { System.out.println(job.getNumReduceTasks()); System.out.println(splitPoints.length); throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length); } RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator(); for (int i = 0; i < splitPoints.length - 1; ++i) { if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) { throw new IOException("Split points are out of order"); } } boolean natOrder = conf.getBoolean(NATURAL_ORDER, true); if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) { partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0], // Now that blocks of identical splitless trie nodes are // represented reentrantly, and we develop a leaf for any trie // node with only one split point, the only reason for a depth // limit is to refute stack overflow or bloat in the pathological // case where the split points are long and mostly look like bytes // iii...iixii...iii . Therefore, we make the default depth // limit large but not huge. conf.getInt(MAX_TRIE_DEPTH, 200)); } else { partitions = new BinarySearchNode(splitPoints, comparator); } } catch (IOException e) { throw new IllegalArgumentException("Can't read partitions file", e); } }