List of usage examples for org.apache.hadoop.mapreduce Job getNumReduceTasks
public int getNumReduceTasks()
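getNumReduceTasks() returns the reducer count currently configured on the job (the mapreduce.job.reduces setting, 1 by default), which is why the partitioners and samplers below use it to size or validate their partition keysets. A minimal sketch, assuming a Hadoop 2.x classpath; the job name and the reducer count of 4 are arbitrary illustration values, not taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class NumReduceTasksExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "num-reduce-tasks-example"); // hypothetical job name

        // Defaults to 1 unless mapreduce.job.reduces is set in the configuration.
        System.out.println("Configured reducers: " + job.getNumReduceTasks());

        // Callers such as a total-order partitioner typically derive their
        // partition count (split points = reducers - 1) from this value.
        job.setNumReduceTasks(4);
        System.out.println("After setNumReduceTasks(4): " + job.getNumReduceTasks());
    }
}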
From source file: org.apache.jena.tdbloader4.partitioners.TotalOrderPartitioner.java
License: Apache License

@SuppressWarnings("unchecked")
private void init(String indexName, Configuration conf) {
    log.debug("init({}, {})", indexName, conf);
    try {
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts + "_" + indexName);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);
        log.debug("FileSystem is {}", fs);
        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        log.debug("Map output key class is {}", keyClass.getSimpleName());
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        numReduceTasks = job.getNumReduceTasks();
        log.debug("Found {} split points, number of reducers is {}", splitPoints.length, numReduceTasks);
        if (splitPoints.length != (numReduceTasks / 9) - 1) {
            log.debug("Split points are {} which is different from {}", splitPoints.length,
                    (numReduceTasks / 9) - 1);
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                log.debug("Split points are out of order");
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        Node<?> partitions = null;
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are represented
                    // reentrantly, and we develop a leaf for any trie node with only one
                    // split point, the only reason for a depth limit is to refute stack
                    // overflow or bloat in the pathological case where the split points
                    // are long and mostly look like bytes iii...iixii...iii. Therefore,
                    // we make the default depth limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
        log.debug("Adding {} to {}", partitions, this.partitions);
        this.partitions.put(indexName, partitions);
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
    log.debug("init({}, {}) finished.", indexName, conf);
}
From source file: org.broadinstitute.sting.gatk.hadoop.hadoopsrc.InputSampler.java
License: Apache License

/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
From source file: org.broadinstitute.sting.gatk.hadoop.hadoopsrc.TotalOrderPartitioner.java
License: Apache License

/**
 * Read in the partition file and build indexing data structures. If the
 * keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie of
 * the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);
        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) > 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are represented
                    // reentrantly, and we develop a leaf for any trie node with only one
                    // split point, the only reason for a depth limit is to refute stack
                    // overflow or bloat in the pathological case where the split points
                    // are long and mostly look like bytes iii...iixii...iii. Therefore,
                    // we make the default depth limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
From source file: org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java
License: Apache License

/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table
 *          The table to get the region count for.
 * @param job
 *          The current job to adjust.
 * @throws IOException
 *           When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionLocations().size();
    if (job.getNumReduceTasks() > regions)
        job.setNumReduceTasks(regions);
}
From source file: org.imageterrier.indexers.hadoop.HadoopIndexer.java
License: Mozilla Public License

/**
 * Process the arguments and start the map-reduce indexing.
 *
 * @param args
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    final long time = System.currentTimeMillis();

    final HadoopIndexerOptions options = new HadoopIndexerOptions();
    final CmdLineParser parser = new CmdLineParser(options);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        parser.printUsage(System.err);
        logger.fatal(e.getMessage());
        logger.fatal(usage());
        return 1;
    }

    if (Files.exists(options.getOutputPathString())
            && Index.existsIndex(options.getOutputPathString(), ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + options.getOutputPathString() + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return 1;
    }

    // create job
    final Job job = createJob(options);

    // set args string
    job.getConfiguration().setStrings(INDEXER_ARGS_STRING, args);

    options.configureFilterMode(job.getConfiguration());

    // run job
    JobID jobId = null;
    boolean ranOK = true;
    try {
        ranOK = job.waitForCompletion(true);
        jobId = job.getJobID();
    } catch (final Exception e) {
        logger.error("Problem running job", e);
        ranOK = false;
    }

    if (jobId != null) {
        deleteTaskFiles(options.getOutputPathString(), jobId);
    }

    if (ranOK) {
        if (!options.isDocumentPartitionMode()) {
            if (job.getNumReduceTasks() > 1) {
                mergeLexiconInvertedFiles(options.getOutputPathString(), job.getNumReduceTasks());
            }
        }

        finish(options.getOutputPathString(),
                options.isDocumentPartitionMode() ? job.getNumReduceTasks() : 1,
                job.getConfiguration());
    }

    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");

    return 0;
}
From source file: org.kiji.mapreduce.TestKijiBulkImportJobBuilder.java
License: Apache License

@Test
public void testBuildWithHFileOutput() throws Exception {
    final MapReduceJob mrjob = KijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(new TextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(NoopBulkImporter.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(NoopBulkImporter.class,
            job.getConfiguration().getClass(KijiConfKeys.KIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());
}
From source file: org.kiji.mapreduce.TestKijiBulkImportJobBuilder.java
License: Apache License

@Test
public void testBuildWithKeyValueStore() throws Exception {
    final MapReduceJob mrjob = KijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(new TextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(KVStoreBulkImporter.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    // Verify that everything else is what we expected as in the previous test
    // (except the bulk importer class name)...
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(KVStoreBulkImporter.class,
            job.getConfiguration().getClass(KijiConfKeys.KIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());

    // KeyValueStore-specific checks here.
    final Configuration confOut = job.getConfiguration();
    assertEquals(1, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("foostore",
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file: org.kiji.mapreduce.TestKijiGatherJobBuilder.java
License: Apache License

@Test
public void testGatherToHFile() throws Exception {
    final MapReduceJob gatherJob = KijiGatherJobBuilder.create().withConf(getConf()).withInputTable(mTable)
            .withGatherer(GatherToHFile.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(GatherToHFile.class.getName(), conf.get(KijiConfKeys.KIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file: org.kiji.mapreduce.TestKijiGatherJobBuilder.java
License: Apache License

@Test
public void testGatherReducerToHFile() throws Exception {
    final MapReduceJob gatherJob = KijiGatherJobBuilder.create().withConf(getConf()).withInputTable(mTable)
            .withGatherer(SimpleGatherer.class).withReducer(ReducerToHFile.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(SimpleGatherer.class.getName(), conf.get(KijiConfKeys.KIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(ReducerToHFile.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(SequenceFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file: org.kiji.mapreduce.TestKijiMapReduceJobBuilder.java
License: Apache License

@Test
public void testBuild() throws Exception {
    final KijiMapReduceJob job = KijiMapReduceJobBuilder.create().withConf(mConf)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
            .withMapper(MyMapper.class).withReducer(MyReducer.class)
            .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
            .build();

    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap",
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap",
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_NAME));
}