List of usage examples for org.apache.hadoop.mapreduce Job getNumReduceTasks
public int getNumReduceTasks()
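getNumReduceTasks() returns the reducer count currently configured on the job (the mapreduce.job.reduces setting, 1 by default), which is why the partitioners and samplers below use it to size or validate their partition keysets. A minimal sketch, assuming a Hadoop 2.x classpath; the job name and the reducer count of 4 are arbitrary illustration values, not taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class NumReduceTasksExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "num-reduce-tasks-example"); // hypothetical job name

        // Defaults to 1 unless mapreduce.job.reduces is set in the configuration.
        System.out.println("Configured reducers: " + job.getNumReduceTasks());

        // Callers such as a total-order partitioner typically derive their
        // partition count (split points = reducers - 1) from this value.
        job.setNumReduceTasks(4);
        System.out.println("After setNumReduceTasks(4): " + job.getNumReduceTasks());
    }
}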
From source file: org.apache.jena.tdbloader4.partitioners.TotalOrderPartitioner.java
License: Apache License

@SuppressWarnings("unchecked")
private void init(String indexName, Configuration conf) {
    log.debug("init({}, {})", indexName, conf);
    try {
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts + "_" + indexName);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);
        log.debug("FileSystem is {}", fs);
        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        log.debug("Map output key class is {}", keyClass.getSimpleName());
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        numReduceTasks = job.getNumReduceTasks();
        log.debug("Found {} split points, number of reducers is {}", splitPoints.length, numReduceTasks);
        if (splitPoints.length != (numReduceTasks / 9) - 1) {
            log.debug("Split points are {} which is different from {}", splitPoints.length,
                    (numReduceTasks / 9) - 1);
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                log.debug("Split points are out of order");
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        Node<?> partitions = null;
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are represented
                    // reentrantly, and we develop a leaf for any trie node with only one
                    // split point, the only reason for a depth limit is to refute stack
                    // overflow or bloat in the pathological case where the split points
                    // are long and mostly look like bytes iii...iixii...iii. Therefore,
                    // we make the default depth limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
        log.debug("Adding {} to {}", partitions, this.partitions);
        this.partitions.put(indexName, partitions);
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
    log.debug("init({}, {}) finished.", indexName, conf);
}
From source file: org.broadinstitute.sting.gatk.hadoop.hadoopsrc.InputSampler.java
License: Apache License

/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
From source file: org.broadinstitute.sting.gatk.hadoop.hadoopsrc.TotalOrderPartitioner.java
License: Apache License

/**
 * Read in the partition file and build indexing data structures. If the
 * keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie of
 * the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);
        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) > 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are represented
                    // reentrantly, and we develop a leaf for any trie node with only one
                    // split point, the only reason for a depth limit is to refute stack
                    // overflow or bloat in the pathological case where the split points
                    // are long and mostly look like bytes iii...iixii...iii. Therefore,
                    // we make the default depth limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
From source file: org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java
License: Apache License

/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table
 *          The table to get the region count for.
 * @param job
 *          The current job to adjust.
 * @throws IOException
 *           When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionLocations().size();
    if (job.getNumReduceTasks() > regions)
        job.setNumReduceTasks(regions);
}
From source file: org.imageterrier.indexers.hadoop.HadoopIndexer.java
License: Mozilla Public License

/**
 * Process the arguments and start the map-reduce indexing.
 *
 * @param args
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    final long time = System.currentTimeMillis();

    final HadoopIndexerOptions options = new HadoopIndexerOptions();
    final CmdLineParser parser = new CmdLineParser(options);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        parser.printUsage(System.err);
        logger.fatal(e.getMessage());
        logger.fatal(usage());
        return 1;
    }

    if (Files.exists(options.getOutputPathString())
            && Index.existsIndex(options.getOutputPathString(), ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + options.getOutputPathString() + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return 1;
    }

    // create job
    final Job job = createJob(options);

    // set args string
    job.getConfiguration().setStrings(INDEXER_ARGS_STRING, args);

    options.configureFilterMode(job.getConfiguration());

    // run job
    JobID jobId = null;
    boolean ranOK = true;
    try {
        ranOK = job.waitForCompletion(true);
        jobId = job.getJobID();
    } catch (final Exception e) {
        logger.error("Problem running job", e);
        ranOK = false;
    }

    if (jobId != null) {
        deleteTaskFiles(options.getOutputPathString(), jobId);
    }

    if (ranOK) {
        if (!options.isDocumentPartitionMode()) {
            if (job.getNumReduceTasks() > 1) {
                mergeLexiconInvertedFiles(options.getOutputPathString(), job.getNumReduceTasks());
            }
        }

        finish(options.getOutputPathString(),
                options.isDocumentPartitionMode() ? job.getNumReduceTasks() : 1,
                job.getConfiguration());
    }

    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");

    return 0;
}
From source file: org.kiji.mapreduce.TestKijiBulkImportJobBuilder.java
License: Apache License

@Test
public void testBuildWithHFileOutput() throws Exception {
    final MapReduceJob mrjob = KijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(new TextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(NoopBulkImporter.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(NoopBulkImporter.class,
            job.getConfiguration().getClass(KijiConfKeys.KIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());
}
From source file: org.kiji.mapreduce.TestKijiBulkImportJobBuilder.java
License: Apache License

@Test
public void testBuildWithKeyValueStore() throws Exception {
    final MapReduceJob mrjob = KijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(new TextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(KVStoreBulkImporter.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    // Verify that everything else is what we expected as in the previous test
    // (except the bulk importer class name)...
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(KVStoreBulkImporter.class,
            job.getConfiguration().getClass(KijiConfKeys.KIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());

    // KeyValueStore-specific checks here.
    final Configuration confOut = job.getConfiguration();
    assertEquals(1, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("foostore",
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file: org.kiji.mapreduce.TestKijiGatherJobBuilder.java
License: Apache License

@Test
public void testGatherToHFile() throws Exception {
    final MapReduceJob gatherJob = KijiGatherJobBuilder.create().withConf(getConf()).withInputTable(mTable)
            .withGatherer(GatherToHFile.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(GatherToHFile.class.getName(), conf.get(KijiConfKeys.KIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file: org.kiji.mapreduce.TestKijiGatherJobBuilder.java
License: Apache License

@Test
public void testGatherReducerToHFile() throws Exception {
    final MapReduceJob gatherJob = KijiGatherJobBuilder.create().withConf(getConf()).withInputTable(mTable)
            .withGatherer(SimpleGatherer.class).withReducer(ReducerToHFile.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(SimpleGatherer.class.getName(), conf.get(KijiConfKeys.KIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(ReducerToHFile.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(SequenceFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file: org.kiji.mapreduce.TestKijiMapReduceJobBuilder.java
License: Apache License

@Test
public void testBuild() throws Exception {
    final KijiMapReduceJob job = KijiMapReduceJobBuilder.create().withConf(mConf)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
            .withMapper(MyMapper.class).withReducer(MyReducer.class)
            .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
            .build();

    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap",
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
                    + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap",
            confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
                    + KeyValueStoreConfigSerializer.CONF_NAME));
}