List of usage examples for org.apache.hadoop.mapreduce Job getNumReduceTasks
public int getNumReduceTasks()
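Job.getNumReduceTasks() returns the number of reduce tasks currently configured for the job; Hadoop defaults this to 1 when nothing has been set explicitly. Before the source-file examples below, here is a minimal standalone sketch (not taken from any of them) showing the typical pattern of setting a reducer count on a Job and reading it back before submission. The paths, job name, and the commented-out mapper/reducer class names are placeholders for illustration only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "num-reduce-tasks-example");
        job.setJarByClass(NumReduceTasksExample.class);

        // Hypothetical mapper/reducer classes would be set here, e.g.:
        // job.setMapperClass(MyMapper.class);
        // job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path("/path/to/my/input"));
        FileOutputFormat.setOutputPath(job, new Path("/path/to/my/output"));

        // Request 16 reducers, then verify the setting before submitting.
        job.setNumReduceTasks(16);
        System.out.println("Configured reduce tasks: " + job.getNumReduceTasks()); // prints 16

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}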
From source file:org.kiji.mapreduce.TestKijiMapReduceJobBuilder.java
License:Apache License
@Test
public void testBuildWithXmlKVStores() throws Exception {
  // Test that we can override default configuration KeyValueStores from an XML file.
  final InputStream xmlStores = Resources.openSystemResource("org/kiji/mapreduce/test-kvstores.xml");

  // This file needs to exist before we build the job, or else we can't build the job;
  // it's referenced by a key-value store that checks for its presence.
  final File tmpFile = new File("/tmp/foo.seq");
  if (tmpFile.createNewFile()) {
    // We created this temp file, we're responsible for deleting it.
    tmpFile.deleteOnExit();
  }

  LOG.info("Building job...");
  final KijiMapReduceJob job = KijiMapReduceJobBuilder.create()
      .withConf(mConf)
      .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
      .withMapper(MyMapper.class)
      .withReducer(MyReducer.class)
      .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
      .withStoreBindings(xmlStores)
      .build();

  xmlStores.close();

  LOG.info("Verifying job configuration...");
  final Job hadoopJob = job.getHadoopJob();
  assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
  assertEquals(MyMapper.class, hadoopJob.getMapperClass());
  assertEquals(MyReducer.class, hadoopJob.getReducerClass());
  assertEquals(16, hadoopJob.getNumReduceTasks());
  assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

  // KeyValueStore-specific checks here.
  // We override mapperMap with a SeqFileKeyValueStore.
  Configuration confOut = hadoopJob.getConfiguration();
  assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
  assertEquals(SeqFileKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("mapperMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_NAME));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("reducerMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:org.kiji.mapreduce.TestKijiProduceJobBuilder.java
License:Apache License
@Test
public void testBuildWithHFileOutput() throws ClassNotFoundException, IOException {
  final MapReduceJob produceJob = KijiProduceJobBuilder.create()
      .withConf(getConf())
      .withInputTable(mTable)
      .withProducer(MyProducer.class)
      .withOutput(new HFileMapReduceJobOutput(mTable, new Path("foo/bar"), 10))
      .build();

  // Verify that the MR Job was configured correctly.
  final Job job = produceJob.getHadoopJob();
  assertEquals(KijiTableInputFormat.class, job.getInputFormatClass());
  assertEquals(ProduceMapper.class, job.getMapperClass());
  assertEquals(MyProducer.class, job.getConfiguration().getClass(KijiConfKeys.KIJI_PRODUCER_CLASS, null));
  assertEquals(10, job.getNumReduceTasks());
  assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
}
From source file:org.kiji.mapreduce.TestKijiTransformJobBuilder.java
License:Apache License
@Test
public void testBuild() throws Exception {
  final MapReduceJob job = KijiTransformJobBuilder.create()
      .withConf(mConf)
      .withInput(new TextMapReduceJobInput(new Path("/path/to/my/input")))
      .withMapper(MyMapper.class)
      .withReducer(MyReducer.class)
      .withOutput(new TextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
      .build();

  final Job hadoopJob = job.getHadoopJob();
  assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
  assertEquals(MyMapper.class, hadoopJob.getMapperClass());
  assertEquals(MyReducer.class, hadoopJob.getReducerClass());
  assertEquals(16, hadoopJob.getNumReduceTasks());
  assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

  // KeyValueStore-specific checks here.
  Configuration confOut = hadoopJob.getConfiguration();
  assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("mapperMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_NAME));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("reducerMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:org.kiji.mapreduce.TestKijiTransformJobBuilder.java
License:Apache License
@Test
public void testBuildWithXmlKVStores() throws Exception {
  // Test that we can override default configuration KeyValueStores from an XML file.
  final InputStream xmlStores = Resources.openSystemResource("org/kiji/mapreduce/test-kvstores.xml");

  // This file needs to exist before we build the job, or else we can't build the job;
  // it's referenced by a key-value store that checks for its presence.
  final File tmpFile = new File("/tmp/foo.seq");
  if (tmpFile.createNewFile()) {
    // We created this temp file, we're responsible for deleting it.
    tmpFile.deleteOnExit();
  }

  LOG.info("Building job...");
  final MapReduceJob job = KijiTransformJobBuilder.create()
      .withConf(mConf)
      .withInput(new TextMapReduceJobInput(new Path("/path/to/my/input")))
      .withMapper(MyMapper.class)
      .withReducer(MyReducer.class)
      .withOutput(new TextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
      .withStoreBindings(xmlStores)
      .build();

  xmlStores.close();

  LOG.info("Verifying job configuration...");
  final Job hadoopJob = job.getHadoopJob();
  assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
  assertEquals(MyMapper.class, hadoopJob.getMapperClass());
  assertEquals(MyReducer.class, hadoopJob.getReducerClass());
  assertEquals(16, hadoopJob.getNumReduceTasks());
  assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

  // KeyValueStore-specific checks here.
  // We override mapperMap with a SeqFileKeyValueStore.
  Configuration confOut = hadoopJob.getConfiguration();
  assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
  assertEquals(SeqFileKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("mapperMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
          + KeyValueStoreConfigSerializer.CONF_NAME));
  assertEquals(EmptyKeyValueStore.class.getName(),
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_CLASS));
  assertEquals("reducerMap",
      confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
          + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitioner.java
License:Apache License
public static Path setup(final Job job, final SplitGenerator splitGenerator) throws IOException {
  // don't set up a partitioner in local mode
  if (HadoopUtils.isLocal(job.getConfiguration())) {
    // make sure we have at least 1 reducer...
    if (job.getNumReduceTasks() < 1) {
      job.setNumReduceTasks(1);
    }
    return null;
  }

  PartitionerSplit splits = new PartitionerSplit();
  splits.generateSplits(splitGenerator);

  // create a split file in the hadoop tmp directory;
  // this is copied into the job's output directory upon job completion
  final int uniquePrefixLen = 5;
  Path splitFile = new Path(HadoopFileUtils.getTempDir(job.getConfiguration()),
      HadoopUtils.createRandomString(uniquePrefixLen) + "_" + PartitionerSplit.SPLIT_FILE);

  splits.writeSplits(splitFile);

  job.setNumReduceTasks(splits.length());
  job.setPartitionerClass(TileIdPartitioner.class);

  setSplitFile(splitFile.toString(), job);

  return splitFile;
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void setupPartitionerLocal() throws IOException {
  Job job = Job.getInstance(conf, testName.getMethodName());

  FileSystem fs = HadoopFileUtils.getFileSystem(conf);

  SplitGenerator sg = new TestGenerator();
  Path splitfile = TileIdPartitioner.setup(job, sg);

  Assert.assertEquals("Bad number of reducers", 1, job.getNumReduceTasks());
  Assert.assertNull("Splitfile should be null", splitfile);
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void setupPartitioner() throws IOException {
  conf = HadoopUtils.createConfiguration();
  Job job = Job.getInstance(conf, testName.getMethodName());
  conf = job.getConfiguration();

  FileSystem fs = HadoopFileUtils.getFileSystem(conf);

  SplitGenerator sg = new TestGenerator();
  Path splitfile = TileIdPartitioner.setup(job, sg);

  Assert.assertTrue("Reducers should be greater than zero", job.getNumReduceTasks() > 0);
  Assert.assertNotNull("Splitfile should not be null", splitfile);

  Assert.assertEquals("TileIdPartitioner.splitFile not set", splitfile.toString(),
      conf.get("TileIdPartitioner.splitFile", null));
  Assert.assertTrue("TileIdPartitioner.useDistributedCache should be set",
      conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

  URI files[] = job.getCacheFiles();
  Assert.assertEquals("Cache files should have 1 file", 1, files.length);
  Assert.assertEquals("Cache file name wrong", splitfile.toString(), files[0].toString());

  Assert.assertTrue("Partition Splits directory does not exist", fs.exists(splitfile));
  Assert.assertTrue("Partition Splits file does not exist",
      fs.exists(new Path(splitfile, PartitionerSplit.SPLIT_FILE)));

  Splits splits = new PartitionerSplit();
  splits.readSplits(splitfile);

  PartitionerSplit.PartitionerSplitInfo[] si =
      (PartitionerSplit.PartitionerSplitInfo[]) splits.getSplits();

  Assert.assertEquals("Splits length not correct", generated.length, si.length);
  for (int i = 0; i < generated.length; i++) {
    Assert.assertEquals("Splits entry not correct", generated[i].longValue(), si[i].getTileId());
  }

  fs.delete(splitfile, true);
}
From source file:org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void getPartition() throws IOException {
  conf = HadoopUtils.createConfiguration();
  Job job = Job.getInstance(conf, testName.getMethodName());
  conf = job.getConfiguration();

  FileSystem fs = HadoopFileUtils.getFileSystem(conf);

  SplitGenerator sg = new TestGenerator();
  Path splitfile = TileIdPartitioner.setup(job, sg);

  Assert.assertTrue("Reducers should be greater than zero", job.getNumReduceTasks() > 0);
  Assert.assertNotNull("Splitfile should not be null", splitfile);

  TileIdPartitioner<TileIdWritable, RasterWritable> partitioner =
      new TileIdPartitioner<TileIdWritable, RasterWritable>();
  partitioner.setConf(conf);

  TileIdWritable key = new TileIdWritable();
  RasterWritable value = new RasterWritable();

  int partitions = generated.length;
  int partition;

  Random rand = new Random();
  for (int i = 0; i < 1000; i++) {
    long test = rand.nextInt(generated[generated.length - 1].intValue());
    long testPartition = findSplit(generated, test);

    key.set(test);

    // test both ways of getting a partition
    partition = partitioner.getPartition(key, value, partitions);
    Assert.assertEquals("Splits entry not correct", testPartition, partition);

    partition = partitioner.getPartition(key);
    Assert.assertEquals("Splits entry not correct", testPartition, partition);
  }

  fs.delete(splitfile, true);
}
From source file:Qn3.TotalOrderPartitioner.java
License:Apache License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
  try {
    this.conf = conf;
    String parts = getPartitionFile(conf);
    final Path partFile = new Path(parts);
    final FileSystem fs = (DEFAULT_PATH.equals(parts))
        ? FileSystem.getLocal(conf) // assume in DistributedCache
        : partFile.getFileSystem(conf);

    Job job = new Job(conf);
    // Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
    Class<K> keyClass = (Class<K>) LongWritable.class;
    K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
    LOG.info("Number of Reducer " + job.getNumReduceTasks());
    if (splitPoints.length != job.getNumReduceTasks() - 1) {
      throw new IOException("Wrong number of partitions in keyset");
    }

    /* RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator(); */
    RawComparator<K> comparator = (RawComparator<K>) WritableComparator.get(LongWritable.class);
    for (int i = 0; i < splitPoints.length - 1; ++i) {
      if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
        throw new IOException("Split points are out of order");
      }
    }

    boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
    if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
      partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
          // Now that blocks of identical splitless trie nodes are
          // represented reentrantly, and we develop a leaf for any trie
          // node with only one split point, the only reason for a depth
          // limit is to refute stack overflow or bloat in the pathological
          // case where the split points are long and mostly look like bytes
          // iii...iixii...iii . Therefore, we make the default depth
          // limit large but not huge.
          conf.getInt(MAX_TRIE_DEPTH, 200));
    } else {
      partitions = new BinarySearchNode(splitPoints, comparator);
    }
  } catch (IOException e) {
    throw new IllegalArgumentException("Can't read partitions file", e);
  }
}
From source file:sampler.TotalOrderPartitioner.java
License:Open Source License
/** * Read in the partition file and build indexing data structures. * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and * <tt>total.order.partitioner.natural.order</tt> is not false, a trie * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes * will be built. Otherwise, keys will be located using a binary search of * the partition keyset using the {@link org.apache.hadoop.io.RawComparator} * defined for this job. The input file must be sorted with the same * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys. *//*from w w w .j a v a 2 s . co m*/ @SuppressWarnings("unchecked") // keytype from conf not static public void setConf(Configuration conf) { try { this.conf = conf; String parts = getPartitionFile(conf); final Path partFile = new Path(parts); final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache : partFile.getFileSystem(conf); Job job = new Job(conf); Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass(); K[] splitPoints = readPartitions(fs, partFile, keyClass, conf); if (splitPoints.length != job.getNumReduceTasks() - 1) { System.out.println(job.getNumReduceTasks()); System.out.println(splitPoints.length); throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length); } RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator(); for (int i = 0; i < splitPoints.length - 1; ++i) { if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) { throw new IOException("Split points are out of order"); } } boolean natOrder = conf.getBoolean(NATURAL_ORDER, true); if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) { partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0], // Now that blocks of identical splitless trie nodes are // represented reentrantly, and we develop a leaf for any trie // node with only one split point, the only reason for a depth // limit is to refute stack overflow or bloat in the pathological // case where the split points are long and mostly look like bytes // iii...iixii...iii . Therefore, we make the default depth // limit large but not huge. conf.getInt(MAX_TRIE_DEPTH, 200)); } else { partitions = new BinarySearchNode(splitPoints, comparator); } } catch (IOException e) { throw new IllegalArgumentException("Can't read partitions file", e); } }