List of usage examples for org.apache.hadoop.mapreduce.Job#getNumReduceTasks
public int getNumReduceTasks()
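Before the project examples below, here is a minimal, self-contained sketch of the typical pattern: set the reducer count on a Job and read it back with getNumReduceTasks(), where zero reducers indicates a map-only job. The class name and printed messages are illustrative only and are not taken from any of the source files listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class NumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "num-reduce-tasks-example");

        // Zero reduce tasks means the job is map-only.
        job.setNumReduceTasks(0);
        boolean isMapOnly = (job.getNumReduceTasks() == 0);
        System.out.println("Map-only: " + isMapOnly);

        // Set an explicit reducer count and read it back.
        job.setNumReduceTasks(4);
        System.out.println("Reducers: " + job.getNumReduceTasks());
    }
}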
From source file:com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(AggregationPhaseJob.class);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));
    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(AggregationMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reduce config
    job.setReducerClass(AggregationReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, avroSchema);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
    LOGGER.info("Num Reducers : {}", numReducers);
    if (StringUtils.isNotBlank(numReducers)) {
        job.setNumReduceTasks(Integer.valueOf(numReducers));
        LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
    }

    job.waitForCompletion(true);

    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    if (counter.getValue() == 0) {
        throw new IllegalStateException("No input records in " + inputPathDir);
    }
    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

    for (String metric : thirdeyeConfig.getMetricNames()) {
        counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
        LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    }

    return job;
}
From source file:com.linkedin.whiteelephant.mapreduce.MyAvroMultipleOutputs.java
License:Apache License
private TaskAttemptContext getContext(String nameOutput) throws IOException {
    TaskAttemptContext taskContext = taskContexts.get(nameOutput);
    if (taskContext != null) {
        return taskContext;
    }

    // The following trick leverages the instantiation of a record writer via
    // the job thus supporting arbitrary output formats.
    context.getConfiguration().set("avro.mo.config.namedOutput", nameOutput);
    Job job = new Job(context.getConfiguration());
    job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));

    Schema keySchema = keySchemas.get(nameOutput + "_KEYSCHEMA");
    Schema valSchema = valSchemas.get(nameOutput + "_VALSCHEMA");

    // A job with zero reduce tasks is map-only, so the map output schemas apply.
    boolean isMaponly = job.getNumReduceTasks() == 0;
    if (keySchema != null) {
        if (isMaponly) {
            AvroJob.setMapOutputKeySchema(job, keySchema);
        } else {
            AvroJob.setOutputKeySchema(job, keySchema);
        }
    }
    if (valSchema != null) {
        if (isMaponly) {
            AvroJob.setMapOutputValueSchema(job, valSchema);
        } else {
            AvroJob.setOutputValueSchema(job, valSchema);
        }
    }

    taskContext = new TaskAttemptContext(job.getConfiguration(), context.getTaskAttemptID());
    taskContexts.put(nameOutput, taskContext);
    return taskContext;
}
From source file:com.moz.fiji.mapreduce.output.TestFileMapReduceJobOutput.java
License:Apache License
@Test
public void testConfigure() throws ClassNotFoundException, IOException {
    final Path filePath = new Path("foo/bar");
    final int numSplits = 42;
    final Class<? extends OutputFormat> outputFormatClass = TextOutputFormat.class;
    FileMapReduceJobOutput jobOutput = new ConcreteFileMapReduceJobOutput(filePath, numSplits, outputFormatClass);

    Job job = new Job();
    jobOutput.configure(job);

    // The output format class should be set in the job configuration.
    assertEquals(outputFormatClass, job.getOutputFormatClass());
    // The file output path should be set in the job configuration.
    assert (FileOutputFormat.getOutputPath(job).toString().endsWith(filePath.toString()));
    // The number of reduce tasks should be set to the number of splits.
    assertEquals(numSplits, job.getNumReduceTasks());
}
From source file:com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java
License:Apache License
@Test
public void testBuildWithHFileOutput() throws Exception {
    final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create()
        .withConf(getConf())
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input")))
        .withBulkImporter(NoopBulkImporter.class)
        .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10))
        .build();

    final Job job = mrjob.getHadoopJob();
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(NoopBulkImporter.class,
        job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());
}
From source file:com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java
License:Apache License
@Test
public void testBuildWithKeyValueStore() throws Exception {
    final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create()
        .withConf(getConf())
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input")))
        .withBulkImporter(KVStoreBulkImporter.class)
        .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10))
        .build();

    final Job job = mrjob.getHadoopJob();
    // Verify that everything else is what we expected as in the previous test
    // (except the bulk importer class name)...
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(KVStoreBulkImporter.class,
        job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());

    // KeyValueStore-specific checks here.
    final Configuration confOut = job.getConfiguration();
    assertEquals(1, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("foostore",
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java
License:Apache License
@Test
public void testGatherToHFile() throws Exception {
    final FijiMapReduceJob gatherJob = FijiGatherJobBuilder.create()
        .withConf(getConf())
        .withInputTable(mTable.getURI())
        .withGatherer(GatherToHFile.class)
        .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(), getLocalTestPath("hfile"), 10))
        .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(GatherToHFile.class.getName(), conf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java
License:Apache License
@Test
public void testGatherReducerToHFile() throws Exception {
    final FijiMapReduceJob gatherJob = FijiGatherJobBuilder.create()
        .withConf(getConf())
        .withInputTable(mTable.getURI())
        .withGatherer(SimpleGatherer.class)
        .withReducer(ReducerToHFile.class)
        .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(), getLocalTestPath("hfile"), 10))
        .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(SimpleGatherer.class.getName(), conf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(ReducerToHFile.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(SequenceFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file:com.moz.fiji.mapreduce.TestFijiMapReduceJobBuilder.java
License:Apache License
@Test
public void testBuild() throws Exception {
    final FijiMapReduceJob job = FijiMapReduceJobBuilder.create()
        .withConf(mConf)
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
        .withMapper(MyMapper.class)
        .withReducer(MyReducer.class)
        .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
        .build();

    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(EmptyKeyValueStore.class.getName(),
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap",
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap",
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:com.moz.fiji.mapreduce.TestFijiMapReduceJobBuilder.java
License:Apache License
@Test
public void testBuildWithXmlKVStores() throws Exception {
    // Test that we can override default configuration KeyValueStores from an XML file.
    final InputStream xmlStores = Resources.openSystemResource("com.moz.fiji/mapreduce/test-kvstores.xml");

    // This file needs to exist before we build the job, or else
    // we can't build the job; it's referenced by a key-value store that checks
    // for its presence.
    final File tmpFile = new File("/tmp/foo.seq");
    if (tmpFile.createNewFile()) {
        // We created this temp file, we're responsible for deleting it.
        tmpFile.deleteOnExit();
    }

    LOG.info("Building job...");
    final FijiMapReduceJob job = FijiMapReduceJobBuilder.create()
        .withConf(mConf)
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input")))
        .withMapper(MyMapper.class)
        .withReducer(MyReducer.class)
        .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16))
        .withStoreBindings(xmlStores)
        .build();

    xmlStores.close();

    LOG.info("Verifying job configuration...");
    final Job hadoopJob = job.getHadoopJob();
    assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass());
    assertEquals(MyMapper.class, hadoopJob.getMapperClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
    assertEquals(16, hadoopJob.getNumReduceTasks());
    assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass());

    // KeyValueStore-specific checks here.
    // We override mapperMap with a SeqFileKeyValueStore.
    Configuration confOut = hadoopJob.getConfiguration();
    assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0));
    assertEquals(SeqFileKeyValueStore.class.getName(),
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("mapperMap",
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0."
            + KeyValueStoreConfigSerializer.CONF_NAME));
    assertEquals(EmptyKeyValueStore.class.getName(),
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_CLASS));
    assertEquals("reducerMap",
        confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1."
            + KeyValueStoreConfigSerializer.CONF_NAME));
}
From source file:com.moz.fiji.mapreduce.TestFijiProduceJobBuilder.java
License:Apache License
@Test
public void testBuildWithHFileOutput() throws ClassNotFoundException, IOException {
    final FijiMapReduceJob produceJob = FijiProduceJobBuilder.create()
        .withConf(getConf())
        .withInputTable(mTable.getURI())
        .withProducer(MyProducer.class)
        .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(), new Path("foo/bar"), 10))
        .build();

    // Verify that the MR Job was configured correctly.
    final Job job = produceJob.getHadoopJob();
    assertEquals(HBaseFijiTableInputFormat.class, job.getInputFormatClass());
    assertEquals(ProduceMapper.class, job.getMapperClass());
    assertEquals(MyProducer.class, job.getConfiguration().getClass(FijiConfKeys.FIJI_PRODUCER_CLASS, null));
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
}