Example usage for org.apache.hadoop.mapreduce Job getNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.getNumReduceTasks().

Prototype

public int getNumReduceTasks() 

Document

Get the configured number of reduce tasks for this job (defaults to 1).
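
Before the project examples below, here is a minimal, self-contained sketch (the class name and job name are placeholders, not taken from any source file on this page). It shows that getNumReduceTasks() simply reads back the reducer count configured through setNumReduceTasks() or mapreduce.job.reduces:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class NumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "num-reduce-tasks-example");
        job.setNumReduceTasks(4);
        // Reads back the configured reducer count (1 if never set).
        int numReducers = job.getNumReduceTasks();
        System.out.println("Configured reduce tasks: " + numReducers);
    }
}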

Usage

From source file:com.pinterest.terrapin.hadoop.HadoopJobLoaderTest.java

License:Apache License

@Test
public void testUpload() throws Exception {
    ZooKeeperManager zkManager = mock(ZooKeeperManager.class);
    Job job = mock(Job.class);
    Configuration conf = new Configuration();
    TerrapinUploaderOptions options = new TerrapinUploaderOptions();
    options.terrapinFileSet = FILE_SET;

    when(zkManager.getClusterInfo()).thenReturn(new ClusterInfo(NAME_NODE, REPLICA_FACTOR));
    doNothing().when(zkManager).lockFileSet(anyString(), any(FileSetInfo.class));
    doNothing().when(zkManager).unlockFileSet(anyString());
    when(job.waitForCompletion(anyBoolean())).then(new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
            Thread.sleep(1000);
            return true;
        }
    });
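    // Stub the Job getters the uploader reads: the reducer count and the job Configuration.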
    when(job.getNumReduceTasks()).thenReturn(NUM_PART);
    when(job.getConfiguration()).thenReturn(conf);
    doNothing().when(job).setOutputFormatClass(Matchers.<Class<? extends OutputFormat>>any());
    doNothing().when(job).setOutputKeyClass(Matchers.<Class<?>>any());
    doNothing().when(job).setOutputValueClass(Matchers.<Class<?>>any());

    TestHadoopJobLoader uploader = new TestHadoopJobLoader(zkManager, options, job);
    assertTrue(uploader.waitForCompletion());

    assertEquals(Constants.MAPRED_MAP_MAX_ATTEMPTS, Integer.parseInt(conf.get("mapred.map.max.attempts")));
    assertEquals(Constants.CHECKSUM_BYTES, Integer.parseInt(conf.get("io.bytes.per.checksum")));
    assertEquals(Constants.DEFAULT_MAX_SHARD_SIZE_BYTES, Long.parseLong(conf.get("dfs.block.size")));
    assertEquals(REPLICA_FACTOR, Integer.parseInt(conf.get("dfs.replication")));

    verify(zkManager).getClusterInfo();
    verify(zkManager).lockFileSet(eq(FILE_SET), any(FileSetInfo.class));
    verify(zkManager).unlockFileSet(eq(FILE_SET));
    verify(job).waitForCompletion(eq(true));
    verify(job).setOutputFormatClass(eq(HFileOutputFormat.class));
    verify(job).setOutputValueClass(eq(BytesWritable.class));
    verify(job).setOutputKeyClass(eq(BytesWritable.class));
}

From source file:com.savy3.nonequijoin.MapOutputSampler.java

License:Apache License

/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */

@SuppressWarnings("unchecked")
// getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    HashMap<K, V> samples = (HashMap<K, V>) sampler.getSample(inf, job);
    LOG.info("Using " + samples.size() + " samples");

    // write the input samples in to file <partitionfile>/mapIn
    Path dstOut = new Path(TotalOrderPartitioner.getPartitionFile(conf));

    Path dst = new Path(dstOut, "mapIn");
    FileSystem fs = dst.getFileSystem(conf);
    SequenceFile.Writer sampleWriter = null;
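    // Create the writer once, using the key and value classes of the first sample (assumes the sample map is non-empty).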
    for (Map.Entry<K, V> sample : samples.entrySet()) {
        sampleWriter = SequenceFile.createWriter(fs, conf, dst, sample.getKey().getClass(),
                sample.getValue().getClass());
        break;
    }
    for (Map.Entry<K, V> sample : samples.entrySet()) {
        sampleWriter.append(sample.getKey(), sample.getValue());
    }
    sampleWriter.close();
    LOG.info("Sample Input File location " + dst.toString());
    // run map reduce on the samples input
    runMap(job, dst);
}

From source file:com.savy3.nonequijoin.MapOutputSampler.java

License:Apache License

/**
 * Driver for InputSampler MapReduce Job
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce Job on Sample Input File" + sampleInputPath.toString());

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);
    FileSystem fs = FileSystem.get(conf);

    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);

    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);

    LOG.info("Sample MapReduce Job Output File" + out.toString());

    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());

    LOG.info("Sample partitioning file cpied to location " + sampleInputPath.getParent().toString());
}

From source file:com.savy3.nonequijoin.MapOutputSampler.java

License:Apache License

/**
 * Driver for InputSampler from the command line. Configures a JobConf
 * instance and calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;

    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                System.out.println("Random sampling");
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
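    // With a single reducer there are no partition boundaries to sample.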
    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }
    System.out.println("before paths");
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    MapOutputSampler.<K, V>writePartitionFile(job, sampler);

    return 0;
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output Splice table name; the format should be Schema.tableName.
 * @param reducer  The reducer class to use.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary configuration.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param quorumAddress Distant cluster to write to; default is null for
 * output to the cluster that is designated in <code>hbase-site.xml</code>.
 * Set this String to the zookeeper ensemble of an alternate remote cluster
 * when you would have the reduce write to a cluster other than the
 * default, e.g. when copying tables between clusters; the source would be
 * designated by <code>hbase-site.xml</code> and this param would have the
 * ensemble address of the remote cluster. The format to pass is particular.
 * Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
 * such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.client
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 * @throws SQLException
 */
public static void initTableReducerJob(String table, Class<? extends Reducer> reducer, Job job,
        Class partitioner, String quorumAddress, String serverClass, String serverImpl,
        boolean addDependencyJars, Class<? extends OutputFormat> outputformatClass) throws IOException {

    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(outputformatClass);
    if (reducer != null)
        job.setReducerClass(reducer);
    conf.set(MRConstants.SPLICE_OUTPUT_TABLE_NAME, table);
    if (sqlUtil == null)
        sqlUtil = SMSQLUtil.getInstance(conf.get(MRConstants.SPLICE_JDBC_STR));
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    String hbaseTableID = null;
    try {
        hbaseTableID = sqlUtil.getConglomID(table);
    } catch (SQLException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        throw new IOException(e);
    }
    conf.set(MRConstants.HBASE_OUTPUT_TABLE_NAME, table);

    if (quorumAddress != null) {
        // Calling this will validate the format
        HBasePlatformUtils.validateClusterKey(quorumAddress);
        conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
        conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
        conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);

    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Object.class);
    if (partitioner == HRegionPartitioner.class) {
        job.setPartitionerClass(HRegionPartitioner.class);
        // TODO Where are the keys?
        int regions = getReduceNumberOfRegions(hbaseTableID);
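        // Cap the reducer count at the region count so each reducer writes to at most one region.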
        if (job.getNumReduceTasks() > regions) {
            job.setNumReduceTasks(regions);
        }
    } else if (partitioner != null) {
        job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
        addDependencyJars(job);
    }

    //initCredentials(job);
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table  The Splice table to get the region count for.
 * @param job  The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 * @throws SQLException When retrieving the Splice conglomerate ID fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException, SQLException {
    Configuration conf = job.getConfiguration();
    if (sqlUtil == null)
        sqlUtil = SMSQLUtil.getInstance(conf.get(MRConstants.SPLICE_JDBC_STR));
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    String hbaseTableID = sqlUtil.getConglomID(table);
    int regions = getReduceNumberOfRegions(hbaseTableID);
    if (job.getNumReduceTasks() > regions)
        job.setNumReduceTasks(regions);
}

From source file:com.xiaomi.linden.hadoop.indexing.job.LindenJob.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
    logger.info("input dir:" + dir);
    Path inputPath = new Path(StringUtils.unEscapeString(dir));
    Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
    String indexPath = conf.get(LindenJobConfig.INDEX_PATH);

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(new Path(indexPath))) {
        fs.delete(new Path(indexPath), true);
    }

    int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
    Shard[] shards = createShards(indexPath, numShards);

    Shard.setIndexShards(conf, shards);

    //empty trash;
    (new Trash(conf)).expunge();

    Job job = Job.getInstance(conf, "linden-hadoop-indexing");
    job.setJarByClass(LindenJob.class);
    job.setMapperClass(LindenMapper.class);
    job.setCombinerClass(LindenCombiner.class);
    job.setReducerClass(LindenReducer.class);
    job.setMapOutputKeyClass(Shard.class);
    job.setMapOutputValueClass(IntermediateForm.class);
    job.setOutputKeyClass(Shard.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(IndexUpdateOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
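    // One reducer per index shard.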
    job.setNumReduceTasks(numShards);

    String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
    if (lindenSchemaFile == null) {
        throw new IOException("no schema file is found");
    }
    logger.info("Adding schema file: " + lindenSchemaFile);
    job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
    String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
    if (lindenPropertiesFile == null) {
        throw new IOException("no linden properties file is found");
    }
    logger.info("Adding linden properties file: " + lindenPropertiesFile);
    job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    Path[] inputs = FileInputFormat.getInputPaths(job);
    StringBuilder buffer = new StringBuilder(inputs[0].toString());
    for (int i = 1; i < inputs.length; i++) {
        buffer.append(",");
        buffer.append(inputs[i].toString());
    }
    logger.info("mapreduce.input.dir = " + buffer.toString());
    logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
    logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
    logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
    logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
    logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
    logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed");
    }
    return 0;
}

From source file:com.yunrang.hadoop.app.utils.CustomizedUtil.java

License:Apache License

/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table
 *            The table to get the region count for.
 * @param job
 *            The current job to adjust.
 * @throws IOException
 *             When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
        job.setNumReduceTasks(regions);
}

From source file:distributed.hadoop.MapReduceJobConfig.java

License:Open Source License

/**
 * Apply the settings encapsulated in this config and return a Job object
 * ready for execution.
 * 
 * @param jobName the name of the job
 * @param conf the Configuration object that will be wrapped in the Job
 * @param env environment variables
 * @return a configured Job object
 * @throws IOException if a problem occurs
 * @throws ClassNotFoundException if one of the configured mapper, reducer, combiner, format, or key/value classes cannot be found
 */
public Job configureForHadoop(String jobName, Configuration conf, Environment env)
        throws IOException, ClassNotFoundException {

    String jobTrackerPort = getJobTrackerPort();
    if (DistributedJobConfig.isEmpty(jobTrackerPort)) {
        jobTrackerPort = AbstractHadoopJobConfig.isHadoop2() ? AbstractHadoopJobConfig.DEFAULT_PORT_YARN
                : AbstractHadoopJobConfig.DEFAULT_PORT;
    }
    String jobTracker = getJobTrackerHost() + ":" + jobTrackerPort;
    if (DistributedJobConfig.isEmpty(jobTracker)) {
        System.err.println("No " + (AbstractHadoopJobConfig.isHadoop2() ? "resource manager " : "JobTracker ")
                + "set - running locally...");
    } else {
        jobTracker = environmentSubstitute(jobTracker, env);
        if (AbstractHadoopJobConfig.isHadoop2()) {
            conf.set(YARN_RESOURCE_MANAGER_ADDRESS, jobTracker);
            conf.set(YARN_RESOURCE_MANAGER_SCHEDULER_ADDRESS,
                    environmentSubstitute(getJobTrackerHost(), env) + ":8030");
        } else {
            conf.set(HADOOP_JOB_TRACKER_HOST, jobTracker);
        }
    }
    System.err.println("Using " + (AbstractHadoopJobConfig.isHadoop2() ? "resource manager: " : "jobtracker: ")
            + jobTracker);

    if (AbstractHadoopJobConfig.isHadoop2()) {
        // a few other properties needed to run against Yarn
        conf.set("yarn.nodemanager.aux-services", "mapreduce_shuffle");
        conf.set("mapreduce.framework.name", "yarn");
    }

    if (!DistributedJobConfig.isEmpty(getMapredMaxSplitSize())) {
        conf.set(AbstractHadoopJobConfig.isHadoop2() ? HADOOP2_MAPRED_MAX_SPLIT_SIZE
                : HADOOP_MAPRED_MAX_SPLIT_SIZE, getMapredMaxSplitSize());
    }

    // Do any user supplied properties here before creating the Job
    for (Map.Entry<String, String> e : m_additionalUserSuppliedProperties.entrySet()) {
        conf.set(e.getKey(), e.getValue());
    }

    m_hdfsConfig.configureForHadoop(conf, env);
    Job job = new Job(conf, jobName);

    String numMappers = getNumberOfMaps();
    if (!DistributedJobConfig.isEmpty(numMappers)) {
        numMappers = environmentSubstitute(numMappers, env);
        ((JobConf) job.getConfiguration()).setNumMapTasks(Integer.parseInt(numMappers));
    }

    // The number of map tasks that will be run simultaneously by a task tracker
    String maxConcurrentMapTasks = getTaskTrackerMapTasksMaximum();
    if (!DistributedJobConfig.isEmpty(maxConcurrentMapTasks)) {
        ((JobConf) job.getConfiguration()).set("mapred.tasktracker.map.tasks.maximum", maxConcurrentMapTasks);
    }

    String numReducers = getNumberOfReducers();
    if (!DistributedJobConfig.isEmpty(numReducers)) {
        numReducers = environmentSubstitute(numReducers, env);
        job.setNumReduceTasks(Integer.parseInt(numReducers));

        if (Integer.parseInt(numReducers) == 0) {
            System.err.println("Warning - no reducer class set. Configuring for a map only job");
        }
    } else {
        job.setNumReduceTasks(1);
    }
    String mapperClass = getMapperClass();
    if (DistributedJobConfig.isEmpty(mapperClass)) {
        throw new IOException("No mapper class specified!");
    }
    mapperClass = environmentSubstitute(mapperClass, env);

    @SuppressWarnings("unchecked")
    Class<? extends Mapper> mc = (Class<? extends Mapper>) Class.forName(mapperClass);

    job.setMapperClass(mc);

    String reducerClass = getReducerClass();
    if (DistributedJobConfig.isEmpty(reducerClass) && job.getNumReduceTasks() > 0) {
        throw new IOException("No reducer class specified!");
    } else if (job.getNumReduceTasks() > 0) {
        reducerClass = environmentSubstitute(reducerClass, env);

        @SuppressWarnings("unchecked")
        Class<? extends Reducer> rc = (Class<? extends Reducer>) Class.forName(reducerClass);

        job.setReducerClass(rc);
    }

    String combinerClass = getCombinerClass();
    if (!DistributedJobConfig.isEmpty(combinerClass)) {
        combinerClass = environmentSubstitute(combinerClass, env);

        @SuppressWarnings("unchecked")
        Class<? extends Reducer> cc = (Class<? extends Reducer>) Class.forName(combinerClass);

        job.setCombinerClass(cc);
    }

    String inputFormatClass = getInputFormatClass();
    if (DistributedJobConfig.isEmpty(inputFormatClass)) {
        throw new IOException("No input format class specified");
    }
    inputFormatClass = environmentSubstitute(inputFormatClass, env);

    @SuppressWarnings("unchecked")
    Class<? extends InputFormat> ifc = (Class<? extends InputFormat>) Class.forName(inputFormatClass);

    job.setInputFormatClass(ifc);

    String outputFormatClass = getOutputFormatClass();
    if (DistributedJobConfig.isEmpty(outputFormatClass)) {
        throw new IOException("No output format class specified");
    }
    outputFormatClass = environmentSubstitute(outputFormatClass, env);

    @SuppressWarnings("unchecked")
    Class<? extends OutputFormat> ofc = (Class<? extends OutputFormat>) Class.forName(outputFormatClass);
    job.setOutputFormatClass(ofc);

    String mapOutputKeyClass = getMapOutputKeyClass();
    if (DistributedJobConfig.isEmpty(mapOutputKeyClass)) {
        throw new IOException("No map output key class defined");
    }
    mapOutputKeyClass = environmentSubstitute(mapOutputKeyClass, env);
    Class mokc = Class.forName(mapOutputKeyClass);
    job.setMapOutputKeyClass(mokc);

    String mapOutputValueClass = getMapOutputValueClass();
    if (DistributedJobConfig.isEmpty(mapOutputValueClass)) {
        throw new IOException("No map output value class defined");
    }
    mapOutputValueClass = environmentSubstitute(mapOutputValueClass, env);
    Class movc = Class.forName(mapOutputValueClass);
    job.setMapOutputValueClass(movc);

    String outputKeyClass = getOutputKeyClass();
    if (DistributedJobConfig.isEmpty(outputKeyClass)) {
        throw new IOException("No output key class defined");
    }
    outputKeyClass = environmentSubstitute(outputKeyClass, env);
    Class okc = Class.forName(outputKeyClass);
    job.setOutputKeyClass(okc);

    String outputValueClass = getOutputValueClass();
    if (DistributedJobConfig.isEmpty(outputValueClass)) {
        throw new IOException("No output value class defined");
    }
    outputValueClass = environmentSubstitute(outputValueClass, env);
    Class ovc = Class.forName(outputValueClass);
    job.setOutputValueClass(ovc);

    String inputPaths = getInputPaths();
    // don't complain if there aren't any as inputs such as HBASE
    // require other properties to be set
    if (!DistributedJobConfig.isEmpty(inputPaths)) {
        inputPaths = environmentSubstitute(inputPaths, env);
        FileInputFormat.setInputPaths(job, inputPaths);
    }

    String outputPath = getOutputPath();
    if (DistributedJobConfig.isEmpty(outputPath)) {
        throw new IOException("No output path specified");
    }
    outputPath = environmentSubstitute(outputPath, env);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file:eu.dnetlib.iis.core.javamapreduce.hack.AvroMultipleOutputs.java

License:Apache License

/** Hacked method */
private void setSchema(Job job, Schema keySchema, Schema valSchema) {

    boolean isMaponly = job.getNumReduceTasks() == 0;
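    // For a map-only job the map output is the job output, so set the map output schema as well as the output schema.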
    if (keySchema != null) {
        if (isMaponly) {
            AvroJob.setMapOutputKeySchema(job, keySchema);
        }
        AvroJob.setOutputKeySchema(job, keySchema);
    }
    if (valSchema != null) {
        if (isMaponly) {
            AvroJob.setMapOutputValueSchema(job, valSchema);
        }
        AvroJob.setOutputValueSchema(job, valSchema);
    }

}