com.moz.fiji.mapreduce.produce.FijiProduceJobBuilder.java Source code

Java tutorial

Introduction

Here is the source code for com.moz.fiji.mapreduce.produce.FijiProduceJobBuilder.java

Source

/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.mapreduce.produce;

import java.io.IOException;
import java.util.Map;

import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.map.FijiMultithreadedMapper;
import org.apache.hadoop.util.ReflectionUtils;

import com.moz.fiji.annotations.ApiAudience;
import com.moz.fiji.annotations.ApiStability;
import com.moz.fiji.mapreduce.JobConfigurationException;
import com.moz.fiji.mapreduce.FijiMapReduceJob;
import com.moz.fiji.mapreduce.FijiMapper;
import com.moz.fiji.mapreduce.FijiReducer;
import com.moz.fiji.mapreduce.MapReduceJobOutput;
import com.moz.fiji.mapreduce.framework.FijiConfKeys;
import com.moz.fiji.mapreduce.framework.FijiTableInputJobBuilder;
import com.moz.fiji.mapreduce.kvstore.KeyValueStore;
import com.moz.fiji.mapreduce.output.FijiTableMapReduceJobOutput;
import com.moz.fiji.mapreduce.produce.impl.FijiProducers;
import com.moz.fiji.mapreduce.produce.impl.ProduceMapper;
import com.moz.fiji.mapreduce.reducer.IdentityReducer;
import com.moz.fiji.schema.EntityId;
import com.moz.fiji.schema.FijiDataRequest;
import com.moz.fiji.schema.FijiRowData;
import com.moz.fiji.schema.FijiTable;

/**
 * Builds jobs that run a producer over a Fiji table.
 *
 * <p>Instances are obtained through {@link #create()} and configured by chaining the
 * {@code with...()} methods. A producer class and a table output are both mandatory;
 * {@link #configureJob(Job)} rejects the job with a {@link JobConfigurationException}
 * if either is missing, or if the output table differs from the input table.</p>
 */
@ApiAudience.Public
@ApiStability.Stable
public final class FijiProduceJobBuilder extends FijiTableInputJobBuilder<FijiProduceJobBuilder> {

    /** The default number of threads per mapper to use for running producers. */
    private static final int DEFAULT_NUM_THREADS_PER_MAPPER = 1;

    /** The class of the producer to run; null until {@link #withProducer(Class)} is called. */
    private Class<? extends FijiProducer> mProducerClass;

    /** The number of threads per mapper to use for running producers. */
    private int mNumThreadsPerMapper;

    /** Producer job output; null until one of the {@code withOutput()} overloads is called. */
    private FijiTableMapReduceJobOutput mJobOutput;

    /** The producer instance; only populated by {@link #configureJob(Job)}. */
    private FijiProducer mProducer;

    /** The mapper instance; only populated by {@link #configureJob(Job)}. */
    private FijiMapper<?, ?, ?, ?> mMapper;

    /** The reducer instance; only populated by {@link #configureJob(Job)}. */
    private FijiReducer<?, ?, ?, ?> mReducer;

    /** The data request for the job's table input; only populated by {@link #configureJob(Job)}. */
    private FijiDataRequest mDataRequest;

    /** Constructs a builder for jobs that run a Fiji producer over a Fiji table. */
    private FijiProduceJobBuilder() {
        mProducerClass = null;
        mNumThreadsPerMapper = DEFAULT_NUM_THREADS_PER_MAPPER;
        mJobOutput = null;
        mProducer = null;
        mMapper = null;
        mReducer = null;
        mDataRequest = null;
    }

    /**
     * Creates a new builder for Fiji produce jobs.
     *
     * @return a new Fiji produce job builder.
     */
    public static FijiProduceJobBuilder create() {
        return new FijiProduceJobBuilder();
    }

    /**
     * Configures the job with the Fiji producer to run.
     *
     * <p>The class is not validated here; a null producer class is rejected later by
     * {@link #configureJob(Job)}.</p>
     *
     * @param producerClass The producer class.
     * @return This builder instance so you may chain configuration method calls.
     */
    public FijiProduceJobBuilder withProducer(Class<? extends FijiProducer> producerClass) {
        mProducerClass = producerClass;
        return this;
    }

    /**
     * Configures the producer output.
     *
     * @param jobOutput Output table of the producer must match the input table.
     * @return This builder instance so you may chain configuration method calls.
     */
    public FijiProduceJobBuilder withOutput(FijiTableMapReduceJobOutput jobOutput) {
        // Keep a typed reference so configureJob() can compare the output table URI
        // against the input table URI.
        mJobOutput = jobOutput;
        return super.withOutput(jobOutput);
    }

    /**
     * {@inheritDoc}
     *
     * @param jobOutput Output table of the producer must match the input table. Must be an instance
     *     of FijiTableMapReduceJobOutput or a subclass.
     * @throws IllegalArgumentException if {@code jobOutput} is not a
     *     FijiTableMapReduceJobOutput (this includes null).
     */
    @Override
    public FijiProduceJobBuilder withOutput(MapReduceJobOutput jobOutput) {
        if (jobOutput instanceof FijiTableMapReduceJobOutput) {
            return withOutput((FijiTableMapReduceJobOutput) jobOutput);
        }
        // Fail with a descriptive message rather than a ClassCastException.
        // IllegalArgumentException is a RuntimeException, so callers that caught the
        // previously thrown RuntimeException keep working.
        throw new IllegalArgumentException(
                "jobOutput parameter of FijiProduceJobBuilder.withOutput() must "
                        + "be a FijiTableMapReduceJobOutput.");
    }

    /**
     * Sets the number of threads to use for running the producer in parallel.
     *
     * <p>You may use this setting to run multiple instances of your producer in parallel
     * within each map task of the job.  This may be useful for increasing throughput when your
     * producer is not CPU bound.</p>
     *
     * @param numThreads The number of produce-runner threads to use per mapper. Must be at least 1.
     * @return This builder instance so you may chain configuration method calls.
     * @throws IllegalArgumentException if {@code numThreads} is less than 1.
     */
    public FijiProduceJobBuilder withNumThreads(int numThreads) {
        // Guava's Preconditions only substitutes %s placeholders; %d would be left verbatim.
        Preconditions.checkArgument(numThreads >= 1, "numThreads must be positive, got %s", numThreads);
        mNumThreadsPerMapper = numThreads;
        return this;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Validates that a producer class and an output have been set and that the output table
     * is the input table, records the producer class in the job configuration, instantiates the
     * producer to obtain its data request, and then delegates to the parent builder.</p>
     *
     * @throws JobConfigurationException if no producer class or no output was specified, or if
     *     the output table is not the input table.
     */
    @Override
    protected void configureJob(Job job) throws IOException {
        final Configuration conf = job.getConfiguration();

        // A producer class is mandatory.
        if (null == mProducerClass) {
            throw new JobConfigurationException("Must specify a producer.");
        }

        // An output is mandatory as well; check explicitly so that a missing withOutput() call
        // surfaces as a configuration error instead of a NullPointerException below.
        if (null == mJobOutput) {
            throw new JobConfigurationException("Must specify job output.");
        }

        // Serialize the producer class name into the job configuration.
        conf.setClass(FijiConfKeys.FIJI_PRODUCER_CLASS, mProducerClass, FijiProducer.class);

        // Write to the table, but make sure the output table is the same as the input table.
        if (!getInputTableURI().equals(mJobOutput.getOutputTableURI())) {
            throw new JobConfigurationException("Output table must be the same as the input table.");
        }

        // Produce jobs always run a ProduceMapper followed by an identity reducer.
        mMapper = new ProduceMapper();
        mReducer = new IdentityReducer<Object, Object>();

        job.setJobName("Fiji produce: " + mProducerClass.getSimpleName());

        // Instantiate the producer now: its data request and required stores are needed
        // while the rest of the job is being configured.
        mProducer = ReflectionUtils.newInstance(mProducerClass, conf);
        mDataRequest = mProducer.getDataRequest();

        // Configure the table input job.
        super.configureJob(job);
    }

    /**
     * {@inheritDoc}
     *
     * <p>When more than one thread per mapper was requested, the job's mapper class is replaced
     * by {@link FijiMultithreadedMapper}, which is told to run the configured mapper's class
     * with the requested number of threads.</p>
     */
    @Override
    protected void configureMapper(Job job) throws IOException {
        super.configureMapper(job);

        // Only wrap the mapper when parallelism was actually requested.
        if (mNumThreadsPerMapper > 1) {
            // The cast is unchecked because mMapper is held with wildcard type parameters.
            @SuppressWarnings("unchecked")
            Class<? extends Mapper<EntityId, FijiRowData, Object, Object>> childMapperClass =
                    (Class<? extends Mapper<EntityId, FijiRowData, Object, Object>>) mMapper.getClass();
            FijiMultithreadedMapper.setMapperClass(job, childMapperClass);
            FijiMultithreadedMapper.setNumberOfThreads(job, mNumThreadsPerMapper);
            job.setMapperClass(FijiMultithreadedMapper.class);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>Delegates to the producer instance, which is only created by
     * {@link #configureJob(Job)}.</p>
     */
    @Override
    protected Map<String, KeyValueStore<?, ?>> getRequiredStores() throws IOException {
        return mProducer.getRequiredStores();
    }

    /** {@inheritDoc} */
    @Override
    protected FijiMapReduceJob build(Job job) {
        return FijiMapReduceJob.create(job);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns the data request obtained from the producer in {@link #configureJob(Job)};
     * null before that method has run.</p>
     */
    @Override
    protected FijiDataRequest getDataRequest() {
        return mDataRequest;
    }

    /** {@inheritDoc} */
    @Override
    protected void validateInputTable(FijiTable inputTable) throws IOException {
        // Validate the Fiji data request against the input table layout:
        super.validateInputTable(inputTable);

        // Validate the producer output column against the output table (ie. the input table):
        FijiProducers.validateOutputColumn(mProducer, inputTable.getLayout());
    }

    /** {@inheritDoc} */
    @Override
    protected FijiMapper<?, ?, ?, ?> getMapper() {
        return mMapper;
    }

    /** {@inheritDoc} */
    @Override
    protected FijiReducer<?, ?, ?, ?> getCombiner() {
        // Producers can't have combiners.
        return null;
    }

    /** {@inheritDoc} */
    @Override
    protected FijiReducer<?, ?, ?, ?> getReducer() {
        return mReducer;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns the configured producer class.</p>
     */
    @Override
    protected Class<?> getJarClass() {
        return mProducerClass;
    }
}