com.moz.fiji.mapreduce.bulkimport.FijiBulkImportJobBuilder.java Source code

Java tutorial

Introduction

Here is the source code for com.moz.fiji.mapreduce.bulkimport.FijiBulkImportJobBuilder.java

Source

/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.mapreduce.bulkimport;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.ReflectionUtils;

import com.moz.fiji.annotations.ApiAudience;
import com.moz.fiji.annotations.ApiStability;
import com.moz.fiji.mapreduce.JobConfigurationException;
import com.moz.fiji.mapreduce.FijiMapReduceJob;
import com.moz.fiji.mapreduce.FijiMapper;
import com.moz.fiji.mapreduce.FijiReducer;
import com.moz.fiji.mapreduce.MapReduceJobInput;
import com.moz.fiji.mapreduce.MapReduceJobOutput;
import com.moz.fiji.mapreduce.bulkimport.impl.BulkImportMapper;
import com.moz.fiji.mapreduce.framework.FijiConfKeys;
import com.moz.fiji.mapreduce.framework.MapReduceJobBuilder;
import com.moz.fiji.mapreduce.kvstore.KeyValueStore;
import com.moz.fiji.mapreduce.output.DirectFijiTableMapReduceJobOutput;
import com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput;
import com.moz.fiji.mapreduce.output.FijiTableMapReduceJobOutput;
import com.moz.fiji.mapreduce.reducer.IdentityReducer;

/**
 * Builds a job that runs a FijiBulkImporter to import data into a Fiji table.
 *
 * <p>Typical usage chains the configuration methods on an instance obtained from
 * {@link #create()}: {@link #withInput(MapReduceJobInput)},
 * {@link #withOutput(FijiTableMapReduceJobOutput)} and {@link #withBulkImporter(Class)}.
 * Both a bulk importer class and a job output must be specified before the job is
 * configured; otherwise a {@link JobConfigurationException} is thrown.</p>
 */
@ApiAudience.Public
@ApiStability.Stable
public final class FijiBulkImportJobBuilder extends MapReduceJobBuilder<FijiBulkImportJobBuilder> {

    /** The class of the bulk importer to run. */
    @SuppressWarnings("rawtypes")
    private Class<? extends FijiBulkImporter> mBulkImporterClass;

    /** The bulk importer instance (created while configuring the job). */
    private FijiBulkImporter<?, ?> mBulkImporter;
    /** The mapper instance to run (which runs the bulk importer inside it). */
    private FijiMapper<?, ?, ?, ?> mMapper;
    /** The reducer instance to run (may be null). */
    private FijiReducer<?, ?, ?, ?> mReducer;

    /** The job input. */
    private MapReduceJobInput mJobInput;

    /** Job output must be a Fiji table. */
    private FijiTableMapReduceJobOutput mJobOutput;

    /** Constructs a builder for jobs that run a FijiBulkImporter. */
    private FijiBulkImportJobBuilder() {
        mBulkImporterClass = null;

        mBulkImporter = null;
        mMapper = null;
        mReducer = null;

        mJobInput = null;
        mJobOutput = null;
    }

    /**
     * Creates a new builder for Fiji bulk import jobs.
     *
     * @return a new Fiji bulk import job builder.
     */
    public static FijiBulkImportJobBuilder create() {
        return new FijiBulkImportJobBuilder();
    }

    /**
     * Configures the job with input.
     *
     * @param jobInput The input for the job.
     * @return This builder instance so you may chain configuration method calls.
     */
    public FijiBulkImportJobBuilder withInput(MapReduceJobInput jobInput) {
        mJobInput = jobInput;
        return this;
    }

    /**
     * Configures the bulk-importer output Fiji table.
     *
     * <p>The output is recorded on this builder and also forwarded to the parent builder.</p>
     *
     * @param jobOutput Bulk importer must output to a Fiji table.
     * @return this builder.
     */
    public FijiBulkImportJobBuilder withOutput(FijiTableMapReduceJobOutput jobOutput) {
        mJobOutput = jobOutput;
        super.withOutput(jobOutput);
        return this;
    }

    /**
     * Configures the job output.
     *
     * <p>Only outputs that are instances of {@link FijiTableMapReduceJobOutput} are accepted;
     * anything else (including {@code null}) is rejected.</p>
     *
     * @param jobOutput The output for the job.
     *     Bulk importer must output to a Fiji table.
     * @return This builder instance so you may chain configuration method calls.
     * @throws JobConfigurationException if the output is null or is not a Fiji table output.
     */
    @Override
    public FijiBulkImportJobBuilder withOutput(MapReduceJobOutput jobOutput) {
        if (!(jobOutput instanceof FijiTableMapReduceJobOutput)) {
            // instanceof is false for null, so guard the class lookup: report the bad
            // argument through the intended exception rather than a NullPointerException.
            final String actualOutput = (null == jobOutput) ? "null" : jobOutput.getClass().getName();
            throw new JobConfigurationException(String.format("Invalid job output %s: expecting %s or %s",
                    actualOutput, DirectFijiTableMapReduceJobOutput.class.getName(),
                    HFileMapReduceJobOutput.class.getName()));
        }
        return withOutput((FijiTableMapReduceJobOutput) jobOutput);
    }

    /**
     * Configures the job with a bulk importer to run in the map phase.
     *
     * @param bulkImporterClass The bulk importer class to use in the job.
     * @return This builder instance so you may chain configuration method calls.
     */
    @SuppressWarnings("rawtypes")
    public FijiBulkImportJobBuilder withBulkImporter(Class<? extends FijiBulkImporter> bulkImporterClass) {
        mBulkImporterClass = bulkImporterClass;
        return this;
    }

    /**
     * {@inheritDoc}
     *
     * @throws JobConfigurationException if no bulk importer class or no job output
     *     has been specified on this builder.
     */
    @Override
    protected void configureJob(Job job) throws IOException {
        final Configuration conf = job.getConfiguration();

        // Fail early, with a descriptive error, when required settings are missing.
        if (null == mBulkImporterClass) {
            throw new JobConfigurationException("Must specify a bulk importer.");
        }
        if (null == mJobOutput) {
            throw new JobConfigurationException("Must specify job output.");
        }

        // Store the name of the importer to use in the job configuration so the mapper can
        // create instances of it.
        conf.setClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, mBulkImporterClass, FijiBulkImporter.class);

        mJobOutput.configure(job);

        // Configure the mapper and reducer. The same mapper/reducer pair is set up here
        // regardless of the concrete output type.
        configureJobForHFileOutput(job);

        job.setJobName("Fiji bulk import: " + mBulkImporterClass.getSimpleName());

        // Construct the bulk importer instance.
        mBulkImporter = ReflectionUtils.newInstance(mBulkImporterClass, conf);

        // Configure the MapReduce job (requires mBulkImporter to be set properly):
        super.configureJob(job);
    }

    /**
     * Configures the job settings specific to writing HFiles.
     *
     * <p>Sets the mapper and reducer instances later returned by {@link #getMapper()}
     * and {@link #getReducer()}.</p>
     *
     * @param job The job to configure.
     */
    protected void configureJobForHFileOutput(Job job) {
        // Construct the mapper instance that runs the importer.
        mMapper = new BulkImportMapper<Object, Object>();

        // Don't need to do anything during the Reducer, but we need to run the reduce phase
        // so the KeyValue records output from the map phase get sorted.
        mReducer = new IdentityReducer<Object, Object>();
    }

    /** {@inheritDoc} */
    @Override
    protected FijiMapReduceJob build(Job job) {
        return FijiMapReduceJob.create(job);
    }

    /** {@inheritDoc} */
    @Override
    protected Map<String, KeyValueStore<?, ?>> getRequiredStores() throws IOException {
        // The importer instance is only created inside configureJob().
        return mBulkImporter.getRequiredStores();
    }

    /** {@inheritDoc} */
    @Override
    protected MapReduceJobInput getJobInput() {
        return mJobInput;
    }

    /** {@inheritDoc} */
    @Override
    protected FijiMapper<?, ?, ?, ?> getMapper() {
        return mMapper;
    }

    /** {@inheritDoc} */
    @Override
    protected FijiReducer<?, ?, ?, ?> getCombiner() {
        // Use no combiner.
        return null;
    }

    /** {@inheritDoc} */
    @Override
    protected FijiReducer<?, ?, ?, ?> getReducer() {
        return mReducer;
    }

    /** {@inheritDoc} */
    @Override
    protected Class<?> getJarClass() {
        return mBulkImporterClass;
    }
}