com.ebay.erl.mobius.core.builder.DatasetBuildersFactory.java Source code

Java tutorial

Introduction

Here is the source code for com.ebay.erl.mobius.core.builder.DatasetBuildersFactory.java

Source

package com.ebay.erl.mobius.core.builder;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

import com.ebay.erl.mobius.core.MobiusJob;

/**
 * 
 * Gets the implementation of {@link AbstractDatasetBuilder}
 * based on a given {@link OutputFormat}.
 * <p>
 * 
 * This class is used by the Mobius engine to build a dataset 
 * from an intermediate result based on its output format.
 * <p>
 * 
 * By default, Mobius uses {@link TSVDatasetBuilder} to build a
 * dataset if the intermediate result of an analysis flow is 
 * in text format. Alternatively, Mobius uses {@link SeqFileDatasetBuilder} 
 * if the intermediate result is in sequence file format.
 * <p>
 * 
 * The intermediate result is created by the Mobius job. Users
 * should not use this class to build their own dataset
 * on HDFS.
 * 
 * 
 * 
 * <p>
 * This product is licensed under the Apache License,  Version 2.0, 
 * available at http://www.apache.org/licenses/LICENSE-2.0.
 * 
 * This product contains portions derived from Apache hadoop which is 
 * licensed under the Apache License, Version 2.0, available at 
 * http://hadoop.apache.org.
 * 
 * © 2007 – 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
 */
@SuppressWarnings({ "deprecation", "unchecked" })
public class DatasetBuildersFactory {
    private static final Log LOGGER = LogFactory.getLog(DatasetBuildersFactory.class);

    /**
     * Mapping from an {@link OutputFormat} type to the
     * {@link AbstractDatasetBuilder} implementation that is instantiated
     * for it by {@link #getBuilder(Class, String)}. Lookups are by exact
     * class; subclasses of a registered output format are not matched.
     */
    protected Map<Class<? extends OutputFormat>, Class<? extends AbstractDatasetBuilder>> _DATASET_BUILDERS;

    /** The lazily created singleton; <code>null</code> until the first {@link #getInstance(MobiusJob)} call. */
    private static DatasetBuildersFactory _INSTANCE;

    /** The job handed to the constructor of every builder created by this factory. */
    private final MobiusJob job;

    /**
     * Creates the factory and registers the two default builders:
     * {@link TSVDatasetBuilder} for {@link TextOutputFormat} and
     * {@link SeqFileDatasetBuilder} for {@link SequenceFileOutputFormat}.
     *
     * @param job the job passed to every builder this factory creates.
     * @throws IOException declared because {@link #register(Class, Class)} declares it.
     */
    private DatasetBuildersFactory(MobiusJob job) throws IOException {
        this._DATASET_BUILDERS = new HashMap<Class<? extends OutputFormat>, Class<? extends AbstractDatasetBuilder>>();
        this.register(TextOutputFormat.class, TSVDatasetBuilder.class);
        this.register(SequenceFileOutputFormat.class, SeqFileDatasetBuilder.class);
        this.job = job;
    }

    /**
     * Get the singleton instance of {@link DatasetBuildersFactory}.
     * <p>
     *
     * The instance is created on the first call, using the <code>job</code>
     * given to that call. On every later call the existing instance is
     * returned and the <code>job</code> argument is ignored.
     * <p>
     *
     * The lazy creation is not synchronized.
     *
     * @param job the job to bind to the factory when it is first created.
     * @return the singleton factory.
     * @throws IOException if the factory cannot be created.
     */
    public static DatasetBuildersFactory getInstance(MobiusJob job) throws IOException {
        if (DatasetBuildersFactory._INSTANCE == null)
            DatasetBuildersFactory._INSTANCE = new DatasetBuildersFactory(job);

        return DatasetBuildersFactory._INSTANCE;
    }

    /**
     * Creates a dataset builder for a result generated by a previous
     * Mobius job, so that the user can continue to refine that result
     * as a dataset.
     * <p>
     *
     * The builder class registered for <code>prevJobOutFmt</code> is
     * instantiated reflectively through its declared constructor taking
     * a {@link MobiusJob} and a {@link String}.
     *
     * @param prevJobOutFmt the output format of previous job (an intermediate result in a flow).
     * @param datasetName the name to be used for the new dataset.
     * @return an implementation of {@link AbstractDatasetBuilder} for building a dataset from
     * the intermediate result.
     * @throws NullPointerException if <code>prevJobOutFmt</code> is <code>null</code>.
     * @throws RuntimeException if no builder is registered for <code>prevJobOutFmt</code>,
     * or if the registered builder cannot be instantiated; in the latter case the
     * underlying reflection exception is attached as the cause.
     */
    public AbstractDatasetBuilder getBuilder(Class<? extends FileOutputFormat> prevJobOutFmt, String datasetName) {
        // Fail with an explicit message instead of an anonymous NPE raised
        // while building the "cannot find a dataset builder" error text.
        if (prevJobOutFmt == null) {
            throw new NullPointerException("prevJobOutFmt must not be null");
        }

        Class<? extends AbstractDatasetBuilder> builderClass = _DATASET_BUILDERS.get(prevJobOutFmt);
        if (builderClass == null) {
            throw new RuntimeException(
                    "Cannot find a dataset builder for output format:" + prevJobOutFmt.getCanonicalName() + ", "
                            + "please use " + DatasetBuildersFactory.class.getCanonicalName()
                            + "#register to register a builder for this output format.");
        }

        LOGGER.info("Using " + builderClass.getCanonicalName() + " as the dataset builder for "
                + prevJobOutFmt.getCanonicalName());

        try {
            return builderClass.getDeclaredConstructor(MobiusJob.class, String.class).newInstance(this.job,
                    datasetName);
        } catch (NoSuchMethodException e) {
            // Keep the original exception as the cause so the stack trace
            // of the failed constructor lookup is not lost.
            throw new RuntimeException(builderClass.getCanonicalName()
                    + " doesn't provide a constructor which accepts one " + MobiusJob.class.getCanonicalName()
                    + " and String.class as the arguments, please provide such constructor.", e);
        } catch (SecurityException e) {
            throw new RuntimeException(e);
        } catch (InstantiationException e) {
            throw new RuntimeException(e);
        } catch (IllegalAccessException e) {
            throw new RuntimeException(e);
        } catch (InvocationTargetException e) {
            // The builder's constructor itself threw; its exception is e.getCause().
            throw new RuntimeException(e);
        }
    }

    /**
     * Registers an implementation of {@link AbstractDatasetBuilder} to be used for
     * building a dataset from data written by the given {@link OutputFormat}.
     * A builder previously registered for the same output format is replaced.
     *
     * @param outputFormat an output format type from previous job that the given <code>builder</code>
     * will be used to create a dataset.
     * @param builder an implementation of AbstractDatasetBuilder to build the dataset from an intermediate
     * result (in the format of the given <code>outputFormat</code>).
     * @return the {@link DatasetBuildersFactory} itself.
     * @throws NullPointerException if <code>outputFormat</code> or <code>builder</code> is <code>null</code>.
     * @throws IOException declared by the signature; not thrown by this implementation.
     */
    public DatasetBuildersFactory register(Class<? extends OutputFormat> outputFormat,
            Class<? extends AbstractDatasetBuilder> builder) throws IOException {
        // Reject nulls up front with a clear message; previously they surfaced
        // as an anonymous NPE from the log statement below.
        if (outputFormat == null) {
            throw new NullPointerException("outputFormat must not be null");
        }
        if (builder == null) {
            throw new NullPointerException("builder must not be null");
        }

        LOGGER.info(
                "Set dataset buider for " + outputFormat.getCanonicalName() + " to " + builder.getCanonicalName());
        this._DATASET_BUILDERS.put(outputFormat, builder);
        return this;
    }
}