Java tutorial
package com.ebay.erl.mobius.core.builder; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.util.HashMap; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextOutputFormat; import com.ebay.erl.mobius.core.MobiusJob; /** * * Gets the implementation of {@link AbstractDatasetBuilder} * based on a given {@link OutputFormat}. * <p> * * This class is used by the Mobius engine to build a dataset * from an intermediate result based on its output format. * <p> * * By default, Mobius uses {@link TSVDatasetBuilder} to build a * dataset if the intermediate result of an analysis flow is * in text format. Alternatively, Mobius uses {@link SeqFileDatasetBuilder} * if the intermediate result is in sequence file format. * <p> * * The intermediate result is created by the Mobius job. Users * should not use this class to build their own dataset * on HDFS. * * * * <p> * This product is licensed under the Apache License, Version 2.0, * available at http://www.apache.org/licenses/LICENSE-2.0. * * This product contains portions derived from Apache hadoop which is * licensed under the Apache License, Version 2.0, available at * http://hadoop.apache.org. * * 2007 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan */ @SuppressWarnings({ "deprecation", "unchecked" }) public class DatasetBuildersFactory { private static final Log LOGGER = LogFactory.getLog(DatasetBuildersFactory.class); /** * mapping from a given {@link OutputFormat} to an implementation of * {@link AbstractDatasetBuilder}. */ protected Map<Class<? extends OutputFormat>, Class<? extends AbstractDatasetBuilder>> _DATASET_BUILDERS; private static DatasetBuildersFactory _INSTANCE; private MobiusJob job; private DatasetBuildersFactory(MobiusJob job) throws IOException { this._DATASET_BUILDERS = new HashMap<Class<? extends OutputFormat>, Class<? extends AbstractDatasetBuilder>>(); this.register(TextOutputFormat.class, TSVDatasetBuilder.class); this.register(SequenceFileOutputFormat.class, SeqFileDatasetBuilder.class); this.job = job; } /** * Get the singleton instance of {@link DatasetBuildersFactory}. */ public static DatasetBuildersFactory getInstance(MobiusJob job) throws IOException { if (DatasetBuildersFactory._INSTANCE == null) DatasetBuildersFactory._INSTANCE = new DatasetBuildersFactory(job); return DatasetBuildersFactory._INSTANCE; } /** * This method is used to generate a {@link Dataset} based on a result generated by previous * Mobius job, so that the user can continue to refine the {@link Dataset} * * @param prevJobOutFmt the output format of previous job (an intermediate result in a flow). * @param datasetName the name to be used for the new dataset. * @return an implementation of {@link AbstractDatasetBuilder} for building a dataset from * the intermediate result. */ public AbstractDatasetBuilder getBuilder(Class<? extends FileOutputFormat> prevJobOutFmt, String datasetName) { Class<? extends AbstractDatasetBuilder> builderClass = _DATASET_BUILDERS.get(prevJobOutFmt); if (builderClass != null) { LOGGER.info("Using " + builderClass.getCanonicalName() + " as the dataset builder for " + prevJobOutFmt.getCanonicalName()); AbstractDatasetBuilder<?> builder = null; try { builder = builderClass.getDeclaredConstructor(MobiusJob.class, String.class).newInstance(this.job, datasetName); return builder; } catch (SecurityException e) { throw new RuntimeException(e); } catch (InstantiationException e) { throw new RuntimeException(e); } catch (IllegalAccessException e) { throw new RuntimeException(e); } catch (InvocationTargetException e) { throw new RuntimeException(e); } catch (NoSuchMethodException e) { throw new RuntimeException(builderClass.getCanonicalName() + " doesn't provide a constructor which accepts one " + MobiusJob.class.getCanonicalName() + " and String.class as the arguments, please provide such constructor."); } } else { throw new RuntimeException( "Cannot find a dataset builder for output format:" + prevJobOutFmt.getCanonicalName() + ", " + "please use " + DatasetBuildersFactory.class.getCanonicalName() + "#register to register a builder for this output format."); } } /** * Register a new implementation of {@link AbstractDatasetBuilder} which generates a {@link Dataset} * that read the data generated by the {@link OutputFormat}. * * @param outputFormat an output format type from previous job that the given <code>builder</code> * will be used to create a dataset. * @param builder an implementation of AbstractDatasetBuilder to build the dataset from an intermediate * result (in the format of the given <code>outputFormat</code>). * @return the {@link DatasetBuildersFactory} itself. * @throws IOException */ public DatasetBuildersFactory register(Class<? extends OutputFormat> outputFormat, Class<? extends AbstractDatasetBuilder> builder) throws IOException { LOGGER.info( "Set dataset buider for " + outputFormat.getCanonicalName() + " to " + builder.getCanonicalName()); this._DATASET_BUILDERS.put(outputFormat, builder); return this; } }