org.apache.gora.mapreduce.GoraOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.gora.mapreduce.GoraOutputFormat.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gora.mapreduce;

import java.io.IOException;

import org.apache.gora.persistency.Persistent;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.gora.store.FileBackedDataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * {@link OutputFormat} for Hadoop jobs that want to store the job outputs 
 * to a Gora store. 
 * <p>
 * Hadoop jobs can be either configured through static 
 * <code>setOutput()</code> methods, or if the job is not map-only from {@link GoraReducer}.
 * @see GoraReducer 
 */
public class GoraOutputFormat<K, T extends Persistent> extends OutputFormat<K, T> {

    public static final String DATA_STORE_CLASS = "gora.outputformat.datastore.class";

    public static final String OUTPUT_KEY_CLASS = "gora.outputformat.key.class";

    public static final String OUTPUT_VALUE_CLASS = "gora.outputformat.value.class";

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
        return new NullOutputCommitter();
    }

    private void setOutputPath(DataStore<K, T> store, TaskAttemptContext context) {
        if (store instanceof FileBackedDataStore) {
            FileBackedDataStore<K, T> fileStore = (FileBackedDataStore<K, T>) store;
            String uniqueName = FileOutputFormat.getUniqueFile(context, "part", "");

            //if file store output is not set, then get the output from FileOutputFormat
            if (fileStore.getOutputPath() == null) {
                fileStore.setOutputPath(FileOutputFormat.getOutputPath(context).toString());
            }

            //set the unique name of the data file
            String path = fileStore.getOutputPath();
            fileStore.setOutputPath(path + Path.SEPARATOR + uniqueName);
        }
    }

    @Override
    @SuppressWarnings("unchecked")
    public RecordWriter<K, T> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        Class<? extends DataStore<K, T>> dataStoreClass = (Class<? extends DataStore<K, T>>) conf
                .getClass(DATA_STORE_CLASS, null);
        Class<K> keyClass = (Class<K>) conf.getClass(OUTPUT_KEY_CLASS, null);
        Class<T> rowClass = (Class<T>) conf.getClass(OUTPUT_VALUE_CLASS, null);
        final DataStore<K, T> store = DataStoreFactory.createDataStore(dataStoreClass, keyClass, rowClass,
                context.getConfiguration());

        setOutputPath(store, context);

        return new GoraRecordWriter(store, context);
    }

    /**
     * Sets the output parameters for the job
     * @param job the job to set the properties for
     * @param dataStore the datastore as the output
     * @param reuseObjects whether to reuse objects in serialization
     */
    public static <K, V extends Persistent> void setOutput(Job job, DataStore<K, V> dataStore,
            boolean reuseObjects) {
        setOutput(job, dataStore.getClass(), dataStore.getKeyClass(), dataStore.getPersistentClass(), reuseObjects);
    }

    /**
     * Sets the output parameters for the job 
     * @param job the job to set the properties for
     * @param dataStoreClass the datastore class
     * @param keyClass output key class
     * @param persistentClass output value class
     * @param reuseObjects whether to reuse objects in serialization
     */
    @SuppressWarnings("rawtypes")
    public static <K, V extends Persistent> void setOutput(Job job, Class<? extends DataStore> dataStoreClass,
            Class<K> keyClass, Class<V> persistentClass, boolean reuseObjects) {

        Configuration conf = job.getConfiguration();

        GoraMapReduceUtils.setIOSerializations(conf, reuseObjects);

        job.setOutputFormatClass(GoraOutputFormat.class);
        job.setOutputKeyClass(keyClass);
        job.setOutputValueClass(persistentClass);
        conf.setClass(GoraOutputFormat.DATA_STORE_CLASS, dataStoreClass, DataStore.class);
        conf.setClass(GoraOutputFormat.OUTPUT_KEY_CLASS, keyClass, Object.class);
        conf.setClass(GoraOutputFormat.OUTPUT_VALUE_CLASS, persistentClass, Persistent.class);
    }
}