co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.java

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch.dataset.output;

import co.cask.cdap.common.lang.InstantiatorFactory;
import com.google.common.reflect.TypeToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * OutputFormat that wraps a root OutputFormat and provides an {@link OutputCommitter} that delegates to the
 * OutputCommitters of every named output registered through {@link MultipleOutputs}.
 *
 * <p>Records are written through the root OutputFormat only. Output-spec checks and commits are performed by
 * the OutputFormats of the named outputs.
 *
 * @param <K> Type of key
 * @param <V> Type of value
 */
public class MultipleOutputsMainOutputWrapper<K, V> extends OutputFormat<K, V> {

    private static final String ROOT_OUTPUT_FORMAT = "co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.rootOutputFormat";

    // Created on first use by getRootOutputFormat and reused afterwards.
    private OutputFormat<K, V> innerFormat;
    // Created on first use by getOutputCommitter and reused afterwards.
    private OutputCommitter committer;

    /**
     * Returns the RecordWriter of the root OutputFormat configured for the job.
     *
     * @param job the task attempt context, whose configuration names the root OutputFormat class
     * @return the RecordWriter obtained from the root OutputFormat
     * @throws RuntimeException if the root OutputFormat class cannot be instantiated
     */
    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
        return getRootOutputFormat(job).getRecordWriter(job);
    }

    /**
     * Checks the output specs of every named output, each against its own named job context.
     * The root OutputFormat is not checked separately here.
     *
     * @param context the job context from which the named outputs are read
     */
    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
        // One factory per call is enough; there is no need to rebuild it for every named output.
        InstantiatorFactory instantiatorFactory = new InstantiatorFactory(false);
        for (String name : MultipleOutputs.getNamedOutputsList(context)) {
            OutputFormat<?, ?> outputFormat = createNamedOutputFormat(instantiatorFactory, context, name);
            JobContext namedContext = MultipleOutputs.getNamedJobContext(context, name);
            outputFormat.checkOutputSpecs(namedContext);
        }
    }

    /**
     * Sets an OutputFormat class as the root OutputFormat for the Hadoop job.
     *
     * @param job the job on which to set the OutputFormat class
     * @param outputFormatClass the class to set as the root OutputFormat for the job
     * @param outputConfig the configuration to set for the specified OutputFormat; every entry is copied
     *                     into the job configuration
     */
    public static void setRootOutputFormat(Job job, String outputFormatClass, Map<String, String> outputConfig) {
        Configuration conf = job.getConfiguration();
        conf.set(ROOT_OUTPUT_FORMAT, outputFormatClass);

        for (Map.Entry<String, String> confEntry : outputConfig.entrySet()) {
            conf.set(confEntry.getKey(), confEntry.getValue());
        }
    }

    // the root OutputFormat is used only for writing, not for checking output specs or committing of the output
    // because the root is also in the delegates, which check the output spec and commit the output.
    private OutputFormat<K, V> getRootOutputFormat(JobContext context) {
        if (innerFormat == null) {
            Configuration conf = context.getConfiguration();
            // NOTE(review): the fallback FileOutputFormat is abstract in Hadoop, so a job that never called
            // setRootOutputFormat is expected to fail in the instantiation below - confirm that is intended.
            @SuppressWarnings("unchecked")
            Class<OutputFormat<K, V>> c = (Class<OutputFormat<K, V>>) conf.getClass(ROOT_OUTPUT_FORMAT,
                    FileOutputFormat.class);
            try {
                // Class.newInstance() is deprecated and lets checked constructor exceptions escape undeclared;
                // going through the Constructor wraps them in InvocationTargetException instead.
                innerFormat = c.getDeclaredConstructor().newInstance();
            } catch (ReflectiveOperationException e) {
                throw new RuntimeException("Failed to instantiate root OutputFormat class " + c.getName(), e);
            }
        }
        return innerFormat;
    }

    /**
     * Instantiates the OutputFormat class registered for the given named output.
     *
     * @param instantiatorFactory the factory used to create the instance
     * @param context the job (or task attempt) context holding the named output configuration
     * @param name the name of the named output
     * @return a new instance of the named output's OutputFormat class
     */
    private static OutputFormat<?, ?> createNamedOutputFormat(InstantiatorFactory instantiatorFactory,
                                                              JobContext context, String name) {
        @SuppressWarnings("rawtypes") // kept raw to match the type the lookup result was originally assigned to
        Class<? extends OutputFormat> namedOutputFormatClass = MultipleOutputs
                .getNamedOutputFormatClass(context, name);
        return instantiatorFactory.get(TypeToken.of(namedOutputFormatClass)).create();
    }

    /**
     * Returns a committer that delegates to the OutputCommitter of every named output. The committer is
     * built on the first call and the same instance is returned on subsequent calls.
     *
     * @param context the task attempt context from which the named outputs are read
     * @return the {@link MultipleOutputsCommitter} wrapping the per-output committers, keyed by output name
     */
    @Override
    public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // return a MultipleOutputsCommitter that commits for the root output format as well as all delegate outputformats
        if (committer == null) {
            InstantiatorFactory instantiatorFactory = new InstantiatorFactory(false);
            Map<String, OutputCommitter> committers = new HashMap<>();
            for (String name : MultipleOutputs.getNamedOutputsList(context)) {
                TaskAttemptContext namedContext = MultipleOutputs.getNamedTaskContext(context, name);
                OutputFormat<?, ?> outputFormat = createNamedOutputFormat(instantiatorFactory, context, name);
                committers.put(name, outputFormat.getOutputCommitter(namedContext));
            }
            committer = new MultipleOutputsCommitter(committers);
        }

        return committer;
    }
}