co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java Source code

Introduction

Here is the source code for co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java. The class wraps a user-defined Hadoop Reducer so that CDAP can inject runtime components (properties, metrics, datasets) into it, run it with the program class loader as the context class loader, and periodically flush buffered dataset operations.

Source

/*
 * Copyright © 2014-2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch;

import co.cask.cdap.api.ProgramLifecycle;
import co.cask.cdap.api.RuntimeContext;
import co.cask.cdap.common.lang.ClassLoaders;
import co.cask.cdap.common.lang.PropertyFieldSetter;
import co.cask.cdap.internal.app.runtime.DataSetFieldSetter;
import co.cask.cdap.internal.app.runtime.MetricsFieldSetter;
import co.cask.cdap.internal.lang.Reflections;
import com.google.common.base.Throwables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * Wraps a user-defined implementation of the {@link Reducer} class, allowing extra configuration to be performed.
 */
public class ReducerWrapper extends Reducer {

    private static final Logger LOG = LoggerFactory.getLogger(ReducerWrapper.class);
    private static final String ATTR_REDUCER_CLASS = "c.reducer.class";

    /**
     * Wraps the reducer defined in the job with this {@link ReducerWrapper} if one is defined.
     * @param job The MapReduce job
     */
    public static void wrap(Job job) {
        // NOTE: we don't use job.getReducerClass() because that would load the user's class,
        //       which is neither needed nor desirable at this point
        Configuration conf = job.getConfiguration();
        String reducerClass = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
        if (reducerClass != null) {
            conf.set(ReducerWrapper.ATTR_REDUCER_CLASS, reducerClass);
            job.setReducerClass(ReducerWrapper.class);
        }
    }

    @SuppressWarnings("unchecked")
    @Override
    public void run(Context context) throws IOException, InterruptedException {
        MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
        BasicMapReduceTaskContext basicMapReduceContext = classLoader.getTaskContextProvider().get(context);

        // this is a hook for periodic flushing of changes buffered by datasets (to avoid OOME)
        WrappedReducer.Context flushingContext = createAutoFlushingContext(context, basicMapReduceContext);
        basicMapReduceContext.setHadoopContext(flushingContext);

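        // Load and instantiate the user's Reducer with the program class loader so that
        // classes packaged with the program resolve correctly.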
        String userReducer = context.getConfiguration().get(ATTR_REDUCER_CLASS);
        ClassLoader programClassLoader = classLoader.getProgramClassLoader();
        Reducer delegate = createReducerInstance(programClassLoader, userReducer);

        // injecting runtime components, like datasets, etc.
        try {
            Reflections.visit(delegate, delegate.getClass(),
                    new PropertyFieldSetter(basicMapReduceContext.getSpecification().getProperties()),
                    new MetricsFieldSetter(basicMapReduceContext.getMetrics()),
                    new DataSetFieldSetter(basicMapReduceContext));
        } catch (Throwable t) {
            LOG.error("Failed to inject fields to {}.", delegate.getClass(), t);
            throw Throwables.propagate(t);
        }

        ClassLoader oldClassLoader;
        if (delegate instanceof ProgramLifecycle) {
            oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
            try {
                ((ProgramLifecycle) delegate).initialize(new MapReduceLifecycleContext(basicMapReduceContext));
            } catch (Exception e) {
                LOG.error("Failed to initialize mapper with {}", basicMapReduceContext, e);
                throw Throwables.propagate(e);
            } finally {
                ClassLoaders.setContextClassLoader(oldClassLoader);
            }
        }

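        // Run the user's reducer with the program class loader as the thread's context class
        // loader, restoring the previous loader when done.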
        oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
        try {
            delegate.run(flushingContext);
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }

        // The transaction is not finished yet, but we want all operations to be dispatched
        // (some could still be buffered in memory by the tx agent)
        try {
            basicMapReduceContext.flushOperations();
        } catch (Exception e) {
            LOG.error("Failed to flush operations at the end of reducer of " + basicMapReduceContext, e);
            throw Throwables.propagate(e);
        }

        // Close all writers created by MultipleOutputs
        basicMapReduceContext.closeMultiOutputs();

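        // If the user's reducer implements ProgramLifecycle, invoke destroy(); errors are logged
        // but deliberately swallowed so the task can still finish.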
        if (delegate instanceof ProgramLifecycle) {
            oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
            try {
                ((ProgramLifecycle<? extends RuntimeContext>) delegate).destroy();
            } catch (Exception e) {
                LOG.error("Error during destroy of reducer {}", basicMapReduceContext, e);
                // Do nothing, try to finish
            } finally {
                ClassLoaders.setContextClassLoader(oldClassLoader);
            }
        }
    }

    private WrappedReducer.Context createAutoFlushingContext(final Context context,
            final BasicMapReduceTaskContext basicMapReduceContext) {
        // NOTE: auto-flush will eventually take the size of the buffered data into account, so there
        //       is no need to invest much in (or test) the current count-based approach
        final int flushFreq = context.getConfiguration().getInt("c.reducer.flush.freq", 10000);

        @SuppressWarnings("unchecked")
        WrappedReducer.Context flushingContext = new WrappedReducer().new Context(context) {
            private int processedRecords = 0;

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                boolean result = super.nextKeyValue();
                if (++processedRecords > flushFreq) {
                    try {
                        LOG.info("Flushing dataset operations...");
                        basicMapReduceContext.flushOperations();
                    } catch (Exception e) {
                        LOG.error("Failed to persist changes", e);
                        throw Throwables.propagate(e);
                    }
                    processedRecords = 0;
                }
                return result;
            }
        };
        return flushingContext;
    }

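    // Creates an instance of the user's Reducer using the given (program) class loader, so that
    // classes packaged with the program resolve correctly.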
    private Reducer createReducerInstance(ClassLoader classLoader, String userReducer) {
        try {
            return (Reducer) classLoader.loadClass(userReducer).newInstance();
        } catch (Exception e) {
            LOG.error("Failed to create instance of the user-defined Reducer class: " + userReducer);
            throw Throwables.propagate(e);
        }
    }
}
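
Example

For context, here is a minimal, hypothetical sketch of where wrap() fits during job setup. It is not part of the CDAP codebase: WrapReducerExample, MyReducer, and the job name are placeholders. wrap() stashes the configured reducer class name in the job configuration and installs ReducerWrapper as the job's actual reducer, so the user's class is loaded only later, inside run(), via the program class loader.

import co.cask.cdap.internal.app.runtime.batch.ReducerWrapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;

public class WrapReducerExample {

    // Hypothetical user-defined reducer; stands in for application code.
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "wrap-example");
        job.setReducerClass(MyReducer.class);

        // Swap in the wrapper: the original class name stays in the configuration
        // under "c.reducer.class", and ReducerWrapper becomes the reducer that
        // Hadoop actually instantiates.
        ReducerWrapper.wrap(job);
    }
}

Deferring the class load matters because wrap() runs in the framework's context, where the user's class may not be resolvable; only the program class loader used inside run() is guaranteed to see it.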