Java tutorial
/*
 * Copyright 2014-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch;

import co.cask.cdap.api.ProgramLifecycle;
import co.cask.cdap.api.RuntimeContext;
import co.cask.cdap.common.lang.ClassLoaders;
import co.cask.cdap.common.lang.PropertyFieldSetter;
import co.cask.cdap.internal.app.runtime.DataSetFieldSetter;
import co.cask.cdap.internal.app.runtime.MetricsFieldSetter;
import co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit;
import co.cask.cdap.internal.lang.Reflections;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * Wraps a user-defined implementation of the {@link Mapper} class, which allows extra configuration to be performed.
 */
public class MapperWrapper extends Mapper {

  private static final Logger LOG = LoggerFactory.getLogger(MapperWrapper.class);
  private static final String ATTR_MAPPER_CLASS = "c.mapper.class";

  /**
   * Wraps the mapper defined in the job with this {@link MapperWrapper}, if one is defined.
   *
   * @param job The MapReduce job
   */
  public static void wrap(Job job) {
    // NOTE: we don't use job.getMapperClass() as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String mapClass = conf.get(MRJobConfig.MAP_CLASS_ATTR, Mapper.class.getName());
    conf.set(MapperWrapper.ATTR_MAPPER_CLASS, mapClass);
    job.setMapperClass(MapperWrapper.class);
  }

  /**
   * Retrieves the class name of the wrapped mapper class from a job's configuration.
   *
   * @param conf The configuration from which to get the wrapped class.
   * @return the class name of the wrapped Mapper class
   */
  public static String getWrappedMapper(Configuration conf) {
    String wrappedMapperClassName = conf.get(MapperWrapper.ATTR_MAPPER_CLASS);
    Preconditions.checkNotNull(wrappedMapperClassName, "Wrapped mapper class could not be found.");
    return wrappedMapperClassName;
  }

  @SuppressWarnings("unchecked")
  @Override
  public void run(Context context) throws IOException, InterruptedException {
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
    BasicMapReduceTaskContext basicMapReduceContext = classLoader.getTaskContextProvider().get(context);

    // this is a hook for periodic flushing of changes buffered by datasets (to avoid OOME)
    WrappedMapper.Context flushingContext = createAutoFlushingContext(context, basicMapReduceContext);
    basicMapReduceContext.setHadoopContext(flushingContext);

    InputSplit inputSplit = context.getInputSplit();
    if (inputSplit instanceof TaggedInputSplit) {
      basicMapReduceContext.setInputName(((TaggedInputSplit) inputSplit).getName());
    }

    ClassLoader programClassLoader = classLoader.getProgramClassLoader();
    Mapper delegate = createMapperInstance(programClassLoader, getWrappedMapper(context.getConfiguration()), context);

    // injecting runtime components, like datasets, etc.
    try {
      Reflections.visit(delegate, delegate.getClass(),
                        new PropertyFieldSetter(basicMapReduceContext.getSpecification().getProperties()),
                        new MetricsFieldSetter(basicMapReduceContext.getMetrics()),
                        new DataSetFieldSetter(basicMapReduceContext));
    } catch (Throwable t) {
      LOG.error("Failed to inject fields to {}.", delegate.getClass(), t);
      throw Throwables.propagate(t);
    }

    ClassLoader oldClassLoader;
    if (delegate instanceof ProgramLifecycle) {
      oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
      try {
        ((ProgramLifecycle) delegate).initialize(new MapReduceLifecycleContext(basicMapReduceContext));
      } catch (Exception e) {
        LOG.error("Failed to initialize mapper with {}", basicMapReduceContext, e);
        throw Throwables.propagate(e);
      } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
      }
    }

    oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
    try {
      delegate.run(flushingContext);
    } finally {
      ClassLoaders.setContextClassLoader(oldClassLoader);
    }

    // transaction is not finished, but we want all operations to be dispatched (some could be buffered in
    // memory by tx agent)
    try {
      basicMapReduceContext.flushOperations();
    } catch (Exception e) {
      LOG.error("Failed to flush operations at the end of mapper of {}", basicMapReduceContext, e);
      throw Throwables.propagate(e);
    }

    // Close all writers created by MultipleOutputs
    basicMapReduceContext.closeMultiOutputs();

    if (delegate instanceof ProgramLifecycle) {
      oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
      try {
        ((ProgramLifecycle<? extends RuntimeContext>) delegate).destroy();
      } catch (Exception e) {
        LOG.error("Error during destroy of mapper {}", basicMapReduceContext, e);
        // Do nothing, try to finish
      } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
      }
    }
  }

  private WrappedMapper.Context createAutoFlushingContext(final Context context,
                                                          final BasicMapReduceTaskContext basicMapReduceContext) {
    // NOTE: we will change auto-flush to take into account the size of buffered data, so no need to do/test a lot
    // with the current approach
    final int flushFreq = context.getConfiguration().getInt("c.mapper.flush.freq", 10000);

    @SuppressWarnings("unchecked")
    WrappedMapper.Context flushingContext = new WrappedMapper().new Context(context) {
      private int processedRecords = 0;

      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException {
        boolean result = super.nextKeyValue();
        if (++processedRecords > flushFreq) {
          try {
            LOG.info("Flushing dataset operations...");
            basicMapReduceContext.flushOperations();
          } catch (Exception e) {
            LOG.error("Failed to persist changes", e);
            throw Throwables.propagate(e);
          }
          processedRecords = 0;
        }
        return result;
      }

      @Override
      public InputSplit getInputSplit() {
        InputSplit inputSplit = super.getInputSplit();
        if (inputSplit instanceof TaggedInputSplit) {
          // expose the delegate InputSplit to the user
          inputSplit = ((TaggedInputSplit) inputSplit).getInputSplit();
        }
        return inputSplit;
      }

      @Override
      public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
        InputSplit inputSplit = super.getInputSplit();
        if (inputSplit instanceof TaggedInputSplit) {
          // expose the delegate InputFormat to the user
          return ((TaggedInputSplit) inputSplit).getInputFormatClass();
        }
        return super.getInputFormatClass();
      }
    };
    return flushingContext;
  }

  private Mapper createMapperInstance(ClassLoader classLoader, String userMapper, Context context) {
    if (context.getInputSplit() instanceof TaggedInputSplit) {
      // Find the delegate Mapper from the TaggedInputSplit.
      userMapper = ((TaggedInputSplit) context.getInputSplit()).getMapperClassName();
    }
    try {
      return (Mapper) classLoader.loadClass(userMapper).newInstance();
    } catch (Exception e) {
      LOG.error("Failed to create instance of the user-defined Mapper class: " + userMapper);
      throw Throwables.propagate(e);
    }
  }
}
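
For context, here is a minimal, hypothetical driver sketch showing where MapperWrapper.wrap(Job) fits: the real mapper is registered on the job first, then wrap() records it under the "c.mapper.class" configuration key and substitutes MapperWrapper as the class Hadoop actually instantiates. The driver class name, paths, and the use of Hadoop's built-in TokenCounterMapper/IntSumReducer are illustrative assumptions, not part of the code above; within CDAP this wiring would normally be done by the internal MapReduce runtime rather than by application code, and actually running the wrapped mapper requires the CDAP task runtime (MapReduceClassLoader, BasicMapReduceTaskContext) to be available.

// Hypothetical standalone driver; names and paths are illustrative assumptions.
// Hadoop's built-in TokenCounterMapper stands in for a user-defined Mapper.
package example;

import co.cask.cdap.internal.app.runtime.batch.MapperWrapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class WrapExampleDriver {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "wrap-example");
    job.setJarByClass(WrapExampleDriver.class);

    // Register the "real" mapper first; wrap(job) reads it back from the configuration.
    job.setMapperClass(TokenCounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Record the user mapper under "c.mapper.class" and make MapperWrapper the
    // mapper class submitted to Hadoop; at task time MapperWrapper.run() re-creates
    // the recorded mapper and delegates to it.
    MapperWrapper.wrap(job);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Note that wrap() must be called after job.setMapperClass(...): it snapshots whatever class is currently recorded under MRJobConfig.MAP_CLASS_ATTR, falling back to the plain identity Mapper if none has been set.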