org.apache.hadoop.hive.ql.exec.ExecMapper.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.exec.ExecMapper.java. ExecMapper is the Mapper that drives Hive's map-side query execution: configure() deserializes the map-reduce plan and builds the operator tree, and map() pushes each input row through that tree.
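
For context, ExecMapper plugs into the classic org.apache.hadoop.mapred API: Hive's driver (ExecDriver) registers it on a JobConf and serializes the query plan into the job configuration before submitting. Below is a minimal sketch of that wiring; the class name, paths, and the map-only setting are placeholders for illustration, and the plan-serialization step a real Hive driver performs is omitted.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ExecMapperJobSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(ExecMapperJobSketch.class);
        job.setJobName("hive-map-stage-sketch");
        // ExecMapper.configure() reads the serialized MapredWork from the
        // job conf, so a real driver must publish the plan there first.
        job.setMapperClass(org.apache.hadoop.hive.ql.exec.ExecMapper.class);
        job.setNumReduceTasks(0); // map-only stage, for the sketch only
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        JobClient.runJob(job);
    }
}

Without a plan in the configuration, Utilities.getMapRedWork(job) in configure() below would fail and the mapper would abort during setup.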

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.io.Serializable;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.net.URLClassLoader;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;

/**
 * ExecMapper: the Mapper for Hive's MapReduce stages. It rebuilds the
 * map-side operator tree from the serialized plan in configure() and
 * forwards each input row through it in map().
 */
public class ExecMapper extends MapReduceBase implements Mapper {

    private MapOperator mo;
    private Map<String, FetchOperator> fetchOperators;
    private OutputCollector oc;
    private JobConf jc;
    private boolean abort = false;
    private Reporter rp;
    public static final Log l4j = LogFactory.getLog("ExecMapper");
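    // Set in map() once the operator tree reports it is done consuming
    // input; exposed via the static getDone()/setDone() accessors below.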
    private static boolean done;

    // used to log memory usage periodically
    public static MemoryMXBean memoryMXBean;
    private long numRows = 0;
    private long nextCntr = 1;
    private MapredLocalWork localWork = null;
    private boolean isLogInfoEnabled = false;

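    // Per-mapper context handed to every operator in the tree; it carries
    // the job conf, the map-local work, and per-row IO state (resetRow()).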
    private final ExecMapperContext execContext = new ExecMapperContext();

    @Override
    public void configure(JobConf job) {
        // Allocate the memory bean up front so heap usage can be logged
        // throughout the task
        memoryMXBean = ManagementFactory.getMemoryMXBean();
        l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());

        isLogInfoEnabled = l4j.isInfoEnabled();

        try {
            l4j.info("conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
            l4j.info("thread classpath = "
                    + Arrays.asList(((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
        } catch (Exception e) {
            l4j.info("cannot get classpath: " + e.getMessage());
        }
        try {
            jc = job;
            execContext.setJc(jc);
            // create map and fetch operators
            MapredWork mrwork = Utilities.getMapRedWork(job);
            mo = new MapOperator();
            mo.setConf(mrwork);
            // initialize map operator
            mo.setChildren(job);
            l4j.info(mo.dump(0));
            // initialize map local work
            localWork = mrwork.getMapLocalWork();
            execContext.setLocalWork(localWork);

            mo.setExecContext(execContext);
            mo.initializeLocalWork(jc);
            mo.initialize(jc, null);

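            // No map-side local work (e.g. no map join) means setup is done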
            if (localWork == null) {
                return;
            }

            // The following code is for map joins:
            // initialize all the dummy parent operators
            l4j.info("Initializing dummy operator");
            List<Operator<? extends Serializable>> dummyOps = localWork.getDummyParentOp();
            for (Operator<? extends Serializable> dummyOp : dummyOps) {
                dummyOp.setExecContext(execContext);
                dummyOp.initialize(jc, null);
            }

        } catch (Throwable e) {
            abort = true;
            if (e instanceof OutOfMemoryError) {
                // Rethrow the OutOfMemoryError as-is: don't allocate a new
                // exception object when the heap may already be exhausted
                throw (OutOfMemoryError) e;
            } else {
                throw new RuntimeException("Map operator initialization failed", e);
            }
        }
    }

    public void map(Object key, Object value, OutputCollector output, Reporter reporter) throws IOException {
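        // The OutputCollector and Reporter only become available once map()
        // is called, so capture them on the first row and hand them to the
        // operator tree.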
        if (oc == null) {
            oc = output;
            rp = reporter;
            mo.setOutputCollector(oc);
            mo.setReporter(rp);
        }
        // reset the execContext for each new row
        execContext.resetRow();

        try {
            if (mo.getDone()) {
                done = true;
            } else {
                // Since there is no concept of a group, we don't invoke
                // startGroup/endGroup for a mapper
                mo.process((Writable) value);
                if (isLogInfoEnabled) {
                    numRows++;
                    if (numRows == nextCntr) {
                        long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed();
                        l4j.info("ExecMapper: processing " + numRows + " rows: used memory = " + used_memory);
                        nextCntr = getNextCntr(numRows);
                    }
                }
            }
        } catch (Throwable e) {
            abort = true;
            if (e instanceof OutOfMemoryError) {
                // Don't create a new object if we are already out of memory
                throw (OutOfMemoryError) e;
            } else {
                l4j.fatal(StringUtils.stringifyException(e));
                throw new RuntimeException(e);
            }
        }
    }

    private long getNextCntr(long cntr) {
        // A very simple counter to keep track of the number of rows processed
        // by this mapper. It logs every 1 million rows, and more frequently
        // (at every power of ten) before that
        if (cntr >= 1000000) {
            return cntr + 1000000;
        }

        return 10 * cntr;
    }

    @Override
    public void close() {
        // No row was processed
        if (oc == null) {
            l4j.trace("Close called. no row processed by map.");
        }

        // check if there are IOExceptions
        if (!abort) {
            abort = execContext.getIoCxt().getIOExceptions();
        }

        // Detect failed executions via exceptions thrown by the operator
        // tree; ideally Hadoop would tell us directly whether the map
        // execution failed or not
        try {
            mo.close(abort);

            // close the map-side local work, if any
            if (localWork != null) {
                List<Operator<? extends Serializable>> dummyOps = localWork.getDummyParentOp();

                for (Operator<? extends Serializable> dummyOp : dummyOps) {
                    dummyOp.close(abort);
                }
            }

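            // Note: fetchOperators is never assigned anywhere in this class,
            // so this block is effectively a no-op here.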
            if (fetchOperators != null) {
                MapredLocalWork localWork = mo.getConf().getMapLocalWork();
                for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
                    Operator<? extends Serializable> forwardOp = localWork.getAliasToWork().get(entry.getKey());
                    forwardOp.close(abort);
                }
            }

            if (isLogInfoEnabled) {
                long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed();
                l4j.info("ExecMapper: processed " + numRows + " rows: used memory = " + used_memory);
            }

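            // Walk the operator tree in preorder and publish each operator's
            // row statistics as MapReduce counters.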
            reportStats rps = new reportStats(rp);
            mo.preorderMap(rps);
            return;
        } catch (Exception e) {
            if (!abort) {
                // signal new failure to map-reduce
                l4j.error("Hit error while closing operators - failing tree");
                throw new RuntimeException("Hive Runtime Error while closing operators", e);
            }
        }
    }

    public static boolean getDone() {
        return done;
    }

    public boolean isAbort() {
        return abort;
    }

    public void setAbort(boolean abort) {
        this.abort = abort;
    }

    public static void setDone(boolean done) {
        ExecMapper.done = done;
    }

    /**
     * reportStats: an OperatorFunc that publishes each operator's
     * statistics to the MapReduce framework as counters.
     */
    public static class reportStats implements Operator.OperatorFunc {
        Reporter rp;

        public reportStats(Reporter rp) {
            this.rp = rp;
        }

        public void func(Operator op) {
            Map<Enum, Long> opStats = op.getStats();
            for (Map.Entry<Enum, Long> e : opStats.entrySet()) {
                if (rp != null) {
                    rp.incrCounter(e.getKey(), e.getValue());
                }
            }
        }
    }
}
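
As a footnote on the logging cadence: getNextCntr produces thresholds of 1, 10, 100, ... up to 1,000,000 rows, then every additional million rows. A self-contained snippet reproducing the sequence (the class name is mine, for illustration only):

public class RowCounterDemo {
    // Same logic as ExecMapper.getNextCntr, copied here for illustration.
    static long getNextCntr(long cntr) {
        if (cntr >= 1000000) {
            return cntr + 1000000;
        }
        return 10 * cntr;
    }

    public static void main(String[] args) {
        long threshold = 1;
        for (int i = 0; i < 10; i++) {
            // prints 1, 10, 100, ..., 1000000, 2000000, 3000000, 4000000
            System.out.println(threshold);
            threshold = getNextCntr(threshold);
        }
    }
}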