org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.backend.hadoop.executionengine.tez;

import java.io.IOException;
import java.io.PrintStream;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.counters.Limits;
import org.apache.hadoop.util.StringUtils;
import org.apache.pig.PigConfiguration;
import org.apache.pig.PigException;
import org.apache.pig.PigWarning;
import org.apache.pig.backend.BackendException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.backend.hadoop.executionengine.Launcher;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezCompiler;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperPlan;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPOPackageAnnotator;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPlanContainer;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPlanContainerNode;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPlanContainerPrinter;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.NativeTezOper;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.AccumulatorOptimizer;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.CombinerOptimizer;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.LoaderProcessor;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.MultiQueryOptimizerTez;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.NoopFilterRemover;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.ParallelismSetter;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.SecondaryKeyOptimizerTez;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.UnionOptimizer;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.plan.CompilationMessageCollector;
import org.apache.pig.impl.plan.CompilationMessageCollector.MessageType;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.LogUtils;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.tez.TezPigScriptStats;
import org.apache.pig.tools.pigstats.tez.TezScriptState;
import org.apache.pig.tools.pigstats.tez.TezVertexStats;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;

import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * Main class that launches pig for Tez
 */
public class TezLauncher extends Launcher {
    private static final Log log = LogFactory.getLog(TezLauncher.class);
    private static ThreadFactory namedThreadFactory;
    private ExecutorService executor;
    private boolean aggregateWarning = false;
    private TezScriptState tezScriptState;
    private TezPigScriptStats tezStats;
    private TezJob runningJob;

    public TezLauncher() {
        if (namedThreadFactory == null) {
            namedThreadFactory = new ThreadFactoryBuilder().setNameFormat("PigTezLauncher-%d").setDaemon(true)
                    .setUncaughtExceptionHandler(new JobControlThreadExceptionHandler()).build();
        }
    }

    @Override
    public PigStats launchPig(PhysicalPlan php, String grpName, PigContext pc) throws Exception {
        synchronized (this) {
            if (executor == null) {
                executor = Executors.newSingleThreadExecutor(namedThreadFactory);
            }
        }
        if (pc.getExecType().isLocal()) {
            pc.getProperties().setProperty(TezConfiguration.TEZ_LOCAL_MODE, "true");
            pc.getProperties().setProperty(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true");
            pc.getProperties().setProperty(TezConfiguration.TEZ_IGNORE_LIB_URIS, "true");
            pc.getProperties().setProperty(TezConfiguration.TEZ_AM_DAG_SCHEDULER_CLASS,
                    DAGSchedulerNaturalOrderControlled.class.getName());
        }
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), true);
        // Make sure MR counter does not exceed limit
        if (conf.get(TezConfiguration.TEZ_COUNTERS_MAX) != null) {
            conf.setInt(org.apache.hadoop.mapreduce.MRJobConfig.COUNTERS_MAX_KEY,
                    Math.max(conf.getInt(org.apache.hadoop.mapreduce.MRJobConfig.COUNTERS_MAX_KEY, 0),
                            conf.getInt(TezConfiguration.TEZ_COUNTERS_MAX, 0)));
        }
        if (conf.get(TezConfiguration.TEZ_COUNTERS_MAX_GROUPS) != null) {
            conf.setInt(org.apache.hadoop.mapreduce.MRJobConfig.COUNTER_GROUPS_MAX_KEY,
                    Math.max(conf.getInt(org.apache.hadoop.mapreduce.MRJobConfig.COUNTER_GROUPS_MAX_KEY, 0),
                            conf.getInt(TezConfiguration.TEZ_COUNTERS_MAX_GROUPS, 0)));
        }

        // This is hacky, but Limits cannot be initialized twice
        try {
            Field f = Limits.class.getDeclaredField("isInited");
            f.setAccessible(true);
            f.setBoolean(null, false);
            Limits.init(conf);
        } catch (Throwable e) {
            log.warn("Error when setting counter limit: " + e.getMessage());
        }

        if (pc.defaultParallel == -1 && !conf.getBoolean(PigConfiguration.PIG_TEZ_AUTO_PARALLELISM, true)) {
            pc.defaultParallel = 1;
        }
        aggregateWarning = conf.getBoolean("aggregate.warning", false);

        TezResourceManager tezResourceManager = TezResourceManager.getInstance();
        tezResourceManager.init(pc, conf);

        String stagingDir = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR);
        String resourcesDir = tezResourceManager.getResourcesDir().toString();
        if (stagingDir == null) {
            // If not set in tez-site.xml, use Pig's tez resources directory as staging directory
            // instead of TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT
            stagingDir = resourcesDir;
            conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, resourcesDir);
        }
        log.info("Tez staging directory is " + stagingDir + " and resources directory is " + resourcesDir);

        List<TezOperPlan> processedPlans = new ArrayList<TezOperPlan>();

        tezScriptState = TezScriptState.get();
        tezStats = new TezPigScriptStats(pc);
        PigStats.start(tezStats);

        conf.set(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, "true");
        TezJobCompiler jc = new TezJobCompiler(pc, conf);
        TezPlanContainer tezPlanContainer = compile(php, pc);

        tezStats.initialize(tezPlanContainer);
        tezScriptState.emitInitialPlanNotification(tezPlanContainer);
        tezScriptState.emitLaunchStartedNotification(tezPlanContainer.size()); //number of DAGs to Launch

        boolean stop_on_failure = Boolean.valueOf(pc.getProperties().getProperty("stop.on.failure", "false"));
        boolean stoppedOnFailure = false;

        TezPlanContainerNode tezPlanContainerNode;
        TezOperPlan tezPlan;
        int processedDAGs = 0;
        while ((tezPlanContainerNode = tezPlanContainer.getNextPlan(processedPlans)) != null) {
            tezPlan = tezPlanContainerNode.getTezOperPlan();
            processLoadAndParallelism(tezPlan, pc);
            processedPlans.add(tezPlan);
            ProgressReporter reporter = new ProgressReporter(tezPlanContainer.size(), processedDAGs);
            if (tezPlan.size() == 1 && tezPlan.getRoots().get(0) instanceof NativeTezOper) {
                // Native Tez Plan
                NativeTezOper nativeOper = (NativeTezOper) tezPlan.getRoots().get(0);
                tezScriptState.emitJobsSubmittedNotification(1);
                nativeOper.runJob(tezPlanContainerNode.getOperatorKey().toString());
            } else {
                TezPOPackageAnnotator pkgAnnotator = new TezPOPackageAnnotator(tezPlan);
                pkgAnnotator.visit();

                runningJob = jc.compile(tezPlanContainerNode, tezPlanContainer);
                //TODO: Exclude vertex groups from numVerticesToLaunch ??
                tezScriptState.dagLaunchNotification(runningJob.getName(), tezPlan, tezPlan.size());
                runningJob.setPigStats(tezStats);

                // Set the thread UDFContext so registered classes are available.
                final UDFContext udfContext = UDFContext.getUDFContext();
                Runnable task = new Runnable() {
                    @Override
                    public void run() {
                        Thread.currentThread().setContextClassLoader(PigContext.getClassLoader());
                        UDFContext.setUdfContext(udfContext.clone());
                        runningJob.run();
                    }
                };

                // Mark the times that the jobs were submitted so it's reflected in job
                // history props. TODO: Fix this. unused now
                long scriptSubmittedTimestamp = System.currentTimeMillis();
                // Job.getConfiguration returns the shared configuration object
                Configuration jobConf = runningJob.getConfiguration();
                jobConf.set("pig.script.submitted.timestamp", Long.toString(scriptSubmittedTimestamp));
                jobConf.set("pig.job.submitted.timestamp", Long.toString(System.currentTimeMillis()));

                Future<?> future = executor.submit(task);
                tezScriptState.emitJobsSubmittedNotification(1);

                boolean jobStarted = false;

                while (!future.isDone()) {
                    if (!jobStarted && runningJob.getApplicationId() != null) {
                        jobStarted = true;
                        String appId = runningJob.getApplicationId().toString();
                        //For Oozie Pig action job id matching compatibility with MR mode
                        log.info("HadoopJobId: " + appId.replace("application", "job"));
                        tezScriptState.emitJobStartedNotification(appId);
                        tezScriptState.dagStartedNotification(runningJob.getName(), appId);
                    }
                    reporter.notifyUpdate();
                    Thread.sleep(1000);
                }
                // For tez_local mode where PigProcessor destroys all UDFContext
                UDFContext.setUdfContext(udfContext);
                try {
                    // In case of FutureTask there is no uncaught exception
                    // Need to do future.get() to get any exception
                    future.get();
                } catch (ExecutionException e) {
                    setJobException(e.getCause());
                }
            }
            processedDAGs++;
            if (tezPlanContainer.size() == processedDAGs) {
                tezScriptState.emitProgressUpdatedNotification(100);
            } else {
                tezScriptState.emitProgressUpdatedNotification(
                        ((tezPlanContainer.size() - processedDAGs) / tezPlanContainer.size()) * 100);
            }
            handleUnCaughtException(pc);
            boolean tezDAGSucceeded = reporter.notifyFinishedOrFailed();
            tezPlanContainer.updatePlan(tezPlan, tezDAGSucceeded);
            // if stop_on_failure is enabled, we need to stop immediately when any job has failed
            if (!tezDAGSucceeded) {
                if (stop_on_failure) {
                    stoppedOnFailure = true;
                    break;
                } else {
                    log.warn("Ooops! Some job has failed! Specify -stop_on_failure if you "
                            + "want Pig to stop immediately on failure.");
                }
            }
        }

        tezStats.finish();
        tezScriptState.emitLaunchCompletedNotification(tezStats.getNumberSuccessfulJobs());

        for (OutputStats output : tezStats.getOutputStats()) {
            POStore store = output.getPOStore();
            try {
                if (!output.isSuccessful()) {
                    store.getStoreFunc().cleanupOnFailure(store.getSFile().getFileName(),
                            Job.getInstance(output.getConf()));
                } else {
                    store.getStoreFunc().cleanupOnSuccess(store.getSFile().getFileName(),
                            Job.getInstance(output.getConf()));
                }
            } catch (IOException e) {
                throw new ExecException(e);
            } catch (AbstractMethodError nsme) {
                // Just swallow it.  This means we're running against an
                // older instance of a StoreFunc that doesn't implement
                // this method.
            }
        }

        if (stoppedOnFailure) {
            throw new ExecException("Stopping execution on job failure with -stop_on_failure option", 6017,
                    PigException.REMOTE_ENVIRONMENT);
        }

        return tezStats;
    }

    private void handleUnCaughtException(PigContext pc) throws Exception {
        //check for the uncaught exceptions from TezJob thread
        //if the job controller fails before launching the jobs then there are
        //no jobs to check for failure
        if (jobControlException != null) {
            if (jobControlException instanceof PigException) {
                if (jobControlExceptionStackTrace != null) {
                    LogUtils.writeLog("Error message from Tez Job", jobControlExceptionStackTrace,
                            pc.getProperties().getProperty("pig.logfile"), log);
                }
                throw jobControlException;
            } else {
                int errCode = 2117;
                String msg = "Unexpected error when launching Tez job.";
                throw new ExecException(msg, errCode, PigException.BUG, jobControlException);
            }
        }
    }

    private void computeWarningAggregate(Map<String, Map<String, Long>> counterGroups, Map<Enum, Long> aggMap) {
        for (Map<String, Long> counters : counterGroups.values()) {
            for (Enum e : PigWarning.values()) {
                if (counters.containsKey(e.toString())) {
                    if (aggMap.containsKey(e.toString())) {
                        Long currentCount = aggMap.get(e.toString());
                        currentCount = (currentCount == null ? 0 : currentCount);
                        if (counters != null) {
                            currentCount += counters.get(e.toString());
                        }
                        aggMap.put(e, currentCount);
                    } else {
                        aggMap.put(e, counters.get(e.toString()));
                    }
                }
            }
        }
    }

    private class ProgressReporter {
        private int totalDAGs;
        private int processedDAGS;
        private int count = 0;
        private int prevProgress = 0;

        public ProgressReporter(int totalDAGs, int processedDAGs) {
            this.totalDAGs = totalDAGs;
            this.processedDAGS = processedDAGs;
        }

        public void notifyUpdate() {
            DAGStatus dagStatus = runningJob.getDAGStatus();
            if (dagStatus != null && dagStatus.getState() == DAGStatus.State.RUNNING) {
                // Emit notification when the job has progressed more than 1%,
                // or every 20 seconds
                int currProgress = Math.round(runningJob.getDAGProgress() * 100f);
                if (currProgress - prevProgress >= 1 || count % 100 == 0) {
                    tezScriptState.dagProgressNotification(runningJob.getName(), -1, currProgress);
                    tezScriptState
                            .emitProgressUpdatedNotification((currProgress + (100 * processedDAGS)) / totalDAGs);
                    prevProgress = currProgress;
                }
                count++;
            }
            // TODO: Add new vertex tracking methods to PigTezProgressNotificationListener
            // and emit notifications for individual vertex start, progress and completion
        }

        public boolean notifyFinishedOrFailed() {
            DAGStatus dagStatus = runningJob.getDAGStatus();
            if (dagStatus == null) {
                return false;
            }
            if (dagStatus.getState() == DAGStatus.State.SUCCEEDED) {
                Map<Enum, Long> warningAggMap = new HashMap<Enum, Long>();
                DAG dag = runningJob.getDAG();
                for (Vertex v : dag.getVertices()) {
                    TezVertexStats tts = tezStats.getVertexStats(dag.getName(), v.getName());
                    if (tts == null) {
                        continue; //vertex groups
                    }
                    Map<String, Map<String, Long>> counterGroups = tts.getCounters();
                    if (counterGroups == null) {
                        log.warn("Counters are not available for vertex " + v.getName()
                                + ". Not computing warning aggregates.");
                    } else {
                        computeWarningAggregate(counterGroups, warningAggMap);
                    }
                }
                if (aggregateWarning) {
                    CompilationMessageCollector.logAggregate(warningAggMap, MessageType.Warning, log);
                }
                return true;
            }
            return false;
        }
    }

    @Override
    public void explain(PhysicalPlan php, PigContext pc, PrintStream ps, String format, boolean verbose)
            throws PlanException, VisitorException, IOException {
        log.debug("Entering TezLauncher.explain");
        TezPlanContainer tezPlanContainer = compile(php, pc);

        if (format.equals("text")) {
            TezPlanContainerPrinter printer = new TezPlanContainerPrinter(ps, tezPlanContainer);
            printer.setVerbose(verbose);
            printer.visit();
        } else {
            // TODO: add support for other file format
            throw new IOException("Non-text output of explain is not supported.");
        }
    }

    public TezPlanContainer compile(PhysicalPlan php, PigContext pc)
            throws PlanException, IOException, VisitorException {
        TezCompiler comp = new TezCompiler(php, pc);
        comp.compile();
        TezPlanContainer planContainer = comp.getPlanContainer();
        for (Map.Entry<OperatorKey, TezPlanContainerNode> entry : planContainer.getKeys().entrySet()) {
            TezOperPlan tezPlan = entry.getValue().getTezOperPlan();
            optimize(tezPlan, pc);
        }
        return planContainer;
    }

    private void optimize(TezOperPlan tezPlan, PigContext pc) throws VisitorException {
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        boolean aggregateWarning = conf.getBoolean("aggregate.warning", false);

        NoopFilterRemover filter = new NoopFilterRemover(tezPlan);
        filter.visit();

        // Run CombinerOptimizer on Tez plan
        boolean nocombiner = conf.getBoolean(PigConfiguration.PIG_EXEC_NO_COMBINER, false);
        if (!pc.inIllustrator && !nocombiner) {
            boolean doMapAgg = Boolean
                    .parseBoolean(pc.getProperties().getProperty(PigConfiguration.PIG_EXEC_MAP_PARTAGG, "false"));
            CombinerOptimizer co = new CombinerOptimizer(tezPlan, doMapAgg);
            co.visit();
            co.getMessageCollector().logMessages(MessageType.Warning, aggregateWarning, log);
        }

        // Run optimizer to make use of secondary sort key when possible for nested foreach
        // order by and distinct. Should be done before AccumulatorOptimizer
        boolean noSecKeySort = conf.getBoolean(PigConfiguration.PIG_EXEC_NO_SECONDARY_KEY, false);
        if (!pc.inIllustrator && !noSecKeySort) {
            SecondaryKeyOptimizerTez skOptimizer = new SecondaryKeyOptimizerTez(tezPlan);
            skOptimizer.visit();
        }

        boolean isUnionOpt = conf.getBoolean(PigConfiguration.PIG_TEZ_OPT_UNION, true);
        List<String> supportedStoreFuncs = null;
        String unionSupported = conf.get(PigConfiguration.PIG_TEZ_OPT_UNION_SUPPORTED_STOREFUNCS);
        if (unionSupported != null && unionSupported.trim().length() > 0) {
            supportedStoreFuncs = Arrays.asList(StringUtils.split(unionSupported.trim()));
        }
        List<String> unionUnsupportedStoreFuncs = null;
        String unionUnSupported = conf.get(PigConfiguration.PIG_TEZ_OPT_UNION_UNSUPPORTED_STOREFUNCS);
        if (unionUnSupported != null && unionUnSupported.trim().length() > 0) {
            unionUnsupportedStoreFuncs = Arrays.asList(StringUtils.split(unionUnSupported.trim()));
        }

        boolean isMultiQuery = conf.getBoolean(PigConfiguration.PIG_OPT_MULTIQUERY, true);
        if (isMultiQuery) {
            // reduces the number of TezOpers in the Tez plan generated
            // by multi-query (multi-store) script.
            MultiQueryOptimizerTez mqOptimizer = new MultiQueryOptimizerTez(tezPlan, isUnionOpt,
                    supportedStoreFuncs, unionUnsupportedStoreFuncs);
            mqOptimizer.visit();
        }

        // Run AccumulatorOptimizer on Tez plan
        boolean isAccum = conf.getBoolean(PigConfiguration.PIG_OPT_ACCUMULATOR, true);
        if (isAccum) {
            AccumulatorOptimizer accum = new AccumulatorOptimizer(tezPlan);
            accum.visit();
        }

        // Use VertexGroup in Tez
        if (isUnionOpt) {
            UnionOptimizer uo = new UnionOptimizer(tezPlan, supportedStoreFuncs, unionUnsupportedStoreFuncs);
            uo.visit();
        }

    }

    public static void processLoadAndParallelism(TezOperPlan tezPlan, PigContext pc) throws VisitorException {
        if (!pc.inExplain && !pc.inDumpSchema) {
            LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
            loaderStorer.visit();

            ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
            parallelismSetter.visit();
            tezPlan.setEstimatedParallelism(parallelismSetter.getEstimatedTotalParallelism());
        }
    }

    @Override
    public void kill() throws BackendException {
        if (runningJob != null) {
            try {
                runningJob.killJob();
            } catch (Exception e) {
                throw new BackendException(e);
            }
        }
        destroy();
    }

    @Override
    public void killJob(String jobID, Configuration conf) throws BackendException {
        if (runningJob != null && runningJob.getApplicationId().toString() == jobID) {
            try {
                runningJob.killJob();
            } catch (Exception e) {
                throw new BackendException(e);
            }
        } else {
            log.info("Cannot find job: " + jobID);
        }
    }

    @Override
    public void destroy() {
        try {
            if (executor != null && !executor.isShutdown()) {
                log.info("Shutting down thread pool");
                executor.shutdownNow();
            }
        } catch (Exception e) {
            log.warn("Error shutting down threadpool");
        }
    }

}