org.apache.sysml.api.DMLScript.java Source code

Introduction

Here is the source code for org.apache.sysml.api.DMLScript.java.
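
This class is the command-line entry point of Apache SystemML. Before the listing, here is a
minimal sketch of how it can also be driven programmatically through the public entry points
shown in the source (main and executeScript); the script path and arguments are hypothetical
placeholders:

    import org.apache.sysml.api.DMLScript;

    public class RunDMLExample {
        public static void main(String[] args) throws Exception {
            // Hypothetical script location and arguments; adjust to your environment.
            String[] dmlArgs = new String[] {
                "-f", "/tmp/example.dml",     // DML script on the local file system
                "-exec", "hybrid",            // execution mode: CP or MR (the default)
                "-stats",                     // report caching/recompilation statistics
                "-nvargs", "X=in.csv", "k=5"  // named arguments, bound as $X and $k
            };
            DMLScript.main(dmlArgs);          // parses the args and executes the script
        }
    }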

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.api;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.sysml.api.mlcontext.ScriptType;
import org.apache.sysml.conf.CompilerConfig;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.debug.DMLDebugger;
import org.apache.sysml.debug.DMLDebuggerException;
import org.apache.sysml.debug.DMLDebuggerProgramInfo;
import org.apache.sysml.hops.HopsException;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.hops.OptimizerUtils.OptimizationLevel;
import org.apache.sysml.hops.globalopt.GlobalOptimizerWrapper;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.parser.AParserWrapper;
import org.apache.sysml.parser.DMLProgram;
import org.apache.sysml.parser.DMLTranslator;
import org.apache.sysml.parser.LanguageException;
import org.apache.sysml.parser.ParseException;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLScriptException;
import org.apache.sysml.runtime.controlprogram.Program;
import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
import org.apache.sysml.runtime.matrix.CleanupMR;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.LocalFileUtils;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.utils.Explain;
import org.apache.sysml.utils.Explain.ExplainCounts;
import org.apache.sysml.utils.Explain.ExplainType;
import org.apache.sysml.utils.Statistics;
import org.apache.sysml.yarn.DMLAppMasterUtils;
import org.apache.sysml.yarn.DMLYarnClientProxy;

public class DMLScript {
    public enum RUNTIME_PLATFORM {
        HADOOP, // execute all matrix operations in MR
        SINGLE_NODE, // execute all matrix operations in CP
        HYBRID, // execute matrix operations in CP or MR
        HYBRID_SPARK, // execute matrix operations in CP or Spark   
        SPARK // execute matrix operations in Spark
    }
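
    // Note: the platform is selected via the -exec command-line flag (hadoop,
    // singlenode, hybrid, hybrid_spark, spark); see parseRuntimePlatform below.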

    public static RUNTIME_PLATFORM rtplatform = OptimizerUtils.getDefaultExecutionMode();
    public static boolean STATISTICS = false; //default statistics
    public static int STATISTICS_COUNT = 10; //default statistics maximum heavy hitter count
    public static boolean ENABLE_DEBUG_MODE = false; //default debug mode
    public static boolean USE_LOCAL_SPARK_CONFIG = false; //set default local spark configuration - used for local testing
    public static String DML_FILE_PATH_ANTLR_PARSER = null;
    public static ExplainType EXPLAIN = ExplainType.NONE; //default explain
    /**
     * Global variable indicating the script type (DML or PYDML). Can be used
     * for DML/PYDML-specific tasks, such as outputting booleans in the correct
     * case (TRUE/FALSE for DML and True/False for PYDML).
     */
    public static ScriptType SCRIPT_TYPE = ScriptType.DML;

    public static boolean USE_ACCELERATOR = false;
    public static boolean FORCE_ACCELERATOR = false;

    // ------------------------------------------------------------------------
    // We have identified two performance bugs that frequently occur in deep learning scripts.
    //
    // First, we repeatedly perform unnecessary conversions to sparse format, even though
    // operations such as matrix multiplication (including BLAS and CuBLAS) are optimized for dense data.
    //
    // Second, even with a large memory budget, we sometimes spend almost 20-30% of the time in caching.
    // These two bugs are tracked in SYSTEMML-1140 and should be addressed before the SystemML 1.0 release.
    // We are keeping these flags for performance debugging until SYSTEMML-1140 is resolved.
    public static boolean DISABLE_SPARSE = false;
    public static boolean DISABLE_CACHING = false;
    // ------------------------------------------------------------------------
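    // For performance debugging, these public flags can be toggled before invocation;
    // a hypothetical sketch:
    //   DMLScript.DISABLE_CACHING = true;    //globally disables buffer-pool caching (see execute())
    //   DMLScript.executeScript(conf, args);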

    // flag that indicates whether or not to suppress any prints to stdout
    public static boolean _suppressPrint2Stdout = false;

    public static String _uuid = IDHandler.createDistributedUniqueID();
    public static boolean _activeAM = false;

    private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());

    public static String USAGE = "Usage is " + DMLScript.class.getCanonicalName() + " -f <filename>"
    //+ " (-exec <mode>)?" + " (-explain <type>)?" + " (-stats)?" + " (-clean)?" + " (-config=<config_filename>)? 
            + " [-options] ([-args | -nvargs] <args-list>)? \n"
            + "   -f: <filename> will be interpreted as a filename path (if <filename> is prefixed\n"
            + "         with hdfs or gpfs it is read from DFS, otherwise from local file system)\n"
            //undocumented feature in beta 08/2014 release
            //+ "   -s: <filename> will be interpreted as a DML script string \n"
            + "   -python: (optional) parses Python-like DML\n" + "   -debug: (optional) run in debug mode\n"
            + "   -gpu: <flags> (optional) use acceleration whenever possible. Current version only supports CUDA.\n"
            + "         Supported <flags> for this mode is force=(true|false)\n"
            // Later add optional flags to indicate optimizations turned on or off. Currently they are turned off.
            //+ "   -debug: <flags> (optional) run in debug mode\n"
            //+ "         Optional <flags> that is supported for this mode is optimize=(on|off)\n"
            + "   -exec: <mode> (optional) execution mode (hadoop, singlenode, [hybrid], hybrid_spark)\n"
            + "   -explain: <type> (optional) explain plan (hops, [runtime], recompile_hops, recompile_runtime)\n"
            + "   -stats: <count> (optional) monitor and report caching/recompilation statistics, default heavy hitter count is 10\n"
            + "   -clean: (optional) cleanup all SystemML working directories (FS, DFS).\n"
            + "         All other flags are ignored in this mode. \n"
            + "   -config: (optional) use config file <config_filename> (default: use parameter\n"
            + "         values in default SystemML-config.xml config file; if <config_filename> is\n"
            + "         prefixed with hdfs or gpfs it is read from DFS, otherwise from local file system)\n"
            + "   -args: (optional) parameterize DML script with contents of [args list], ALL args\n"
            + "         after -args flag, each argument must be an unnamed-argument, where 1st value\n"
            + "         after -args will replace $1 in DML script, 2nd value will replace $2, etc.\n"
            + "   -nvargs: (optional) parameterize DML script with contents of [args list], ALL args\n"
            + "         after -nvargs flag, each argument must be be named-argument of form argName=argValue,\n"
            + "         where value will replace $argName in DML script, argName must be a valid DML variable\n"
            + "         name (start with letter, contain only letters, numbers, or underscores).\n"
            + "   <args-list>: (optional) args to DML script \n"
            + "   -? | -help: (optional) show this help message \n";

    ///////////////////////////////
    // public external interface
    ////////

    public static String getUUID() {
        return _uuid;
    }

    /**
     * Used to set master UUID on all nodes (in parfor remote_mr, where DMLScript is passed)
     * in order to simplify cleanup of scratch_space and local working dirs.
     * 
     * @param uuid master UUID to set on all nodes
     */
    public static void setUUID(String uuid) {
        _uuid = uuid;
    }

    public static boolean suppressPrint2Stdout() {
        return _suppressPrint2Stdout;
    }

    public static void setActiveAM() {
        _activeAM = true;
    }

    public static boolean isActiveAM() {
        return _activeAM;
    }

    /**
     * Default DML script invocation (e.g., via 'hadoop jar SystemML.jar -f Test.dml')
     * 
     * @param args command-line arguments
     * @throws IOException if an IOException occurs
     * @throws DMLException if a DMLException occurs
     */
    public static void main(String[] args) throws IOException, DMLException {
        Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        try {
            DMLScript.executeScript(conf, otherArgs);
        } catch (ParseException pe) {
            System.err.println(pe.getMessage());
        } catch (DMLScriptException e) {
            // In case of DMLScriptException, simply print the error message.
            System.err.println(e.getMessage());
        }
    }

    /**
     * Single entry point for all public invocation alternatives (e.g.,
     * main, executeScript, JaqlUdf etc)
     * 
     * @param conf Hadoop configuration
     * @param args arguments
     * @return true if success, false otherwise
     * @throws DMLException if DMLException occurs
     * @throws ParseException if ParseException occurs
     */
    public static boolean executeScript(Configuration conf, String[] args) throws DMLException {
        //Step 1: parse arguments 
        //check for help 
        if (args.length == 0
                || (args.length == 1 && (args[0].equalsIgnoreCase("-help") || args[0].equalsIgnoreCase("-?")))) {
            System.err.println(USAGE);
            return true;
        }

        //check for clean
        else if (args.length == 1 && args[0].equalsIgnoreCase("-clean")) {
            cleanSystemMLWorkspace();
            return true;
        }

        //check number of args - print usage if incorrect
        if (args.length < 2) {
            System.err.println("ERROR: Unrecognized invocation arguments.");
            System.err.println(USAGE);
            return false;
        }

        //check script arg - print usage if incorrect
        if (!(args[0].equals("-f") || args[0].equals("-s"))) {
            System.err.println("ERROR: First argument must be either -f or -s");
            System.err.println(USAGE);
            return false;
        }

        //parse arguments and set execution properties
        RUNTIME_PLATFORM oldrtplatform = rtplatform; //keep old rtplatform
        ExplainType oldexplain = EXPLAIN; //keep old explain

        // Reset global flags to avoid errors in test suite
        ENABLE_DEBUG_MODE = false;

        boolean parsePyDML = false;
        try {
            String fnameOptConfig = null; //optional config filename
            String[] scriptArgs = null; //optional script arguments
            boolean namedScriptArgs = false;

            for (int i = 2; i < args.length; i++) {
                if (args[i].equalsIgnoreCase("-explain")) {
                    EXPLAIN = ExplainType.RUNTIME;
                    if (args.length > (i + 1) && !args[i + 1].startsWith("-"))
                        EXPLAIN = Explain.parseExplainType(args[++i]);
                } else if (args[i].equalsIgnoreCase("-stats")) {
                    STATISTICS = true;
                    if (args.length > (i + 1) && !args[i + 1].startsWith("-"))
                        STATISTICS_COUNT = Integer.parseInt(args[++i]);
                } else if (args[i].equalsIgnoreCase("-exec")) {
                    rtplatform = parseRuntimePlatform(args[++i]);
                    if (rtplatform == null)
                        return false;
                } else if (args[i].startsWith("-config=")) // legacy
                    fnameOptConfig = args[i].substring(8).replaceAll("\"", "");
                else if (args[i].equalsIgnoreCase("-config"))
                    fnameOptConfig = args[++i];
                else if (args[i].equalsIgnoreCase("-debug")) {
                    ENABLE_DEBUG_MODE = true;
                } else if (args[i].equalsIgnoreCase("-gpu")) {
                    USE_ACCELERATOR = true;
                    if (args.length > (i + 1) && !args[i + 1].startsWith("-")) {
                        String flag = args[++i];
                        if (flag.startsWith("force=")) {
                            String[] flagOptions = flag.split("=");
                            if (flagOptions.length == 2)
                                FORCE_ACCELERATOR = Boolean.parseBoolean(flagOptions[1]);
                            else
                                throw new DMLRuntimeException("Unsupported \"force\" option for -gpu:" + flag);
                        } else {
                            throw new DMLRuntimeException("Unsupported flag for -gpu:" + flag);
                        }
                    }
                    GPUContext.createGPUContext(); // Set GPU memory budget
                } else if (args[i].equalsIgnoreCase("-python")) {
                    parsePyDML = true;
                } else if (args[i].startsWith("-args") || args[i].startsWith("-nvargs")) {
                    namedScriptArgs = args[i].startsWith("-nvargs");
                    i++;
                    scriptArgs = new String[args.length - i];
                    System.arraycopy(args, i, scriptArgs, 0, scriptArgs.length);
                    break;
                } else {
                    System.err.println("ERROR: Unknown argument: " + args[i]);
                    return false;
                }
            }

            //set log level
            if (!ENABLE_DEBUG_MODE)
                setLoggingProperties(conf);

            //Step 2: prepare script invocation
            if (StringUtils.endsWithIgnoreCase(args[1], ".pydml")) {
                parsePyDML = true;
            }
            String dmlScriptStr = readDMLScript(args[0], args[1]);
            Map<String, String> argVals = createArgumentsMap(namedScriptArgs, scriptArgs);

            DML_FILE_PATH_ANTLR_PARSER = args[1];

            //Step 3: invoke dml script
            printInvocationInfo(args[1], fnameOptConfig, argVals);
            if (ENABLE_DEBUG_MODE) {
                // the debugger is launched separately to isolate debugger bugs from runtime bugs
                launchDebugger(dmlScriptStr, fnameOptConfig, argVals, parsePyDML);
            } else {
                execute(dmlScriptStr, fnameOptConfig, argVals, args, parsePyDML);
            }

        } catch (ParseException pe) {
            throw pe;
        } catch (DMLScriptException e) {
            //rethrow DMLScriptException to propagate stop call
            throw e;
        } catch (Exception ex) {
            LOG.error("Failed to execute DML script.", ex);
            throw new DMLException(ex);
        } finally {
            //reset runtime platform and explain type
            rtplatform = oldrtplatform;
            EXPLAIN = oldexplain;
        }

        return true;
    }

    ///////////////////////////////
    // private internal utils (argument parsing)
    ////////

    protected static Map<String, String> createArgumentsMap(boolean hasNamedArgs, String[] args)
            throws LanguageException {
        Map<String, String> argMap = new HashMap<String, String>();

        if (args == null)
            return argMap;

        for (int i = 1; i <= args.length; i++) {
            String arg = args[i - 1];

            if (arg.equalsIgnoreCase("-l") || arg.equalsIgnoreCase("-log") || arg.equalsIgnoreCase("-v")
                    || arg.equalsIgnoreCase("-visualize") || arg.equalsIgnoreCase("-explain")
                    || arg.equalsIgnoreCase("-debug") || arg.equalsIgnoreCase("-stats")
                    || arg.equalsIgnoreCase("-exec") || arg.equalsIgnoreCase("-debug")
                    || arg.startsWith("-config=")) {
                throw new LanguageException("-args or -nvargs must be the final argument for DMLScript!");
            }

            //parse arguments (named args / args by position)
            if (hasNamedArgs) {
                // CASE: named argument argName=argValue -- add <argName, argValue> pair to the argument map
                String[] argPieces = arg.split("=", 2); //limit of 2 keeps any '=' inside the value intact
                if (argPieces.length < 2)
                    throw new LanguageException(
                            "for -nvargs option, elements in arg list must be named and have form argName=argValue");
                String argName = argPieces[0];
                String argValue = argPieces[1];

                String varNameRegex = "^[a-zA-Z]([a-zA-Z0-9_])*$";
                if (!argName.matches(varNameRegex))
                    throw new LanguageException("argName " + argName
                            + " must be a valid variable name in DML. Valid variable names in DML start with upper-case or lower-case letter, and contain only letters, digits, or underscores");

                argMap.put("$" + argName, sb.toString());
            } else {
                // CASE: unnamed argument -- use position in arg list for name
                argMap.put("$" + i, arg);
            }
        }

        return argMap;
    }
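
    // For example (hypothetical values):
    //   createArgumentsMap(true,  new String[]{"X=in.csv", "k=5"}) yields {$X -> in.csv, $k -> 5}
    //   createArgumentsMap(false, new String[]{"in.csv", "5"})     yields {$1 -> in.csv, $2 -> 5}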

    protected static String readDMLScript(String argname, String script) throws IOException, LanguageException {
        boolean fromFile = argname.equals("-f");
        String dmlScriptStr;

        if (fromFile) {
            //read DML script from file
            if (script == null)
                throw new LanguageException("DML script path was not specified!");

            StringBuilder sb = new StringBuilder();
            BufferedReader in = null;
            try {
                //read from hdfs or gpfs file system
                if (script.startsWith("hdfs:") || script.startsWith("gpfs:")) {
                    if (!LocalFileUtils.validateExternalFilename(script, true))
                        throw new LanguageException("Invalid (non-trustworthy) hdfs filename.");
                    FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
                    Path scriptPath = new Path(script);
                    in = new BufferedReader(new InputStreamReader(fs.open(scriptPath)));
                }
                // from local file system
                else {
                    if (!LocalFileUtils.validateExternalFilename(script, false))
                        throw new LanguageException("Invalid (non-trustworthy) local filename.");
                    in = new BufferedReader(new FileReader(script));
                }

                //core script reading
                String tmp = null;
                while ((tmp = in.readLine()) != null) {
                    sb.append(tmp);
                    sb.append("\n");
                }
            } catch (IOException ex) {
                LOG.error("Failed to read the script from the file system", ex);
                throw ex;
            } finally {
                if (in != null)
                    in.close();
            }

            dmlScriptStr = sb.toString();
        } else {
            //parse given script string 
            if (script == null)
                throw new LanguageException("DML script was not specified!");

            InputStream is = new ByteArrayInputStream(script.getBytes());
            Scanner scan = new Scanner(is);
            dmlScriptStr = scan.useDelimiter("\\A").next();
            scan.close();
        }

        return dmlScriptStr;
    }
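
    // Note: in the -s case, the Scanner's "\\A" delimiter consumes the whole stream in
    // one token, so e.g. readDMLScript("-s", "x = 1; print(x)") (a hypothetical script
    // string) returns the script unchanged.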

    private static RUNTIME_PLATFORM parseRuntimePlatform(String platform) {
        RUNTIME_PLATFORM lrtplatform = null;

        if (platform.equalsIgnoreCase("hadoop"))
            lrtplatform = RUNTIME_PLATFORM.HADOOP;
        else if (platform.equalsIgnoreCase("singlenode"))
            lrtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
        else if (platform.equalsIgnoreCase("hybrid"))
            lrtplatform = RUNTIME_PLATFORM.HYBRID;
        else if (platform.equalsIgnoreCase("spark"))
            lrtplatform = RUNTIME_PLATFORM.SPARK;
        else if (platform.equalsIgnoreCase("hybrid_spark"))
            lrtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
        else
            System.err.println("ERROR: Unknown runtime platform: " + platform);

        return lrtplatform;
    }

    private static void setLoggingProperties(Configuration conf) {
        String debug = conf.get("systemml.logging");

        if (debug == null)
            debug = System.getProperty("systemml.logging");

        if (debug != null) {
            if (debug.equalsIgnoreCase("debug")) {
                Logger.getLogger("org.apache.sysml").setLevel(Level.DEBUG);
            } else if (debug.equalsIgnoreCase("trace")) {
                Logger.getLogger("org.apache.sysml").setLevel(Level.TRACE);
            }
        }
    }
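
    // The log level can therefore be raised without editing log4j configuration, e.g.
    // by passing -Dsystemml.logging=debug to the JVM or setting systemml.logging in
    // the Hadoop configuration (supported values: debug, trace).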

    ///////////////////////////////
    // private internal interface 
    // (core compilation and execute)
    ////////

    /**
     * The running body of DMLScript execution. This method should be called after execution properties
     * have been correctly set, and customized parameters have been put into argVals.
     * 
     * @param dmlScriptStr DML script string
     * @param fnameOptConfig configuration file
     * @param argVals map of argument values
     * @param allArgs arguments
     * @param parsePyDML true if PYDML, false if DML
     * @throws ParseException if ParseException occurs
     * @throws IOException if IOException occurs
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     * @throws LanguageException if LanguageException occurs
     * @throws HopsException if HopsException occurs
     * @throws LopsException if LopsException occurs
     */
    private static void execute(String dmlScriptStr, String fnameOptConfig, Map<String, String> argVals,
            String[] allArgs, boolean parsePyDML) throws ParseException, IOException, DMLRuntimeException,
            LanguageException, HopsException, LopsException {
        SCRIPT_TYPE = parsePyDML ? ScriptType.PYDML : ScriptType.DML;

        //print basic time and environment info
        printStartExecInfo(dmlScriptStr);

        //Step 1: parse configuration files
        DMLConfig dmlconf = DMLConfig.readConfigurationFile(fnameOptConfig);
        ConfigurationManager.setGlobalConfig(dmlconf);
        CompilerConfig cconf = OptimizerUtils.constructCompilerConfig(dmlconf);
        ConfigurationManager.setGlobalConfig(cconf);
        LOG.debug("\nDML config: \n" + dmlconf.getConfigInfo());

        //Step 2: set local/remote memory if requested (for compile in AM context) 
        if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
            DMLAppMasterUtils.setupConfigRemoteMaxMemory(dmlconf);
        }

        //Step 3: parse dml script
        Statistics.startCompileTimer();
        AParserWrapper parser = AParserWrapper.createParser(parsePyDML);
        DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);

        //Step 4: construct HOP DAGs (incl LVA and validate)
        DMLTranslator dmlt = new DMLTranslator(prog);
        dmlt.liveVariableAnalysis(prog);
        dmlt.validateParseTree(prog);
        dmlt.constructHops(prog);

        if (LOG.isDebugEnabled()) {
            LOG.debug("\n********************** HOPS DAG (Before Rewrite) *******************");
            dmlt.printHops(prog);
            DMLTranslator.resetHopsDAGVisitStatus(prog);
        }

        //Step 5: rewrite HOP DAGs (incl IPA and memory estimates)
        dmlt.rewriteHopsDAG(prog);

        if (LOG.isDebugEnabled()) {
            LOG.debug("\n********************** HOPS DAG (After Rewrite) *******************");
            dmlt.printHops(prog);
            DMLTranslator.resetHopsDAGVisitStatus(prog);

            LOG.debug("\n********************** OPTIMIZER *******************\n" + "Level = "
                    + OptimizerUtils.getOptLevel() + "\n" + "Available Memory = "
                    + ((double) InfrastructureAnalyzer.getLocalMaxMemory() / 1024 / 1024) + " MB" + "\n"
                    + "Memory Budget = " + ((double) OptimizerUtils.getLocalMemBudget() / 1024 / 1024) + " MB"
                    + "\n");
        }

        //Step 6: construct lops (incl exec type and op selection)
        dmlt.constructLops(prog);

        if (LOG.isDebugEnabled()) {
            LOG.debug("\n********************** LOPS DAG *******************");
            dmlt.printLops(prog);
            dmlt.resetLopsDAGVisitStatus(prog);
        }

        //Step 7: generate runtime program
        Program rtprog = prog.getRuntimeProgram(dmlconf);

        //Step 8: [optional global data flow optimization]
        if (OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY)) {
            LOG.warn("Optimization level '" + OptimizationLevel.O4_GLOBAL_TIME_MEMORY + "' "
                    + "is still in experimental state and not intended for production use.");
            rtprog = GlobalOptimizerWrapper.optimizeProgram(prog, rtprog);
        }

        //launch SystemML appmaster (if requested and not already in launched AM)
        if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
            if (!isActiveAM() && DMLYarnClientProxy.launchDMLYarnAppmaster(dmlScriptStr, dmlconf, allArgs, rtprog))
                return; //execution was handled by the AM; if the AM launch is unsuccessful, we fall back to normal execute
            if (isActiveAM()) //in AM context (not failed AM launch)
                DMLAppMasterUtils.setupProgramMappingRemoteMaxMemory(rtprog);
        }

        //Step 9: prepare statistics [and optional explain output]
        //count number of compiled MR jobs / SP instructions
        ExplainCounts counts = Explain.countDistributedOperations(rtprog);
        Statistics.resetNoOfCompiledJobs(counts.numJobs);

        //explain plan of program (hops or runtime)
        if (EXPLAIN != ExplainType.NONE) {
            LOG.info("EXPLAIN (" + EXPLAIN.toString() + "):\n" + Explain.explainMemoryBudget(counts) + "\n"
                    + Explain.explainDegreeOfParallelism(counts) + Explain.explain(prog, rtprog, EXPLAIN));
        }

        Statistics.stopCompileTimer();

        //double costs = CostEstimationWrapper.getTimeEstimate(rtprog, ExecutionContextFactory.createContext());
        //System.out.println("Estimated costs: "+costs);

        //Step 10: execute runtime program
        Statistics.startRunTimer();
        ExecutionContext ec = null;
        try {
            initHadoopExecution(dmlconf);

            if (DISABLE_CACHING) {
                //disable caching globally 
                CacheableData.disableCaching();
            }

            //run execute (w/ exception handling to ensure proper shutdown)
            ec = ExecutionContextFactory.createContext(rtprog);
            rtprog.execute(ec);

        } finally { //ensure cleanup/shutdown
            if (DMLScript.USE_ACCELERATOR && ec != null) {
                ec.destroyGPUContext();
            }
            if (ec != null && ec instanceof SparkExecutionContext) {
                ((SparkExecutionContext) ec).close();
            }

            //display statistics (incl caching stats if enabled)
            Statistics.stopRunTimer();
            LOG.info(Statistics.display());
            LOG.info("END DML run " + getDateTime());

            //cleanup scratch_space and all working dirs
            cleanupHadoopExecution(dmlconf);
        }
    }

    /**
     * Launcher for DML debugger. This method should be called after execution and debug
     * properties have been correctly set, and customized parameters have been put into argVals.
     * 
     * @param dmlScriptStr DML script contents (including new lines)
     * @param fnameOptConfig Full path of configuration file for SystemML
     * @param argVals Key-value pairs defining arguments of DML script
     * @param parsePyDML true if PYDML, false if DML
     * @throws ParseException if ParseException occurs
     * @throws IOException if IOException occurs
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     * @throws DMLDebuggerException if DMLDebuggerException occurs
     * @throws LanguageException if LanguageException occurs
     * @throws HopsException if HopsException occurs
     * @throws LopsException if LopsException occurs
     */
    private static void launchDebugger(String dmlScriptStr, String fnameOptConfig, Map<String, String> argVals,
            boolean parsePyDML) throws ParseException, IOException, DMLRuntimeException, DMLDebuggerException,
            LanguageException, HopsException, LopsException {
        DMLDebuggerProgramInfo dbprog = new DMLDebuggerProgramInfo();

        //Step 1: parse configuration files
        DMLConfig conf = DMLConfig.readConfigurationFile(fnameOptConfig);
        ConfigurationManager.setGlobalConfig(conf);

        //Step 2: parse dml script
        AParserWrapper parser = AParserWrapper.createParser(parsePyDML);
        DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);

        //Step 3: construct HOP DAGs (incl LVA and validate)
        DMLTranslator dmlt = new DMLTranslator(prog);
        dmlt.liveVariableAnalysis(prog);
        dmlt.validateParseTree(prog);
        dmlt.constructHops(prog);

        //Step 4: rewrite HOP DAGs (incl IPA and memory estimates)
        dmlt.rewriteHopsDAG(prog);

        //Step 5: construct LOP DAGs
        dmlt.constructLops(prog);

        //Step 6: generate runtime program
        dbprog.rtprog = prog.getRuntimeProgram(conf);

        try {
            //set execution environment
            initHadoopExecution(conf);

            //initialize an instance of SystemML debugger
            DMLDebugger SystemMLdb = new DMLDebugger(dbprog, dmlScriptStr);
            //run SystemML debugger
            SystemMLdb.runSystemMLDebugger();
        } finally {
            //cleanup scratch_space and all working dirs
            cleanupHadoopExecution(conf);
        }
    }

    public static void initHadoopExecution(DMLConfig config)
            throws IOException, ParseException, DMLRuntimeException {
        //check security aspects
        checkSecuritySetup(config);

        //create scratch space with appropriate permissions
        String scratch = config.getTextValue(DMLConfig.SCRATCH_SPACE);
        MapReduceTool.createDirIfNotExistOnHDFS(scratch, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);

        //cleanup working dirs from previous aborted runs with same pid in order to prevent conflicts
        cleanupHadoopExecution(config);

        //init caching (incl set active)
        LocalFileUtils.createWorkingDirectory();
        CacheableData.initCaching();

        //reset statistics (required if multiple scripts executed in one JVM)
        Statistics.resetNoOfExecutedJobs(0);
        if (STATISTICS) {
            CacheStatistics.reset();
            Statistics.reset();
        }
    }

    private static void checkSecuritySetup(DMLConfig config) throws IOException, DMLRuntimeException {
        //analyze local configuration
        String userName = System.getProperty("user.name");
        HashSet<String> groupNames = new HashSet<String>();
        try {
            //check existence, for backwards compatibility to < hadoop 0.21
            if (UserGroupInformation.class.getMethod("getCurrentUser") != null) {
                String[] groups = UserGroupInformation.getCurrentUser().getGroupNames();
                Collections.addAll(groupNames, groups);
            }
        } catch (Exception ex) {
        }

        //analyze hadoop configuration
        JobConf job = ConfigurationManager.getCachedJobConf();
        boolean localMode = InfrastructureAnalyzer.isLocalMode(job);
        String taskController = job.get(MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER,
                "org.apache.hadoop.mapred.DefaultTaskController");
        String ttGroupName = job.get(MRConfigurationNames.MR_TASKTRACKER_GROUP, "null");
        String perm = job.get(MRConfigurationNames.DFS_PERMISSIONS_ENABLED, "null"); //note: job.get("dfs.permissions.supergroup",null);
        URI fsURI = FileSystem.getDefaultUri(job);

        //determine security states
        boolean flagDiffUser = !(taskController.equals("org.apache.hadoop.mapred.LinuxTaskController") //runs map/reduce tasks as the current user
                || localMode // run in the same JVM anyway
                || groupNames.contains(ttGroupName)); //user in task tracker group 
        boolean flagLocalFS = fsURI == null || fsURI.getScheme().equals("file");
        boolean flagSecurity = perm.equals("yes");

        LOG.debug("SystemML security check: " + "local.user.name = " + userName + ", " + "local.user.groups = "
                + ProgramConverter.serializeStringCollection(groupNames) + ", "
                + MRConfigurationNames.MR_JOBTRACKER_ADDRESS + " = "
                + job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS) + ", "
                + MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER + " = " + taskController + ","
                + MRConfigurationNames.MR_TASKTRACKER_GROUP + " = " + ttGroupName + ", "
                + MRConfigurationNames.FS_DEFAULTFS + " = " + ((fsURI != null) ? fsURI.getScheme() : "null") + ", "
                + MRConfigurationNames.DFS_PERMISSIONS_ENABLED + " = " + perm);

        //print warning if permission issues possible
        if (flagDiffUser && (flagLocalFS || flagSecurity)) {
            LOG.warn("Cannot run map/reduce tasks as user '" + userName + "'. Using tasktracker group '"
                    + ttGroupName + "'.");
        }

        //validate external filenames of working directories
        String localtmpdir = config.getTextValue(DMLConfig.LOCAL_TMP_DIR);
        String hdfstmpdir = config.getTextValue(DMLConfig.SCRATCH_SPACE);
        if (!LocalFileUtils.validateExternalFilename(localtmpdir, false))
            throw new DMLRuntimeException("Invalid (non-trustworthy) local working directory.");
        if (!LocalFileUtils.validateExternalFilename(hdfstmpdir, true))
            throw new DMLRuntimeException("Invalid (non-trustworthy) hdfs working directory.");
    }

    public static void cleanupHadoopExecution(DMLConfig config) throws IOException, ParseException {
        //create dml-script-specific suffix
        StringBuilder sb = new StringBuilder();
        sb.append(Lop.FILE_SEPARATOR);
        sb.append(Lop.PROCESS_PREFIX);
        sb.append(DMLScript.getUUID());
        String dirSuffix = sb.toString();

        //1) cleanup scratch space (everything for current uuid)
        //(required because otherwise an export to hdfs would skip writes it assumes unnecessary for same-named files)
        MapReduceTool.deleteFileIfExistOnHDFS(config.getTextValue(DMLConfig.SCRATCH_SPACE) + dirSuffix);

        //2) cleanup hadoop working dirs (only required for LocalJobRunner (local job tracker), because
        //this implementation does not create job specific sub directories)
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        if (InfrastructureAnalyzer.isLocalMode(job)) {
            try {
                LocalFileUtils.deleteFileIfExists(DMLConfig.LOCAL_MR_MODE_STAGING_DIR + //staging dir (for local mode only) 
                        dirSuffix);
                LocalFileUtils.deleteFileIfExists(MRJobConfiguration.getLocalWorkingDirPrefix(job) + //local dir
                        dirSuffix);
                MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getSystemWorkingDirPrefix(job) + //system dir
                        dirSuffix);
                MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getStagingWorkingDirPrefix(job) + //staging dir
                        dirSuffix);
            } catch (Exception ex) {
                //we give only a warning because those directories are written by the mapred daemon
                //and hence, execution can still succeed
                LOG.warn("Unable to cleanup hadoop working dirs: " + ex.getMessage());
            }
        }

        //3) cleanup systemml-internal working dirs
        CacheableData.cleanupCacheDir(); //might be local/hdfs
        LocalFileUtils.cleanupWorkingDirectory();
    }

    ///////////////////////////////
    // private internal helper functionalities
    ////////

    private static void printInvocationInfo(String fnameScript, String fnameOptConfig,
            Map<String, String> argVals) {
        LOG.debug("****** args to DML Script ******\n" + "UUID: " + getUUID() + "\n" + "SCRIPT PATH: " + fnameScript
                + "\n" + "RUNTIME: " + rtplatform + "\n" + "BUILTIN CONFIG: "
                + DMLConfig.DEFAULT_SYSTEMML_CONFIG_FILEPATH + "\n" + "OPTIONAL CONFIG: " + fnameOptConfig + "\n");

        if (!argVals.isEmpty()) {
            LOG.debug("Script arguments are: \n");
            for (int i = 1; i <= argVals.size(); i++)
                LOG.debug("Script argument $" + i + " = " + argVals.get("$" + i));
        }
    }

    private static void printStartExecInfo(String dmlScriptString) {
        LOG.info("BEGIN DML run " + getDateTime());
        LOG.debug("DML script: \n" + dmlScriptString);

        if (rtplatform == RUNTIME_PLATFORM.HADOOP || rtplatform == RUNTIME_PLATFORM.HYBRID) {
            String hadoop_home = System.getenv("HADOOP_HOME");
            LOG.info("HADOOP_HOME: " + hadoop_home);
        }
    }

    private static String getDateTime() {
        DateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
        Date date = new Date();
        return dateFormat.format(date);
    }

    private static void cleanSystemMLWorkspace() throws DMLException {
        try {
            //read the default config
            DMLConfig conf = DMLConfig.readConfigurationFile(null);

            //run cleanup job to clean remote local tmp dirs
            CleanupMR.runJob(conf);

            //cleanup scratch space (on HDFS)
            String scratch = conf.getTextValue(DMLConfig.SCRATCH_SPACE);
            if (scratch != null)
                MapReduceTool.deleteFileIfExistOnHDFS(scratch);

            //cleanup local working dir
            String localtmp = conf.getTextValue(DMLConfig.LOCAL_TMP_DIR);
            if (localtmp != null)
                LocalFileUtils.cleanupRcWorkingDirectory(localtmp);
        } catch (Exception ex) {
            throw new DMLException("Failed to run SystemML workspace cleanup.", ex);
        }
    }
}