com.ibm.bi.dml.api.DMLScript.java Source code


Introduction

Here is the source code for com.ibm.bi.dml.api.DMLScript.java
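
DMLScript is the main entry point of SystemML. It parses the invocation arguments, reads the DML (or Python-like DML) script, compiles it through the HOP/LOP stack into a runtime program, and executes that program on the configured platform (single node, Hadoop MR, Spark, or a hybrid mode). A typical invocation, quoted from the Javadoc of main() below, is

    hadoop jar SystemML.jar -f Test.dml

The full set of supported flags is documented in the USAGE string near the top of the class.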

Source

/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
*/

package com.ibm.bi.dml.api;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Scanner;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.xml.sax.SAXException;

import com.ibm.bi.dml.conf.ConfigurationManager;
import com.ibm.bi.dml.conf.DMLConfig;
import com.ibm.bi.dml.debug.DMLDebugger;
import com.ibm.bi.dml.debug.DMLDebuggerException;
import com.ibm.bi.dml.debug.DMLDebuggerProgramInfo;
import com.ibm.bi.dml.hops.HopsException;
import com.ibm.bi.dml.hops.OptimizerUtils;
import com.ibm.bi.dml.hops.OptimizerUtils.OptimizationLevel;
import com.ibm.bi.dml.hops.globalopt.GlobalOptimizerWrapper;
import com.ibm.bi.dml.lops.Lop;
import com.ibm.bi.dml.lops.LopsException;
import com.ibm.bi.dml.parser.AParserWrapper;
import com.ibm.bi.dml.parser.DMLProgram;
import com.ibm.bi.dml.parser.DMLTranslator;
import com.ibm.bi.dml.parser.LanguageException;
import com.ibm.bi.dml.parser.ParseException;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.DMLScriptException;
import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException;
import com.ibm.bi.dml.runtime.controlprogram.Program;
import com.ibm.bi.dml.runtime.controlprogram.caching.CacheStatistics;
import com.ibm.bi.dml.runtime.controlprogram.caching.CacheableData;
import com.ibm.bi.dml.runtime.controlprogram.context.ExecutionContext;
import com.ibm.bi.dml.runtime.controlprogram.context.ExecutionContextFactory;
import com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext;
import com.ibm.bi.dml.runtime.controlprogram.parfor.ProgramConverter;
import com.ibm.bi.dml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import com.ibm.bi.dml.runtime.controlprogram.parfor.util.IDHandler;
import com.ibm.bi.dml.runtime.matrix.CleanupMR;
import com.ibm.bi.dml.runtime.matrix.mapred.MRConfigurationNames;
import com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration;
import com.ibm.bi.dml.runtime.util.LocalFileUtils;
import com.ibm.bi.dml.runtime.util.MapReduceTool;
import com.ibm.bi.dml.utils.Explain;
import com.ibm.bi.dml.utils.Explain.ExplainCounts;
import com.ibm.bi.dml.utils.Explain.ExplainType;
import com.ibm.bi.dml.utils.Statistics;
import com.ibm.bi.dml.yarn.DMLAppMasterUtils;
// import com.ibm.bi.dml.utils.visualize.DotGraph;
import com.ibm.bi.dml.yarn.DMLYarnClientProxy;

public class DMLScript {
    public enum RUNTIME_PLATFORM {
        HADOOP, // execute all matrix operations in MR
        SINGLE_NODE, // execute all matrix operations in CP
        HYBRID, // execute matrix operations in CP or MR
        HYBRID_SPARK, // execute matrix operations in CP or Spark   
        SPARK // execute matrix operations in Spark
    };

    public static RUNTIME_PLATFORM rtplatform = RUNTIME_PLATFORM.HYBRID; //default exec mode
    public static boolean STATISTICS = false; //default statistics
    public static boolean ENABLE_DEBUG_MODE = false; //default debug mode
    public static boolean USE_LOCAL_SPARK_CONFIG = false; //set default local spark configuration - used for local testing
    public static String DML_FILE_PATH_ANTLR_PARSER = null;
    public static ExplainType EXPLAIN = ExplainType.NONE; //default explain

    // flag that indicates whether or not to suppress any prints to stdout
    public static boolean _suppressPrint2Stdout = false;

    public static String _uuid = IDHandler.createDistributedUniqueID();
    public static boolean _activeAM = false;

    private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());

    public static String USAGE = "Usage is " + DMLScript.class.getCanonicalName() + " -f <filename>"
    //+ " (-exec <mode>)?" + " (-explain <type>)?" + " (-stats)?" + " (-clean)?" + " (-config=<config_filename>)? 
            + " [-options] ([-args | -nvargs] <args-list>)? \n"
            + "   -f: <filename> will be interpreted as a filename path (if <filename> is prefixed\n"
            + "         with hdfs or gpfs it is read from DFS, otherwise from local file system)\n"
            //undocumented feature in beta 08/2014 release
            //+ "   -s: <filename> will be interpreted as a DML script string \n"
            + "   -python: (optional) parses Python-like DML\n" + "   -debug: (optional) run in debug mode\n"
            // Later add optional flags to indicate optimizations turned on or off. Currently they are turned off.
            //+ "   -debug: <flags> (optional) run in debug mode\n"
            //+ "         Optional <flags> that is supported for this mode is optimize=(on|off)\n"
            + "   -exec: <mode> (optional) execution mode (hadoop, singlenode, [hybrid], hybrid_spark)\n"
            + "   -explain: <type> (optional) explain plan (hops, [runtime], recompile_hops, recompile_runtime)\n"
            + "   -stats: (optional) monitor and report caching/recompilation statistics\n"
            + "   -clean: (optional) cleanup all SystemML working directories (FS, DFS).\n"
            + "         All other flags are ignored in this mode. \n"
            + "   -config: (optional) use config file <config_filename> (default: use parameter\n"
            + "         values in default SystemML-config.xml config file; if <config_filename> is\n"
            + "         prefixed with hdfs or gpfs it is read from DFS, otherwise from local file system)\n"
            + "   -args: (optional) parameterize DML script with contents of [args list], ALL args\n"
            + "         after -args flag, each argument must be an unnamed-argument, where 1st value\n"
            + "         after -args will replace $1 in DML script, 2nd value will replace $2, etc.\n"
            + "   -nvargs: (optional) parameterize DML script with contents of [args list], ALL args\n"
            + "         after -nvargs flag, each argument must be be named-argument of form argName=argValue,\n"
            + "         where value will replace $argName in DML script, argName must be a valid DML variable\n"
            + "         name (start with letter, contain only letters, numbers, or underscores).\n"
            + "   <args-list>: (optional) args to DML script \n"
            + "   -? | -help: (optional) show this help message \n";

    ///////////////////////////////
    // public external interface
    ////////

    public static String getUUID() {
        return _uuid;
    }

    /**
     * Used to set the master UUID on all nodes (e.g., in parfor REMOTE_MR, where DMLScript
     * is passed to the workers) in order to simplify cleanup of scratch_space and local working dirs.
     * 
     * @param uuid the master UUID to set
     */
    public static void setUUID(String uuid) {
        _uuid = uuid;
    }

    public static boolean suppressPrint2Stdout() {
        return _suppressPrint2Stdout;
    }

    public static void setActiveAM() {
        _activeAM = true;
    }

    public static boolean isActiveAM() {
        return _activeAM;
    }

    /**
     * Default DML script invocation (e.g., via 'hadoop jar SystemML.jar -f Test.dml')
     * 
     * @param args command-line arguments
     * @throws IOException
     * @throws DMLException
     */
    public static void main(String[] args) throws IOException, DMLException {
        Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        try {
            DMLScript.executeScript(conf, otherArgs);
        } catch (DMLScriptException e) {
            // In case of DMLScriptException, simply print the error message.
            System.err.println(e.getMessage());
        }
    }

    public static boolean executeScript(String[] args) throws DMLException {
        Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
        return executeScript(conf, args);
    }

    /**
     * This version of executeScript() is invoked from RJaqlUdf (from BigR).
     *  
     * @param conf Hadoop configuration
     * @param args invocation arguments
     * @param suppress whether to suppress prints to stdout
     * @return the boolean result of executeScript as a string, or the error
     *         message in case of a DMLScriptException
     * @throws DMLException
     */
    public static String executeScript(Configuration conf, String[] args, boolean suppress) throws DMLException {
        _suppressPrint2Stdout = suppress;
        try {
            boolean ret = executeScript(conf, args);
            return Boolean.toString(ret);
        } catch (DMLScriptException e) {
            return e.getMessage();
        }
    }

    /**
     * Single entry point for all public invocation alternatives (e.g.,
     * main, executeScript, JaqlUdf etc)
     * 
     * @param conf Hadoop configuration
     * @param args invocation arguments
     * @return true if the script executed successfully
     * @throws DMLException 
     */
    public static boolean executeScript(Configuration conf, String[] args) throws DMLException {
        boolean ret = false;

        //Step 1: parse arguments 
        //check for help 
        if (args.length == 0
                || (args.length == 1 && (args[0].equalsIgnoreCase("-help") || args[0].equalsIgnoreCase("-?")))) {
            System.err.println(USAGE);
            return true;
        }

        //check for clean
        else if (args.length == 1 && args[0].equalsIgnoreCase("-clean")) {
            cleanSystemMLWorkspace();
            return true;
        }

        //check number of args - print usage if incorrect
        if (args.length < 2) {
            System.err.println("ERROR: Unrecognized invocation arguments.");
            System.err.println(USAGE);
            return ret;
        }

        //check script arg - print usage if incorrect
        if (!(args[0].equals("-f") || args[0].equals("-s"))) {
            System.err.println("ERROR: First argument must be either -f or -s");
            System.err.println(USAGE);
            return ret;
        }

        //parse arguments and set execution properties
        RUNTIME_PLATFORM oldrtplatform = rtplatform; //keep old rtplatform
        ExplainType oldexplain = EXPLAIN; //keep old explain   

        // Reset global flags to avoid errors in test suite
        ENABLE_DEBUG_MODE = false;

        boolean parsePyDML = false;
        try {
            String fnameOptConfig = null; //optional config filename
            String[] scriptArgs = null; //optional script arguments
            boolean namedScriptArgs = false;

            for (int i = 2; i < args.length; i++) {
                if (args[i].equalsIgnoreCase("-explain")) {
                    EXPLAIN = ExplainType.RUNTIME;
                    if (args.length > (i + 1) && !args[i + 1].startsWith("-"))
                        EXPLAIN = Explain.parseExplainType(args[++i]);
                } else if (args[i].equalsIgnoreCase("-stats"))
                    STATISTICS = true;
                else if (args[i].equalsIgnoreCase("-exec")) {
                    rtplatform = parseRuntimePlatform(args[++i]);
                    if (rtplatform == null)
                        return ret;
                } else if (args[i].startsWith("-config="))
                    fnameOptConfig = args[i].substring(8).replaceAll("\"", "");
                else if (args[i].equalsIgnoreCase("-debug")) {
                    ENABLE_DEBUG_MODE = true;
                } else if (args[i].equalsIgnoreCase("-python")) {
                    parsePyDML = true;
                } else if (args[i].startsWith("-args") || args[i].startsWith("-nvargs")) {
                    namedScriptArgs = args[i].startsWith("-nvargs");
                    i++;
                    scriptArgs = new String[args.length - i];
                    System.arraycopy(args, i, scriptArgs, 0, scriptArgs.length);
                    break;
                } else {
                    System.err.println("ERROR: Unknown argument: " + args[i]);
                    return ret;
                }
            }

            //set log level
            if (!ENABLE_DEBUG_MODE)
                setLoggingProperties(conf);

            //Step 2: prepare script invocation
            String dmlScriptStr = readDMLScript(args[0], args[1]);
            HashMap<String, String> argVals = createArgumentsMap(namedScriptArgs, scriptArgs);

            DML_FILE_PATH_ANTLR_PARSER = args[1];

            //Step 3: invoke dml script
            printInvocationInfo(args[1], fnameOptConfig, argVals);
            if (ENABLE_DEBUG_MODE) {
                // the inner try block isolates debugger exceptions, which allows distinguishing debugger bugs from runtime bugs
                launchDebugger(dmlScriptStr, fnameOptConfig, argVals, parsePyDML);
            } else {
                execute(dmlScriptStr, fnameOptConfig, argVals, args, parsePyDML);
            }

            ret = true;
        } catch (DMLScriptException e) {
            //rethrow DMLScriptException to propagate stop call
            throw e;
        } catch (Exception ex) {
            LOG.error("Failed to execute DML script.", ex);
            throw new DMLException(ex);
        } finally {
            //reset runtime platform and explain type
            rtplatform = oldrtplatform;
            EXPLAIN = oldexplain;
        }

        return ret;
    }
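
    // Example (illustrative, not part of the original source): for the argument
    // vector {"-f", "test.dml", "-exec", "hybrid", "-nvargs", "X=in.csv"} the
    // parsing loop above sets rtplatform=HYBRID, collects {"X=in.csv"} as named
    // script arguments, and then reads and executes test.dml.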

    ///////////////////////////////
    // private internal utils (argument parsing)
    ////////

    /**
     * 
     * @param hasNamedArgs whether the arguments are named (-nvargs) or positional (-args)
     * @param args the script arguments
     * @return map from argument names (e.g., $X or $1) to their values
     * @throws LanguageException
     */
    protected static HashMap<String, String> createArgumentsMap(boolean hasNamedArgs, String[] args)
            throws LanguageException {
        HashMap<String, String> argMap = new HashMap<String, String>();

        if (args == null)
            return argMap;

        for (int i = 1; i <= args.length; i++) {
            String arg = args[i - 1];

            if (arg.equalsIgnoreCase("-l") || arg.equalsIgnoreCase("-log") || arg.equalsIgnoreCase("-v")
                    || arg.equalsIgnoreCase("-visualize") || arg.equalsIgnoreCase("-explain")
                    || arg.equalsIgnoreCase("-debug") || arg.equalsIgnoreCase("-stats")
                    || arg.equalsIgnoreCase("-exec") || arg.equalsIgnoreCase("-debug")
                    || arg.startsWith("-config=")) {
                throw new LanguageException("-args or -nvargs must be the final argument for DMLScript!");
            }

            //parse arguments (named args / args by position)
            if (hasNamedArgs) {
                // CASE: named argument argName=argValue -- must add <argName, argValue> pair to _argVals
                // (split with limit 2 so that '=' characters inside the value are preserved)
                String[] argPieces = arg.split("=", 2);
                if (argPieces.length < 2)
                    throw new LanguageException(
                            "for -nvargs option, elements in arg list must be named and have form argName=argValue");
                String argName = argPieces[0];
                String argValue = argPieces[1];

                String varNameRegex = "^[a-zA-Z]([a-zA-Z0-9_])*$";
                if (!argName.matches(varNameRegex))
                    throw new LanguageException("argName " + argName
                            + " must be a valid variable name in DML. Valid variable names in DML start with upper-case or lower-case letter, and contain only letters, digits, or underscores");

                argMap.put("$" + argName, argValue);
            } else {
                // CASE: unnamed argument -- use position in arg list for name
                argMap.put("$" + i, arg);
            }
        }

        return argMap;
    }
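
    // Example (illustrative, not part of the original source):
    //   createArgumentsMap(true, new String[]{"X=in.csv", "maxiter=10"})
    // yields {"$X" -> "in.csv", "$maxiter" -> "10"}, whereas
    //   createArgumentsMap(false, new String[]{"in.csv", "10"})
    // binds the same values positionally as {"$1" -> "in.csv", "$2" -> "10"}.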

    /**
     * 
     * @param argname either "-f" (read script from file) or "-s" (script given inline)
     * @param script the script file path or the script string, respectively
     * @return the DML script contents
     * @throws IOException 
     * @throws LanguageException 
     */
    protected static String readDMLScript(String argname, String script) throws IOException, LanguageException {
        boolean fromFile = argname.equals("-f");
        String dmlScriptStr = null;

        if (fromFile) {
            //read DML script from file
            if (script == null)
                throw new LanguageException("DML script path was not specified!");

            StringBuilder sb = new StringBuilder();
            BufferedReader in = null;
            try {
                //read from hdfs or gpfs file system
                if (script.startsWith("hdfs:") || script.startsWith("gpfs:")) {
                    if (!LocalFileUtils.validateExternalFilename(script, true))
                        throw new LanguageException("Invalid (non-trustworthy) hdfs filename.");
                    FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
                    Path scriptPath = new Path(script);
                    in = new BufferedReader(new InputStreamReader(fs.open(scriptPath)));
                }
                // from local file system
                else {
                    if (!LocalFileUtils.validateExternalFilename(script, false))
                        throw new LanguageException("Invalid (non-trustworthy) local filename.");
                    in = new BufferedReader(new FileReader(script));
                }

                //core script reading
                String tmp = null;
                while ((tmp = in.readLine()) != null) {
                    sb.append(tmp);
                    sb.append("\n");
                }
            } catch (IOException ex) {
                LOG.error("Failed to read the script from the file system", ex);
                throw ex;
            } finally {
                if (in != null)
                    in.close();
            }

            dmlScriptStr = sb.toString();
        } else {
            //parse given script string 
            if (script == null)
                throw new LanguageException("DML script was not specified!");

            InputStream is = new ByteArrayInputStream(script.getBytes());
            Scanner scan = new Scanner(is);
            dmlScriptStr = scan.useDelimiter("\\A").next();
            scan.close();
        }

        return dmlScriptStr;
    }
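
    // Example (illustrative, not part of the original source):
    //   readDMLScript("-f", "hdfs:/tmp/test.dml")  reads the script from DFS,
    //   readDMLScript("-f", "/tmp/test.dml")       reads it from the local file system,
    //   readDMLScript("-s", "X = rand(rows=10, cols=10);") treats the second
    // argument as the script text itself.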

    /**
     * 
     * @param platform the execution mode string passed via -exec
     * @return the corresponding RUNTIME_PLATFORM, or null if unknown
     */
    private static RUNTIME_PLATFORM parseRuntimePlatform(String platform) {
        RUNTIME_PLATFORM lrtplatform = null;

        if (platform.equalsIgnoreCase("hadoop"))
            lrtplatform = RUNTIME_PLATFORM.HADOOP;
        else if (platform.equalsIgnoreCase("singlenode"))
            lrtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
        else if (platform.equalsIgnoreCase("hybrid"))
            lrtplatform = RUNTIME_PLATFORM.HYBRID;
        else if (platform.equalsIgnoreCase("spark"))
            lrtplatform = RUNTIME_PLATFORM.SPARK;
        else if (platform.equalsIgnoreCase("hybrid_spark"))
            lrtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
        else
            System.err.println("ERROR: Unknown runtime platform: " + platform);

        return lrtplatform;
    }

    /**
     * 
     * @param conf
     */
    private static void setLoggingProperties(Configuration conf) {
        String debug = conf.get("systemml.logging");

        if (debug == null)
            debug = System.getProperty("systemml.logging");

        if (debug != null) {
            if (debug.equalsIgnoreCase("debug")) {
                Logger.getLogger("com.ibm.bi.dml").setLevel((Level) Level.DEBUG);
            } else if (debug.equalsIgnoreCase("trace")) {
                Logger.getLogger("com.ibm.bi.dml").setLevel((Level) Level.TRACE);
            }
        }
    }
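
    // Note (illustrative, not part of the original source): the log level can be
    // raised without -debug via a generic Hadoop option, e.g.
    //   hadoop jar SystemML.jar -D systemml.logging=debug -f Test.dml
    // because GenericOptionsParser in main() places -D options into the Hadoop
    // Configuration queried above (with a JVM system property as fallback).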

    ///////////////////////////////
    // private internal interface 
    // (core compilation and execute)
    ////////

    /**
     * execute: The running body of DMLScript execution. This method should be called after execution
     * properties have been correctly set, and customized parameters have been put into _argVals.
     * @throws ParseException 
     * @throws IOException 
     * @throws DMLRuntimeException 
     * @throws HopsException 
     * @throws LanguageException 
     * @throws DMLUnsupportedOperationException 
     * @throws LopsException 
     * @throws DMLException 
     */
    private static void execute(String dmlScriptStr, String fnameOptConfig, HashMap<String, String> argVals,
            String[] allArgs, boolean parsePyDML) throws ParseException, IOException, DMLRuntimeException,
            LanguageException, HopsException, LopsException, DMLUnsupportedOperationException {
        //print basic time and environment info
        printStartExecInfo(dmlScriptStr);

        //Step 1: parse configuration files
        DMLConfig conf = DMLConfig.readAndMergeConfigurationFiles(fnameOptConfig);
        ConfigurationManager.setConfig(conf);
        LOG.debug("\nDML config: \n" + conf.getConfigInfo());

        //Step 2: set local/remote memory if requested (for compile in AM context) 
        if (conf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
            DMLAppMasterUtils.setupConfigRemoteMaxMemory(conf);
        }

        //Step 3: parse dml script
        Statistics.startCompileTimer();
        AParserWrapper parser = AParserWrapper.createParser(parsePyDML);
        DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);

        //Step 4: construct HOP DAGs (incl LVA and validate)
        DMLTranslator dmlt = new DMLTranslator(prog);
        dmlt.liveVariableAnalysis(prog);
        dmlt.validateParseTree(prog);
        dmlt.constructHops(prog);

        if (LOG.isDebugEnabled()) {
            LOG.debug("\n********************** HOPS DAG (Before Rewrite) *******************");
            dmlt.printHops(prog);
            DMLTranslator.resetHopsDAGVisitStatus(prog);
        }

        //Step 5: rewrite HOP DAGs (incl IPA and memory estimates)
        dmlt.rewriteHopsDAG(prog);

        if (LOG.isDebugEnabled()) {
            LOG.debug("\n********************** HOPS DAG (After Rewrite) *******************");
            dmlt.printHops(prog);
            DMLTranslator.resetHopsDAGVisitStatus(prog);

            LOG.debug("\n********************** OPTIMIZER *******************\n" + "Level = "
                    + OptimizerUtils.getOptLevel() + "\n" + "Available Memory = "
                    + ((double) InfrastructureAnalyzer.getLocalMaxMemory() / 1024 / 1024) + " MB" + "\n"
                    + "Memory Budget = " + ((double) OptimizerUtils.getLocalMemBudget() / 1024 / 1024) + " MB"
                    + "\n");
        }

        //Step 6: construct lops (incl exec type and op selection)
        dmlt.constructLops(prog);

        if (LOG.isDebugEnabled()) {
            LOG.debug("\n********************** LOPS DAG *******************");
            dmlt.printLops(prog);
            dmlt.resetLopsDAGVisitStatus(prog);
        }

        //Step 7: generate runtime program
        Program rtprog = prog.getRuntimeProgram(conf);

        if (LOG.isDebugEnabled()) {
            LOG.debug("********************** Instructions *******************");
            rtprog.printMe();
            LOG.debug("*******************************************************");
        }

        //Step 8: [optional global data flow optimization]
        if (OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY)) {
            LOG.warn("Optimization level '" + OptimizationLevel.O4_GLOBAL_TIME_MEMORY + "' "
                    + "is still in experimental state and not intended for production use.");
            rtprog = GlobalOptimizerWrapper.optimizeProgram(prog, rtprog);
        }

        //launch SystemML appmaster (if requested and not already in launched AM)
        if (conf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
            if (!isActiveAM() && DMLYarnClientProxy.launchDMLYarnAppmaster(dmlScriptStr, conf, allArgs, rtprog))
                return; //AM launched successfully; if the AM launch fails, fall through to normal execution
            if (isActiveAM()) //in AM context (not failed AM launch)
                DMLAppMasterUtils.setupProgramMappingRemoteMaxMemory(rtprog);
        }

        //Step 9: prepare statistics [and optional explain output]
        //count number of compiled MR jobs / SP instructions
        ExplainCounts counts = Explain.countDistributedOperations(rtprog);
        Statistics.resetNoOfCompiledJobs(counts.numJobs);

        //explain plan of program (hops or runtime)
        if (EXPLAIN != ExplainType.NONE) {
            LOG.info("EXPLAIN (" + EXPLAIN.toString() + "):\n" + Explain.explainMemoryBudget(counts) + "\n"
                    + Explain.explainDegreeOfParallelism(counts) + Explain.explain(prog, rtprog, EXPLAIN));
        }

        Statistics.stopCompileTimer();

        //double costs = CostEstimationWrapper.getTimeEstimate(rtprog, ExecutionContextFactory.createContext());
        //System.out.println("Estimated costs: "+costs);

        //Step 10: execute runtime program
        Statistics.startRunTimer();
        ExecutionContext ec = null;
        try {
            initHadoopExecution(conf);

            //run execute (w/ exception handling to ensure proper shutdown)
            ec = ExecutionContextFactory.createContext(rtprog);
            rtprog.execute(ec);

        } finally { //ensure cleanup/shutdown
            if (ec instanceof SparkExecutionContext) { //instanceof handles null
                ((SparkExecutionContext) ec).close();
            }

            //display statistics (incl caching stats if enabled)
            Statistics.stopRunTimer();
            LOG.info(Statistics.display());
            LOG.info("END DML run " + getDateTime());

            //cleanup scratch_space and all working dirs
            cleanupHadoopExecution(conf);
        }
    }

    /**
     * launchDebugger: Launcher for DML debugger. This method should be called after 
     * execution and debug properties have been correctly set, and customized parameters 
     * have been put into _argVals
     * @param  dmlScriptStr DML script contents (including new lines)
     * @param  fnameOptConfig Full path of configuration file for SystemML
     * @param  argVals Key-value pairs defining arguments of DML script
     * @param  parsePyDML true if the script should be parsed as Python-like DML
     * @throws ParseException 
     * @throws IOException 
     * @throws DMLRuntimeException 
     * @throws DMLDebuggerException
     * @throws HopsException 
     * @throws LanguageException 
     * @throws DMLUnsupportedOperationException 
     * @throws LopsException 
     * @throws DMLException 
     */
    private static void launchDebugger(String dmlScriptStr, String fnameOptConfig, HashMap<String, String> argVals,
            boolean parsePyDML) throws ParseException, IOException, DMLRuntimeException, DMLDebuggerException,
            LanguageException, HopsException, LopsException, DMLUnsupportedOperationException {
        //produce debugging information (parse, compile and generate runtime program for a given DML script)
        DMLDebuggerProgramInfo p = compileForDebug(dmlScriptStr, fnameOptConfig, argVals, parsePyDML);

        try {
            //set execution environment
            initHadoopExecution(p.conf);

            //initialize an instance of SystemML debugger
            DMLDebugger SystemMLdb = new DMLDebugger(p, dmlScriptStr, argVals);
            //run SystemML debugger
            SystemMLdb.runSystemMLDebugger();
        } finally {
            //cleanup scratch_space and all working dirs
            cleanupHadoopExecution(p.conf);
        }
    }

    /**
     * compile: Compile DML script and generate hops, lops and runtime program for debugger. 
     * This method should be called after execution and debug properties have been set, and 
     * customized parameters have been put into _argVals
     * @param  dmlScriptStr DML script contents (including new lines)
     * @param  fnameOptConfig Full path of configuration file for SystemML
     * @param  argVals Key-value pairs defining arguments of DML script
     * @param  parsePyDML true if the script should be parsed as Python-like DML
     * @return dbprog Class containing parsed and compiled DML script w/ hops, lops and runtime program   
     * @throws ParseException
     * @throws IOException
     * @throws DMLRuntimeException
     * @throws LanguageException
     * @throws HopsException
     * @throws LopsException
     * @throws DMLUnsupportedOperationException
     */
    //TODO: MB: remove this redundant compile and execute (or at least remove from DMLScript)
    //TODO: This method should be private once debugger infrastructure is on top of the programmatic API  
    public static DMLDebuggerProgramInfo compileForDebug(String dmlScriptStr, String fnameOptConfig,
            HashMap<String, String> argVals, boolean parsePyDML) throws ParseException, IOException,
            DMLRuntimeException, LanguageException, HopsException, LopsException, DMLUnsupportedOperationException {
        DMLDebuggerProgramInfo dbprog = new DMLDebuggerProgramInfo();

        //Step 1: parse configuration files
        dbprog.conf = DMLConfig.readAndMergeConfigurationFiles(fnameOptConfig);
        ConfigurationManager.setConfig(dbprog.conf);

        //Step 2: parse dml script
        AParserWrapper parser = AParserWrapper.createParser(parsePyDML);
        dbprog.prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);

        //Step 3: construct HOP DAGs (incl LVA and validate)
        dbprog.dmlt = new DMLTranslator(dbprog.prog);
        dbprog.dmlt.liveVariableAnalysis(dbprog.prog);
        dbprog.dmlt.validateParseTree(dbprog.prog);
        dbprog.dmlt.constructHops(dbprog.prog);

        //Step 4: rewrite HOP DAGs (incl IPA and memory estimates)
        dbprog.dmlt.rewriteHopsDAG(dbprog.prog);

        //Step 5: construct LOP DAGs
        dbprog.dmlt.constructLops(dbprog.prog);

        //Step 6: generate runtime program
        dbprog.rtprog = dbprog.prog.getRuntimeProgram(dbprog.conf);

        return dbprog;
    }

    /**
     * @throws ParseException 
     * @throws IOException 
     * @throws DMLRuntimeException 
     * 
     */
    static void initHadoopExecution(DMLConfig config) throws IOException, ParseException, DMLRuntimeException {
        //check security aspects
        checkSecuritySetup(config);

        //create scratch space with appropriate permissions
        String scratch = config.getTextValue(DMLConfig.SCRATCH_SPACE);
        MapReduceTool.createDirIfNotExistOnHDFS(scratch, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);

        //cleanup working dirs from previous aborted runs with same pid in order to prevent conflicts
        cleanupHadoopExecution(config);

        //init caching (incl set active)
        LocalFileUtils.createWorkingDirectory();
        CacheableData.initCaching();

        //reset statistics (required if multiple scripts executed in one JVM)
        Statistics.resetNoOfExecutedJobs(0);
        if (STATISTICS) {
            CacheStatistics.reset();
            Statistics.reset();
        }
    }

    /**
     * 
     * @param config 
     * @throws IOException
     * @throws DMLRuntimeException 
     */
    private static void checkSecuritySetup(DMLConfig config) throws IOException, DMLRuntimeException {
        //analyze local configuration
        String userName = System.getProperty("user.name");
        HashSet<String> groupNames = new HashSet<String>();
        try {
            //check existence, for backwards compatibility to < hadoop 0.21
            //(getMethod throws NoSuchMethodException on older versions, caught below)
            if (UserGroupInformation.class.getMethod("getCurrentUser") != null) {
                String[] groups = UserGroupInformation.getCurrentUser().getGroupNames();
                for (String g : groups)
                    groupNames.add(g);
            }
        } catch (Exception ex) {
            //ignore: current user/groups not available on this hadoop version
        }

        //analyze hadoop configuration
        JobConf job = ConfigurationManager.getCachedJobConf();
        boolean localMode = InfrastructureAnalyzer.isLocalMode(job);
        String taskController = job.get("mapred.task.tracker.task-controller",
                "org.apache.hadoop.mapred.DefaultTaskController");
        String ttGroupName = job.get("mapreduce.tasktracker.group", "null");
        String perm = job.get(MRConfigurationNames.DFS_PERMISSIONS, "null"); //note: job.get("dfs.permissions.supergroup",null);
        URI fsURI = FileSystem.getDefaultUri(job);

        //determine security states
        boolean flagDiffUser = !(taskController.equals("org.apache.hadoop.mapred.LinuxTaskController") //runs map/reduce tasks as the current user
                || localMode // run in the same JVM anyway
                || groupNames.contains(ttGroupName)); //user in task tracker group 
        boolean flagLocalFS = fsURI == null || fsURI.getScheme().equals("file");
        boolean flagSecurity = perm.equals("yes");

        LOG.debug("SystemML security check: " + "local.user.name = " + userName + ", " + "local.user.groups = "
                + ProgramConverter.serializeStringCollection(groupNames) + ", " + "mapred.job.tracker = "
                + job.get("mapred.job.tracker") + ", " + "mapred.task.tracker.task-controller = " + taskController
                + "," + "mapreduce.tasktracker.group = " + ttGroupName + ", " + "fs.default.name = "
                + ((fsURI != null) ? fsURI.getScheme() : "null") + ", " + MRConfigurationNames.DFS_PERMISSIONS
                + " = " + perm);

        //print warning if permission issues possible
        if (flagDiffUser && (flagLocalFS || flagSecurity)) {
            LOG.warn("Cannot run map/reduce tasks as user '" + userName + "'. Using tasktracker group '"
                    + ttGroupName + "'.");
        }

        //validate external filenames working directories
        String localtmpdir = config.getTextValue(DMLConfig.LOCAL_TMP_DIR);
        String hdfstmpdir = config.getTextValue(DMLConfig.SCRATCH_SPACE);
        if (!LocalFileUtils.validateExternalFilename(localtmpdir, false))
            throw new DMLRuntimeException("Invalid (non-trustworthy) local working directory.");
        if (!LocalFileUtils.validateExternalFilename(hdfstmpdir, true))
            throw new DMLRuntimeException("Invalid (non-trustworthy) hdfs working directory.");
    }

    /**
     * 
     * @param config
     * @throws IOException
     * @throws ParseException
     */
    private static void cleanupHadoopExecution(DMLConfig config) throws IOException, ParseException {
        //create dml-script-specific suffix
        StringBuilder sb = new StringBuilder();
        sb.append(Lop.FILE_SEPARATOR);
        sb.append(Lop.PROCESS_PREFIX);
        sb.append(DMLScript.getUUID());
        String dirSuffix = sb.toString();

        //1) cleanup scratch space (everything for current uuid) 
        //(required otherwise export to hdfs would skip assumed unnecessary writes if same name)
        MapReduceTool.deleteFileIfExistOnHDFS(config.getTextValue(DMLConfig.SCRATCH_SPACE) + dirSuffix);

        //2) cleanup hadoop working dirs (only required for LocalJobRunner (local job tracker), because
        //this implementation does not create job specific sub directories)
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        if (InfrastructureAnalyzer.isLocalMode(job)) {
            try {
                LocalFileUtils.deleteFileIfExists(DMLConfig.LOCAL_MR_MODE_STAGING_DIR + //staging dir (for local mode only) 
                        dirSuffix);
                LocalFileUtils.deleteFileIfExists(MRJobConfiguration.getLocalWorkingDirPrefix(job) + //local dir
                        dirSuffix);
                MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getSystemWorkingDirPrefix(job) + //system dir
                        dirSuffix);
                MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getStagingWorkingDirPrefix(job) + //staging dir
                        dirSuffix);
            } catch (Exception ex) {
                //we give only a warning because those directories are written by the mapred daemon
                //and hence, execution can still succeed
                LOG.warn("Unable to cleanup hadoop working dirs: " + ex.getMessage());
            }
        }

        //3) cleanup systemml-internal working dirs
        CacheableData.cleanupCacheDir(); //might be local/hdfs
        LocalFileUtils.cleanupWorkingDirectory();
    }

    ///////////////////////////////
    // private internal helper functionalities
    ////////

    /**
     * 
     * @param fnameScript
     * @param fnameOptConfig
     * @param argVals
     */
    private static void printInvocationInfo(String fnameScript, String fnameOptConfig,
            HashMap<String, String> argVals) {
        LOG.debug("****** args to DML Script ******\n" + "UUID: " + getUUID() + "\n" + "SCRIPT PATH: " + fnameScript
                + "\n" + "RUNTIME: " + rtplatform + "\n" + "BUILTIN CONFIG: "
                + DMLConfig.DEFAULT_SYSTEMML_CONFIG_FILEPATH + "\n" + "OPTIONAL CONFIG: " + fnameOptConfig + "\n");

        if (!argVals.isEmpty()) {
            LOG.debug("Script arguments are: \n");
            for (int i = 1; i <= argVals.size(); i++)
                LOG.debug("Script argument $" + i + " = " + argVals.get("$" + i));
        }
    }

    private static void printStartExecInfo(String dmlScriptString) {
        LOG.info("BEGIN DML run " + getDateTime());
        LOG.debug("DML script: \n" + dmlScriptString);

        if (rtplatform == RUNTIME_PLATFORM.HADOOP || rtplatform == RUNTIME_PLATFORM.HYBRID) {
            String hadoop_home = System.getenv("HADOOP_HOME");
            LOG.info("HADOOP_HOME: " + hadoop_home);
        }
    }

    /**
     * 
     * @return
     */
    private static String getDateTime() {
        DateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
        Date date = new Date();
        return dateFormat.format(date);
    }

    /**
     * 
     * @throws DMLException
     */
    private static void cleanSystemMLWorkspace() throws DMLException {
        try {
            //read the default config
            DMLConfig conf = DMLConfig.readAndMergeConfigurationFiles(null);

            //run cleanup job to clean remote local tmp dirs
            CleanupMR.runJob(conf);

            //cleanup scratch space (on HDFS)
            String scratch = conf.getTextValue(DMLConfig.SCRATCH_SPACE);
            if (scratch != null)
                MapReduceTool.deleteFileIfExistOnHDFS(scratch);

            //cleanup local working dir
            String localtmp = conf.getTextValue(DMLConfig.LOCAL_TMP_DIR);
            if (localtmp != null)
                LocalFileUtils.cleanupRcWorkingDirectory(localtmp);
        } catch (Exception ex) {
            throw new DMLException("Failed to run SystemML workspace cleanup.", ex);
        }
    }
}
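
Usage example

For completeness, here is a minimal sketch of how the public API above might be driven programmatically. The script path and arguments are illustrative placeholders, not part of DMLScript itself:

import com.ibm.bi.dml.api.DMLException;
import com.ibm.bi.dml.api.DMLScript;

public class RunDMLExample {
    public static void main(String[] args) throws DMLException {
        // equivalent to: hadoop jar SystemML.jar -f /tmp/test.dml -exec hybrid -stats -nvargs X=in.csv
        String[] dmlArgs = { "-f", "/tmp/test.dml", "-exec", "hybrid",
                "-stats", "-nvargs", "X=in.csv" };
        boolean success = DMLScript.executeScript(dmlArgs);
        System.out.println("DML script finished: " + success);
    }
}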