org.apache.sysml.runtime.util.ProgramConverter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sysml.runtime.util.ProgramConverter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.StringTokenizer;
import java.util.stream.Collectors;
import java.util.Map.Entry;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.conf.CompilerConfig.ConfigType;
import org.apache.sysml.conf.CompilerConfig;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.hops.Hop;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.hops.recompile.Recompiler;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.parser.DMLProgram;
import org.apache.sysml.parser.DataIdentifier;
import org.apache.sysml.parser.ForStatementBlock;
import org.apache.sysml.parser.IfStatementBlock;
import org.apache.sysml.parser.ParForStatementBlock;
import org.apache.sysml.parser.StatementBlock;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.parser.ParForStatementBlock.ResultVar;
import org.apache.sysml.parser.WhileStatementBlock;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.codegen.CodegenUtils;
import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock;
import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP;
import org.apache.sysml.runtime.controlprogram.ForProgramBlock;
import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock;
import org.apache.sysml.runtime.controlprogram.IfProgramBlock;
import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
import org.apache.sysml.runtime.controlprogram.Program;
import org.apache.sysml.runtime.controlprogram.ProgramBlock;
import org.apache.sysml.runtime.controlprogram.WhileProgramBlock;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
import org.apache.sysml.runtime.controlprogram.paramserv.SparkPSBody;
import org.apache.sysml.runtime.controlprogram.parfor.ParForBody;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.instructions.CPInstructionParser;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.InstructionParser;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.cp.BooleanObject;
import org.apache.sysml.runtime.instructions.cp.CPInstruction;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.instructions.cp.DoubleObject;
import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
import org.apache.sysml.runtime.instructions.cp.IntObject;
import org.apache.sysml.runtime.instructions.cp.ListObject;
import org.apache.sysml.runtime.instructions.cp.ScalarObject;
import org.apache.sysml.runtime.instructions.cp.SpoofCPInstruction;
import org.apache.sysml.runtime.instructions.cp.StringObject;
import org.apache.sysml.runtime.instructions.cp.VariableCPInstruction;
import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
import org.apache.sysml.runtime.instructions.mr.MRInstruction;
import org.apache.sysml.runtime.instructions.spark.SPInstruction;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MetaDataFormat;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.udf.ExternalFunctionInvocationInstruction;

/**
 * Program converter functionality for 
 *   (1) creating deep copies of program blocks, instructions, and function program blocks, and 
 *   (2) serializing and parsing programs, program blocks, and function program blocks.
 * 
 */
//TODO: rewrite class to instance-based invocation (grown gradually and now inappropriate design)
public class ProgramConverter {
    // Class-wide logger, named after this class.
    protected static final Log LOG = LogFactory.getLog(ProgramConverter.class.getName());

    // ---- Delimiters of the serialized program format ----
    //use escaped unicodes for separators in order to prevent string conflict
    //(the trailing comment on each line shows the plain character the separator stands in for)
    public static final String NEWLINE = "\n"; //System.lineSeparator();
    public static final String COMPONENTS_DELIM = "\u236e"; //semicolon w/ bar; ";";
    public static final String ELEMENT_DELIM = "\u236a"; //comma w/ bar; ",";
    public static final String ELEMENT_DELIM2 = ",";
    public static final String DATA_FIELD_DELIM = "\u007c"; //"|";
    public static final String KEY_VALUE_DELIM = "\u003d"; //"=";
    // LEVELIN/LEVELOUT open and close a nested level; all block-style *_BEGIN/*_END markers below use them
    public static final String LEVELIN = "\u23a8"; //variant of left curly bracket; "\u007b"; //"{";
    public static final String LEVELOUT = "\u23ac"; //variant of right curly bracket; "\u007d"; //"}";
    // Literal tokens of the format (presumably placeholder/marker values; their use is outside this part of the file)
    public static final String EMPTY = "null";
    public static final String DASH = "-";
    public static final String REF = "ref";
    public static final String LIST_ELEMENT_DELIM = "\t";

    // CDATA wrapper; note that CDATA_END carries a leading blank
    public static final String CDATA_BEGIN = "<![CDATA[";
    public static final String CDATA_END = " ]]>";

    // ---- Section markers ----
    // Block-style sections start with a name followed by LEVELIN and end with LEVELOUT.
    // VARS, INST and EC sections start with a plain label and have an empty end marker.
    public static final String PROG_BEGIN = " PROG" + LEVELIN;
    public static final String PROG_END = LEVELOUT;
    public static final String VARS_BEGIN = "VARS: ";
    public static final String VARS_END = "";
    public static final String PBS_BEGIN = " PBS" + LEVELIN;
    public static final String PBS_END = LEVELOUT;
    public static final String INST_BEGIN = " INST: ";
    public static final String INST_END = "";
    public static final String EC_BEGIN = " EC: ";
    public static final String EC_END = "";
    public static final String PB_BEGIN = " PB" + LEVELIN;
    public static final String PB_END = LEVELOUT;
    // Opening markers per program block type; no type-specific end markers are declared here
    public static final String PB_WHILE = " WHILE" + LEVELIN;
    public static final String PB_FOR = " FOR" + LEVELIN;
    public static final String PB_PARFOR = " PARFOR" + LEVELIN;
    public static final String PB_IF = " IF" + LEVELIN;
    public static final String PB_FC = " FC" + LEVELIN;
    public static final String PB_EFC = " EFC" + LEVELIN;

    // Configuration key (presumably for the statistics flag; usage is outside this part of the file)
    public static final String CONF_STATS = "stats";

    // Used for parfor
    // (body is wrapped into CDATA and one PARFORBODY level)
    public static final String PARFORBODY_BEGIN = CDATA_BEGIN + "PARFORBODY" + LEVELIN;
    public static final String PARFORBODY_END = LEVELOUT + CDATA_END;

    // Used for paramserv builtin function
    // (same wrapping as the parfor body, with a PSBODY level)
    public static final String PSBODY_BEGIN = CDATA_BEGIN + "PSBODY" + LEVELIN;
    public static final String PSBODY_END = LEVELOUT + CDATA_END;

    //exception msgs
    //NOTE(review): two messages contain typos ("PRogramBlockCP", "innner") and one lacks a blank
    //before its opening parenthesis; they are runtime-visible text and intentionally left unchanged here.
    public static final String NOT_SUPPORTED_EXTERNALFUNCTION_PB = "Not supported: ExternalFunctionProgramBlock contains MR instructions. "
            + "(ExternalFunctionPRogramBlockCP can be used)";
    public static final String NOT_SUPPORTED_MR_INSTRUCTION = "Not supported: Instructions of type other than CP instructions";
    public static final String NOT_SUPPORTED_MR_PARFOR = "Not supported: Nested ParFOR REMOTE_MR due to possible deadlocks."
            + "(LOCAL can be used for innner ParFOR)";
    public static final String NOT_SUPPORTED_PB = "Not supported: type of program block";

    ////////////////////////////////
    // CREATION of DEEP COPIES
    ////////////////////////////////

    /**
     * Builds a copy of the given execution context: a new context is created for the same
     * program and receives a clone of the source symbol table. Every matrix object whose
     * update type is in-place is additionally replaced by its own private copy, so that
     * each worker can update its result variable without touching the shared object.
     * 
     * @param ec execution context to copy
     * @return new execution context with cloned variables
     * @throws CloneNotSupportedException if CloneNotSupportedException occurs
     */
    public static ExecutionContext createDeepCopyExecutionContext(ExecutionContext ec)
            throws CloneNotSupportedException {
        ExecutionContext copy = ExecutionContextFactory.createContext(false, ec.getProgram());
        copy.setVariables((LocalVariableMap) ec.getVariables().clone());

        //give every in-place updated matrix its own object and block
        for (String name : copy.getVariables().keySet()) {
            Data value = copy.getVariables().get(name);
            if (!(value instanceof MatrixObject))
                continue;
            MatrixObject source = (MatrixObject) value;
            if (!source.getUpdateType().isInPlace())
                continue;

            MatrixObject target = new MatrixObject(source);
            if (source.getNnz() == 0) {
                //empty output: start from an empty block that is flagged as dense (preferred for
                //update in-place); nothing is allocated yet and a switch to sparse is left to the
                //in-place left indexing operation
                target.acquireModify(
                        new MatrixBlock((int) source.getNumRows(), (int) source.getNumColumns(), false));
            } else {
                //non-empty output: the worker needs a private copy of the current content
                MatrixBlock current = source.acquireRead();
                target.acquireModify(new MatrixBlock(current));
                source.release();
            }
            target.release();
            copy.setVariable(name, target);
        }

        return copy;
    }

    /**
     * Recursively deep-copies a list of program blocks. Control-flow blocks (while, for, parfor, if)
     * are copied via their dedicated copy methods, which in turn recurse into their child blocks;
     * all other blocks are copied as generic program blocks. Thread id placeholders are replaced
     * according to the given parallel worker in order to avoid conflicts between parworkers.
     * <p>
     * A parfor block is only copied as a parfor block if
     * {@code ParForProgramBlock.ALLOW_NESTED_PARALLELISM} is set; otherwise it is copied as a
     * plain for block.
     * 
     * @param childBlocks program blocks to copy
     * @param pid id set as thread id on the copies and used for the thread id replacement
     * @param IDPrefix passed through to the copy methods; -1 is treated as "still on master node"
     *                 when copying parfor blocks
     * @param fnStack function keys currently being copied (passed through to function copies)
     * @param fnCreated collects the keys of function copies created along the way
     * @param plain if true, full deep copy without id replacement
     * @param forceDeepCopy if true, force deep copy
     * @return list of copied program blocks, in input order
     */
    public static ArrayList<ProgramBlock> rcreateDeepCopyProgramBlocks(ArrayList<ProgramBlock> childBlocks,
            long pid, int IDPrefix, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain,
            boolean forceDeepCopy) {
        ArrayList<ProgramBlock> copies = new ArrayList<>();

        for (ProgramBlock pb : childBlocks) {
            Program prog = pb.getProgram();
            ProgramBlock copy;

            if (pb instanceof WhileProgramBlock) {
                copy = createDeepCopyWhileProgramBlock((WhileProgramBlock) pb, pid, IDPrefix, prog, fnStack,
                        fnCreated, plain, forceDeepCopy);
            } else if (pb instanceof ParForProgramBlock) {
                //parfor is tested before for because every parfor block is also a for block
                copy = ParForProgramBlock.ALLOW_NESTED_PARALLELISM
                        ? createDeepCopyParForProgramBlock((ParForProgramBlock) pb, pid, IDPrefix, prog,
                                fnStack, fnCreated, plain, forceDeepCopy)
                        : createDeepCopyForProgramBlock((ForProgramBlock) pb, pid, IDPrefix, prog, fnStack,
                                fnCreated, plain, forceDeepCopy);
            } else if (pb instanceof ForProgramBlock) {
                copy = createDeepCopyForProgramBlock((ForProgramBlock) pb, pid, IDPrefix, prog, fnStack,
                        fnCreated, plain, forceDeepCopy);
            } else if (pb instanceof IfProgramBlock) {
                copy = createDeepCopyIfProgramBlock((IfProgramBlock) pb, pid, IDPrefix, prog, fnStack,
                        fnCreated, plain, forceDeepCopy);
            } else {
                //last-level program block (general case, used for most blocks)
                copy = new ProgramBlock(prog);
                //statement block copy is needed for recompile in the master node JVM
                StatementBlock sbCopy = createStatementBlockCopy(pb.getStatementBlock(), pid, plain,
                        forceDeepCopy);
                copy.setStatementBlock(sbCopy);
                copy.setThreadID(pid);
            }

            //instructions are copied for every block type
            ArrayList<Instruction> instCopy = createDeepCopyInstructionSet(pb.getInstructions(), pid, IDPrefix,
                    prog, fnStack, fnCreated, plain, true);
            copy.setInstructions(instCopy);

            copies.add(copy);
        }

        return copies;
    }

    /**
     * Creates a deep copy of a while program block, including its predicate instructions,
     * statement block, exit instructions and (recursively) its child blocks.
     * 
     * @param wpb while program block to copy
     * @param pid id set as thread id on the copy and used for the thread id replacement
     * @param IDPrefix passed through to the instruction and child block copies
     * @param prog runtime program the copy is attached to
     * @param fnStack function keys currently being copied
     * @param fnCreated collects the keys of function copies created along the way
     * @param plain if true, full deep copy without id replacement
     * @param forceDeepCopy if true, force deep copy of statement blocks
     * @return copied while program block
     */
    public static WhileProgramBlock createDeepCopyWhileProgramBlock(WhileProgramBlock wpb, long pid, int IDPrefix,
            Program prog, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain,
            boolean forceDeepCopy) {
        //predicate first, as it is a constructor argument of the copy
        ArrayList<Instruction> predicateCopy = createDeepCopyInstructionSet(wpb.getPredicate(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        WhileProgramBlock copy = new WhileProgramBlock(prog, predicateCopy);

        WhileStatementBlock origSb = (WhileStatementBlock) wpb.getStatementBlock();
        copy.setStatementBlock(createWhileStatementBlockCopy(origSb, pid, plain, forceDeepCopy));
        copy.setThreadID(pid);

        ArrayList<Instruction> exitCopy = createDeepCopyInstructionSet(wpb.getExitInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setExitInstructions2(exitCopy);

        ArrayList<ProgramBlock> bodyCopy = rcreateDeepCopyProgramBlocks(wpb.getChildBlocks(), pid, IDPrefix,
                fnStack, fnCreated, plain, forceDeepCopy);
        copy.setChildBlocks(bodyCopy);

        return copy;
    }

    /**
     * Creates a deep copy of an if program block, including its predicate instructions,
     * statement block, exit instructions and (recursively) the child blocks of both branches.
     * 
     * @param ipb if program block to copy
     * @param pid id set as thread id on the copy and used for the thread id replacement
     * @param IDPrefix passed through to the instruction and child block copies
     * @param prog runtime program the copy is attached to
     * @param fnStack function keys currently being copied
     * @param fnCreated collects the keys of function copies created along the way
     * @param plain if true, full deep copy without id replacement
     * @param forceDeepCopy if true, force deep copy of statement blocks
     * @return copied if program block
     */
    public static IfProgramBlock createDeepCopyIfProgramBlock(IfProgramBlock ipb, long pid, int IDPrefix,
            Program prog, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain,
            boolean forceDeepCopy) {
        //predicate first, as it is a constructor argument of the copy
        ArrayList<Instruction> predicateCopy = createDeepCopyInstructionSet(ipb.getPredicate(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        IfProgramBlock copy = new IfProgramBlock(prog, predicateCopy);

        IfStatementBlock origSb = (IfStatementBlock) ipb.getStatementBlock();
        copy.setStatementBlock(createIfStatementBlockCopy(origSb, pid, plain, forceDeepCopy));
        copy.setThreadID(pid);

        ArrayList<Instruction> exitCopy = createDeepCopyInstructionSet(ipb.getExitInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setExitInstructions2(exitCopy);

        //if-branch before else-branch
        ArrayList<ProgramBlock> ifBodyCopy = rcreateDeepCopyProgramBlocks(ipb.getChildBlocksIfBody(), pid,
                IDPrefix, fnStack, fnCreated, plain, forceDeepCopy);
        copy.setChildBlocksIfBody(ifBodyCopy);
        ArrayList<ProgramBlock> elseBodyCopy = rcreateDeepCopyProgramBlocks(ipb.getChildBlocksElseBody(), pid,
                IDPrefix, fnStack, fnCreated, plain, forceDeepCopy);
        copy.setChildBlocksElseBody(elseBodyCopy);

        return copy;
    }

    /**
     * Creates a deep copy of a for program block: statement block, from/to/increment/exit
     * instructions and (recursively) its child blocks. The iteration variable is taken over
     * from the original block.
     * 
     * @param fpb for program block to copy
     * @param pid id set as thread id on the copy and used for the thread id replacement
     * @param IDPrefix passed through to the instruction and child block copies
     * @param prog runtime program the copy is attached to
     * @param fnStack function keys currently being copied
     * @param fnCreated collects the keys of function copies created along the way
     * @param plain if true, full deep copy without id replacement
     * @param forceDeepCopy if true, force deep copy of statement blocks
     * @return copied for program block
     */
    public static ForProgramBlock createDeepCopyForProgramBlock(ForProgramBlock fpb, long pid, int IDPrefix,
            Program prog, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain,
            boolean forceDeepCopy) {
        ForProgramBlock copy = new ForProgramBlock(prog, fpb.getIterVar());

        ForStatementBlock origSb = (ForStatementBlock) fpb.getStatementBlock();
        copy.setStatementBlock(createForStatementBlockCopy(origSb, pid, plain, forceDeepCopy));
        copy.setThreadID(pid);

        //loop header and exit instructions
        ArrayList<Instruction> fromCopy = createDeepCopyInstructionSet(fpb.getFromInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setFromInstructions(fromCopy);
        ArrayList<Instruction> toCopy = createDeepCopyInstructionSet(fpb.getToInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setToInstructions(toCopy);
        ArrayList<Instruction> incrCopy = createDeepCopyInstructionSet(fpb.getIncrementInstructions(), pid,
                IDPrefix, prog, fnStack, fnCreated, plain, true);
        copy.setIncrementInstructions(incrCopy);
        ArrayList<Instruction> exitCopy = createDeepCopyInstructionSet(fpb.getExitInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setExitInstructions(exitCopy);

        //loop body
        ArrayList<ProgramBlock> bodyCopy = rcreateDeepCopyProgramBlocks(fpb.getChildBlocks(), pid, IDPrefix,
                fnStack, fnCreated, plain, forceDeepCopy);
        copy.setChildBlocks(bodyCopy);

        return copy;
    }

    /**
     * Creates a shallow copy of a for program block: a new block object that shares the
     * instruction lists and child blocks of the original. Statement block and thread id
     * are not set on the copy.
     * 
     * @param fpb for program block to copy
     * @param prog runtime program the copy is attached to
     * @return new for program block referencing the original's instructions and child blocks
     */
    public static ForProgramBlock createShallowCopyForProgramBlock(ForProgramBlock fpb, Program prog) {
        final ForProgramBlock shallow = new ForProgramBlock(prog, fpb.getIterVar());

        //loop header and exit instructions (shared references)
        shallow.setFromInstructions(fpb.getFromInstructions());
        shallow.setToInstructions(fpb.getToInstructions());
        shallow.setIncrementInstructions(fpb.getIncrementInstructions());
        shallow.setExitInstructions(fpb.getExitInstructions());

        //loop body (shared reference)
        shallow.setChildBlocks(fpb.getChildBlocks());

        return shallow;
    }

    /**
     * Creates a copy of a parfor program block with optimization and monitor report disabled.
     * Statement block and from/to/increment/exit instructions are always copied; the child
     * blocks are only deep-copied if {@code plain} or {@code forceDeepCopy} is set and are
     * shared with the original block otherwise.
     * 
     * @param pfpb parfor program block to copy
     * @param pid id set as thread id on the copy and used for the thread id replacement
     * @param IDPrefix -1 if still on the master node; otherwise handed to the parfor constructor
     * @param prog runtime program the copy is attached to
     * @param fnStack function keys currently being copied
     * @param fnCreated collects the keys of function copies created along the way
     * @param plain if true, full deep copy without id replacement
     * @param forceDeepCopy if true, force deep copy
     * @return copied parfor program block
     */
    public static ParForProgramBlock createDeepCopyParForProgramBlock(ParForProgramBlock pfpb, long pid,
            int IDPrefix, Program prog, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain,
            boolean forceDeepCopy) {
        //master node (-1) vs. child of remote ParWorker at any level
        ParForProgramBlock copy = (IDPrefix == -1)
                ? new ParForProgramBlock(prog, pfpb.getIterVar(), pfpb.getParForParams(),
                        pfpb.getResultVariables())
                : new ParForProgramBlock(IDPrefix, prog, pfpb.getIterVar(), pfpb.getParForParams(),
                        pfpb.getResultVariables());

        ForStatementBlock origSb = (ForStatementBlock) pfpb.getStatementBlock();
        copy.setStatementBlock(createForStatementBlockCopy(origSb, pid, plain, forceDeepCopy));
        copy.setThreadID(pid);

        //both already done in top-level parfor
        copy.disableOptimization();
        copy.disableMonitorReport();

        //loop header and exit instructions
        ArrayList<Instruction> fromCopy = createDeepCopyInstructionSet(pfpb.getFromInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setFromInstructions(fromCopy);
        ArrayList<Instruction> toCopy = createDeepCopyInstructionSet(pfpb.getToInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setToInstructions(toCopy);
        ArrayList<Instruction> incrCopy = createDeepCopyInstructionSet(pfpb.getIncrementInstructions(), pid,
                IDPrefix, prog, fnStack, fnCreated, plain, true);
        copy.setIncrementInstructions(incrCopy);
        ArrayList<Instruction> exitCopy = createDeepCopyInstructionSet(pfpb.getExitInstructions(), pid, IDPrefix,
                prog, fnStack, fnCreated, plain, true);
        copy.setExitInstructions(exitCopy);

        //NOTE: normally the body is not copied recursively, because (1) it is copied on each
        //execution in this block anyway and (2) placeholders are left as they are; an explicit
        //deep copy is only made on request (plain or forced).
        ArrayList<ProgramBlock> body = (plain || forceDeepCopy)
                ? rcreateDeepCopyProgramBlocks(pfpb.getChildBlocks(), pid, IDPrefix, fnStack, fnCreated, plain,
                        forceDeepCopy)
                : pfpb.getChildBlocks();
        copy.setChildBlocks(body);

        return copy;
    }

    /**
     * Creates a deep copy of a function program block and registers it in the given program.
     * Function program blocks are referenced centrally as singletons, hence explicit copies are
     * needed in order to prevent conflicting writes of temporary variables (see
     * ExternalFunctionProgramBlock).
     * <p>
     * Nothing is done if the program already contains a function under the name of the copy.
     * If the function is currently being copied (its key is on {@code fnStack}), the original
     * block itself is registered under the new name instead of a copy.
     * 
     * @param namespace function namespace
     * @param oldName name of the function to copy
     * @param pid id appended (after {@code Lop.CP_CHILD_THREAD}) to the function name unless plain;
     *            also used as thread id of the copy and for the replacement in the base directory
     * @param IDPrefix -1 if the base directory of an external function carries the root thread id;
     *                 otherwise the child thread id to be replaced in it
     * @param prog runtime program
     * @param fnStack keys of functions currently being copied (stops recursive function calls)
     * @param fnCreated receives the key of the registered function
     * @param plain if true, the function name is kept unchanged
     */
    public static void createDeepCopyFunctionProgramBlock(String namespace, String oldName, long pid, int IDPrefix,
            Program prog, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain) {
        //fpb guaranteed to be non-null (checked inside getFunctionProgramBlock)
        FunctionProgramBlock fpb = prog.getFunctionProgramBlock(namespace, oldName);
        String copyName = plain ? oldName : (oldName + Lop.CP_CHILD_THREAD + pid);
        String copyKey = DMLProgram.constructFunctionKey(namespace, copyName);

        //prevent redundant deep copy if already existent
        if (prog.getFunctionProgramBlocks().containsKey(copyKey))
            return;

        //private lists of input and output parameters
        ArrayList<DataIdentifier> inputs = new ArrayList<>();
        if (fpb.getInputParams() != null)
            inputs.addAll(fpb.getInputParams());
        ArrayList<DataIdentifier> outputs = new ArrayList<>();
        if (fpb.getOutputParams() != null)
            outputs.addAll(fpb.getOutputParams());

        FunctionProgramBlock copy;
        if (fpb instanceof ExternalFunctionProgramBlockCP) {
            ExternalFunctionProgramBlockCP efpb = (ExternalFunctionProgramBlockCP) fpb;
            HashMap<String, String> otherParams = efpb.getOtherParams();
            if (IDPrefix == -1)
                copy = new ExternalFunctionProgramBlockCP(prog, inputs, outputs, otherParams,
                        saveReplaceFilenameThreadID(efpb.getBaseDir(), Lop.CP_ROOT_THREAD_ID,
                                Lop.CP_CHILD_THREAD + pid));
            else
                copy = new ExternalFunctionProgramBlockCP(prog, inputs, outputs, otherParams,
                        saveReplaceFilenameThreadID(efpb.getBaseDir(), Lop.CP_CHILD_THREAD + IDPrefix,
                                Lop.CP_CHILD_THREAD + pid));
        } else if (fpb instanceof ExternalFunctionProgramBlock) {
            ExternalFunctionProgramBlock efpb = (ExternalFunctionProgramBlock) fpb;
            HashMap<String, String> otherParams = efpb.getOtherParams();
            if (IDPrefix == -1)
                copy = new ExternalFunctionProgramBlock(prog, inputs, outputs, otherParams,
                        saveReplaceFilenameThreadID(efpb.getBaseDir(), Lop.CP_ROOT_THREAD_ID,
                                Lop.CP_CHILD_THREAD + pid));
            else
                copy = new ExternalFunctionProgramBlock(prog, inputs, outputs, otherParams,
                        saveReplaceFilenameThreadID(efpb.getBaseDir(), Lop.CP_CHILD_THREAD + IDPrefix,
                                Lop.CP_CHILD_THREAD + pid));
        } else if (fnStack.contains(copyKey)) {
            //stop deep copy for recursive function calls
            copy = fpb;
        } else {
            //mark the function as "in progress" while its body is copied
            fnStack.add(copyKey);
            copy = new FunctionProgramBlock(prog, inputs, outputs);
            copy.setChildBlocks(rcreateDeepCopyProgramBlocks(fpb.getChildBlocks(), pid, IDPrefix, fnStack,
                    fnCreated, plain, fpb.isRecompileOnce()));
            copy.setRecompileOnce(fpb.isRecompileOnce());
            copy.setThreadID(pid);
            fnStack.remove(copyKey);
        }

        //note: instructions not used by function program block

        //register the function under its new name and record its key
        prog.addFunctionProgramBlock(namespace, copyName, copy);
        fnCreated.add(copyKey);
    }

    /**
     * Creates a plain deep copy (no id replacement) of the given function program block.
     * The child blocks are copied recursively, the statement block is shared with the
     * original, and the copy is not registered in the program.
     * 
     * @param fpb function program block to copy
     * @param fnStack function keys currently being copied (passed through)
     * @param fnCreated collects the keys of function copies created along the way
     * @return copied function program block
     * @throws DMLRuntimeException if {@code fpb} is null
     */
    public static FunctionProgramBlock createDeepCopyFunctionProgramBlock(FunctionProgramBlock fpb,
            HashSet<String> fnStack, HashSet<String> fnCreated) {
        if (fpb == null) {
            throw new DMLRuntimeException("Unable to create a deep copy of a non-existing FunctionProgramBlock.");
        }

        //private lists of input and output parameters
        ArrayList<DataIdentifier> inputs = new ArrayList<>();
        if (fpb.getInputParams() != null) {
            inputs.addAll(fpb.getInputParams());
        }
        ArrayList<DataIdentifier> outputs = new ArrayList<>();
        if (fpb.getOutputParams() != null) {
            outputs.addAll(fpb.getOutputParams());
        }

        FunctionProgramBlock copy = new FunctionProgramBlock(fpb.getProgram(), inputs, outputs);

        //plain copy with pid 0 and no ID prefix; deep copy of statement blocks is forced
        //for functions that are recompiled once
        ArrayList<ProgramBlock> bodyCopy = rcreateDeepCopyProgramBlocks(fpb.getChildBlocks(), 0, -1, fnStack,
                fnCreated, true, fpb.isRecompileOnce());
        copy.setChildBlocks(bodyCopy);
        copy.setStatementBlock(fpb.getStatementBlock());
        copy.setRecompileOnce(fpb.isRecompileOnce());
        //note: instructions not used by function program block

        return copy;
    }

    /**
     * Creates a deep copy of a list of instructions and replaces the placeholders of parworker
     * IDs with the concrete IDs of this parfor instance. This is a helper method used for
     * generating deep copies of program blocks.
     * <p>
     * For function call instructions, the called function is deep-copied as well if
     * {@code cpFunctions} is set.
     * 
     * @param instSet list of instructions
     * @param pid id used for the thread id replacement and for function copies
     * @param IDPrefix passed through to the function copies
     * @param prog runtime program in which function copies are registered
     * @param fnStack function keys currently being copied
     * @param fnCreated collects the keys of function copies created along the way
     * @param plain if true, function names are preserved
     * @param cpFunctions if true, called functions are copied and function calls are redirected
     * @return list of cloned instructions, in input order
     */
    public static ArrayList<Instruction> createDeepCopyInstructionSet(ArrayList<Instruction> instSet, long pid,
            int IDPrefix, Program prog, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain,
            boolean cpFunctions) {
        ArrayList<Instruction> clones = new ArrayList<>();

        for (Instruction current : instSet) {
            //the callee is copied before the calling instruction is cloned
            boolean copyCallee = cpFunctions && (current instanceof FunctionCallCPInstruction);
            if (copyCallee) {
                FunctionCallCPInstruction call = (FunctionCallCPInstruction) current;
                createDeepCopyFunctionProgramBlock(call.getNamespace(), call.getFunctionName(), pid, IDPrefix,
                        prog, fnStack, fnCreated, plain);
            }

            Instruction clone = cloneInstruction(current, pid, plain, cpFunctions);
            clones.add(clone);
        }

        return clones;
    }

    /**
     * Clones a single instruction and replaces root thread id references by the child thread
     * id of the given worker. CP, Spark, MR and GPU instructions are cloned by re-parsing their
     * string representation; MR job instructions are cloned via their copy constructor.
     * <p>
     * For function call instructions, the function name inside the instruction string is
     * redirected to the worker-specific copy if {@code cpFunctions} is set and {@code plain} is not.
     * 
     * @param oInst instruction to clone
     * @param pid id of the worker, used for the thread id and function name replacement
     * @param plain if true, function names are preserved
     * @param cpFunctions if true, function calls are redirected to worker-specific functions
     * @return cloned instruction
     * @throws DMLRuntimeException if the instruction type is not supported or cloning fails
     */
    public static Instruction cloneInstruction(Instruction oInst, long pid, boolean plain, boolean cpFunctions) {
        String instString = oInst.toString();
        Instruction clone;

        try {
            boolean parsable = oInst instanceof CPInstruction || oInst instanceof SPInstruction
                    || oInst instanceof MRInstruction || oInst instanceof GPUInstruction;

            if (parsable) {
                //without redirection the function name is preserved
                if (cpFunctions && !plain && oInst instanceof FunctionCallCPInstruction) {
                    FunctionCallCPInstruction call = (FunctionCallCPInstruction) oInst;
                    //safe replacement because target variables might include the function name
                    //note: this is no update-in-place in order to keep the original function name as basis
                    String fname = call.getFunctionName();
                    instString = call.updateInstStringFunctionName(fname, fname + Lop.CP_CHILD_THREAD + pid);
                }
                clone = InstructionParser.parseSingleInstruction(instString);
            } else if (oInst instanceof MRJobInstruction) {
                //clone via copy constructor
                clone = new MRJobInstruction((MRJobInstruction) oInst);
            } else {
                //thrown inside the try block, i.e., wrapped once more by the handler below
                throw new DMLRuntimeException("Failed to clone instruction: " + oInst);
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }

        //save replacement of thread id references in instructions
        return saveReplaceThreadID(clone, Lop.CP_ROOT_THREAD_ID, Lop.CP_CHILD_THREAD + pid);
    }

    /**
     * Creates a copy of a statement block for concurrent recompilation. A copy is only made if
     * parallel dynamic recompilation is allowed, the block is non-null, and the block either
     * requires recompilation or a deep copy is forced; otherwise the given block is returned
     * as is (possibly null).
     * <p>
     * The copy shares live-in/live-out and read/updated variable sets with the original and
     * receives a deep copy of the hops DAG.
     * 
     * @param sb statement block to copy, may be null
     * @param pid id used to update function names in the copied hops (unless plain)
     * @param plain if true, function names in the copied hops are left unchanged
     * @param forceDeepCopy if true, copy even if no recompilation is required
     * @return copied statement block, or {@code sb} itself if no copy is made
     * @throws DMLRuntimeException wrapping any exception raised while copying
     */
    public static StatementBlock createStatementBlockCopy(StatementBlock sb, long pid, boolean plain,
            boolean forceDeepCopy) {
        try {
            boolean copyRequired = ConfigurationManager
                    .getCompilerConfigFlag(ConfigType.ALLOW_PARALLEL_DYN_RECOMPILATION)
                    && sb != null //forced deep copy for function recompilation
                    && (Recompiler.requiresRecompilation(sb.getHops()) || forceDeepCopy);
            if (!copyRequired)
                return sb;

            //create new statement (shallow copy livein/liveout for recompile, line numbers for explain)
            StatementBlock copy = new StatementBlock();
            copy.setDMLProg(sb.getDMLProg());
            copy.setParseInfo(sb);
            copy.setLiveIn(sb.liveIn());
            copy.setLiveOut(sb.liveOut());
            copy.setUpdatedVariables(sb.variablesUpdated());
            copy.setReadVariables(sb.variablesRead());

            //deep copy hops dag for concurrent recompile
            ArrayList<Hop> hopsCopy = Recompiler.deepCopyHopsDag(sb.getHops());
            if (!plain)
                Recompiler.updateFunctionNames(hopsCopy, pid);
            copy.setHops(hopsCopy);
            copy.updateRecompilationFlag();

            return copy;
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    /**
     * Creates a copy of an if statement block for concurrent recompilation. A copy is only made
     * if parallel dynamic recompilation is allowed, the block is non-null, and its predicate
     * either requires recompilation or a deep copy is forced; otherwise the given block is
     * returned as is (possibly null).
     * <p>
     * The copy shares variable sets and child statements with the original and receives a
     * deep copy of the predicate hops DAG.
     * 
     * @param sb if statement block to copy, may be null
     * @param pid not used by this method
     * @param plain not used by this method
     * @param forceDeepCopy if true, copy even if no recompilation is required
     * @return copied statement block, or {@code sb} itself if no copy is made
     * @throws DMLRuntimeException wrapping any exception raised while copying
     */
    public static IfStatementBlock createIfStatementBlockCopy(IfStatementBlock sb, long pid, boolean plain,
            boolean forceDeepCopy) {
        try {
            boolean copyRequired = ConfigurationManager
                    .getCompilerConfigFlag(ConfigType.ALLOW_PARALLEL_DYN_RECOMPILATION)
                    && sb != null //forced deep copy for function recompile
                    && (Recompiler.requiresRecompilation(sb.getPredicateHops()) || forceDeepCopy);
            if (!copyRequired)
                return sb;

            //create new statement (shallow copy livein/liveout for recompile, line numbers for explain)
            IfStatementBlock copy = new IfStatementBlock();
            copy.setDMLProg(sb.getDMLProg());
            copy.setParseInfo(sb);
            copy.setLiveIn(sb.liveIn());
            copy.setLiveOut(sb.liveOut());
            copy.setUpdatedVariables(sb.variablesUpdated());
            copy.setReadVariables(sb.variablesRead());

            //shallow copy child statements
            copy.setStatements(sb.getStatements());

            //deep copy predicate hops dag for concurrent recompile
            Hop predicateCopy = Recompiler.deepCopyHopsDag(sb.getPredicateHops());
            copy.setPredicateHops(predicateCopy);
            copy.updatePredicateRecompilationFlag();

            return copy;
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    /**
     * Creates a copy of a while statement block for concurrent recompilation. A copy is only
     * made if parallel dynamic recompilation is allowed, the block is non-null, and its
     * predicate either requires recompilation or a deep copy is forced; otherwise the given
     * block is returned as is (possibly null).
     * <p>
     * The copy shares variable sets, update-in-place variables and child statements with the
     * original and receives a deep copy of the predicate hops DAG.
     * 
     * @param sb while statement block to copy, may be null
     * @param pid not used by this method
     * @param plain not used by this method
     * @param forceDeepCopy if true, copy even if no recompilation is required
     * @return copied statement block, or {@code sb} itself if no copy is made
     * @throws DMLRuntimeException wrapping any exception raised while copying
     */
    public static WhileStatementBlock createWhileStatementBlockCopy(WhileStatementBlock sb, long pid, boolean plain,
            boolean forceDeepCopy) {
        try {
            boolean copyRequired = ConfigurationManager
                    .getCompilerConfigFlag(ConfigType.ALLOW_PARALLEL_DYN_RECOMPILATION)
                    && sb != null //forced deep copy for function recompile
                    && (Recompiler.requiresRecompilation(sb.getPredicateHops()) || forceDeepCopy);
            if (!copyRequired)
                return sb;

            //create new statement (shallow copy livein/liveout for recompile, line numbers for explain)
            WhileStatementBlock copy = new WhileStatementBlock();
            copy.setDMLProg(sb.getDMLProg());
            copy.setParseInfo(sb);
            copy.setLiveIn(sb.liveIn());
            copy.setLiveOut(sb.liveOut());
            copy.setUpdatedVariables(sb.variablesUpdated());
            copy.setReadVariables(sb.variablesRead());
            copy.setUpdateInPlaceVars(sb.getUpdateInPlaceVars());

            //shallow copy child statements
            copy.setStatements(sb.getStatements());

            //deep copy predicate hops dag for concurrent recompile
            Hop predicateCopy = Recompiler.deepCopyHopsDag(sb.getPredicateHops());
            copy.setPredicateHops(predicateCopy);
            copy.updatePredicateRecompilationFlag();

            return copy;
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    /**
     * Returns a statement block for a for/parfor loop that can be recompiled concurrently.
     * <p>
     * A new block is created only if parallel dynamic recompilation is enabled, the given
     * block is non-null, and either one of the from/to/increment hop DAGs requires
     * recompilation or a deep copy is forced. The new block is a {@link ParForStatementBlock}
     * if the original is one, and a plain {@link ForStatementBlock} otherwise. Each of the
     * from/to/increment hop DAGs is deep copied only if the original block reports that the
     * respective predicate requires recompilation. In all other cases the given block itself
     * is returned.
     *
     * @param sb for statement block to copy (may be null)
     * @param pid not used by this method
     * @param plain not used by this method
     * @param forceDeepCopy if true, copy even if no predicate requires recompilation
     * @return the copied block, or {@code sb} if no copy was created
     * @throws DMLRuntimeException wrapping any exception raised while copying
     */
    public static ForStatementBlock createForStatementBlockCopy(ForStatementBlock sb, long pid, boolean plain,
            boolean forceDeepCopy) {
        try {
            boolean copyRequired = ConfigurationManager
                    .getCompilerConfigFlag(ConfigType.ALLOW_PARALLEL_DYN_RECOMPILATION)
                    && sb != null
                    && (Recompiler.requiresRecompilation(sb.getFromHops())
                            || Recompiler.requiresRecompilation(sb.getToHops())
                            || Recompiler.requiresRecompilation(sb.getIncrementHops()) || forceDeepCopy);
            if (!copyRequired)
                return sb;

            //preserve the concrete block type of the original
            ForStatementBlock copy;
            if (sb instanceof ParForStatementBlock)
                copy = new ParForStatementBlock();
            else
                copy = new ForStatementBlock();

            //shallow copy of livein/liveout for recompile, line numbers for explain
            copy.setDMLProg(sb.getDMLProg());
            copy.setParseInfo(sb);
            copy.setLiveIn(sb.liveIn());
            copy.setLiveOut(sb.liveOut());
            copy.setUpdatedVariables(sb.variablesUpdated());
            copy.setReadVariables(sb.variablesRead());
            copy.setUpdateInPlaceVars(sb.getUpdateInPlaceVars());

            //child statements are shared with the original
            copy.setStatements(sb.getStatements());

            //deep copy of the predicate hop DAGs for concurrent recompile
            if (sb.requiresFromRecompilation())
                copy.setFromHops(Recompiler.deepCopyHopsDag(sb.getFromHops()));
            if (sb.requiresToRecompilation())
                copy.setToHops(Recompiler.deepCopyHopsDag(sb.getToHops()));
            if (sb.requiresIncrementRecompilation())
                copy.setIncrementHops(Recompiler.deepCopyHopsDag(sb.getIncrementHops()));
            copy.updatePredicateRecompilationFlags();
            return copy;
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    ////////////////////////////////
    // SERIALIZATION 
    ////////////////////////////////

    /**
     * Serializes a Spark parameter-server body into a single string.
     * <p>
     * The output consists, in this order, of the DMLScript UUID, the DML config, the
     * statistics flag, all function program blocks of the program, the execution context,
     * and the program's top-level program blocks, each terminated by the component delimiter
     * and enclosed in the PS body begin/end markers.
     *
     * @param body body whose execution context and program are serialized
     * @param clsMap map passed on to the program block serializers to collect generated class data
     * @return serialized body
     */
    public static String serializeSparkPSBody(SparkPSBody body, HashMap<String, byte[]> clsMap) {
        ExecutionContext ec = body.getEc();
        StringBuilder out = new StringBuilder();
        out.append(PSBODY_BEGIN).append(NEWLINE);

        //DMLScript UUID (propagate original uuid for writing to scratch space)
        out.append(DMLScript.getUUID()).append(COMPONENTS_DELIM).append(NEWLINE);

        //DML config
        out.append(ConfigurationManager.getDMLConfig().serializeDMLConfig()).append(COMPONENTS_DELIM)
                .append(NEWLINE);

        //additional configurations
        out.append(CONF_STATS + "=" + ConfigurationManager.isStatistics()).append(COMPONENTS_DELIM)
                .append(NEWLINE);

        //program: every function program block is a serialization candidate
        out.append(PROG_BEGIN).append(NEWLINE);
        HashMap<String, FunctionProgramBlock> functions = ec.getProgram().getFunctionProgramBlocks();
        out.append(rSerializeFunctionProgramBlocks(functions, new HashSet<>(functions.keySet()), clsMap));
        out.append(PROG_END).append(NEWLINE).append(COMPONENTS_DELIM).append(NEWLINE);

        //execution context
        out.append(EC_BEGIN).append(serializeExecutionContext(ec)).append(EC_END).append(NEWLINE)
                .append(COMPONENTS_DELIM).append(NEWLINE);

        //program blocks
        out.append(PBS_BEGIN).append(NEWLINE);
        out.append(rSerializeProgramBlocks(ec.getProgram().getProgramBlocks(), clsMap));
        out.append(PBS_END).append(NEWLINE).append(COMPONENTS_DELIM).append(NEWLINE);

        out.append(PSBODY_END);
        return out.toString();
    }

    /**
     * Serializes a parfor body, using a fresh, empty map for generated class data.
     *
     * @param body parfor body to serialize
     * @return serialized parfor body
     */
    public static String serializeParForBody(ParForBody body) {
        HashMap<String, byte[]> clsMap = new HashMap<>();
        return serializeParForBody(body, clsMap);
    }

    /**
     * Serializes a parfor body into a single string.
     * <p>
     * The output consists, in this order, of the DMLScript UUID, the DML config, the
     * statistics flag, the function program blocks reachable from the child blocks, the
     * result variables, the execution context, and the child program blocks, enclosed in
     * the parfor body begin/end markers. A body without child blocks serializes to just
     * the begin and end markers.
     *
     * @param body parfor body to serialize
     * @param clsMap map passed on to the program block serializers to collect generated class data
     * @return serialized parfor body
     */
    public static String serializeParForBody(ParForBody body, HashMap<String, byte[]> clsMap) {
        ArrayList<ProgramBlock> childBlocks = body.getChildBlocks();
        ArrayList<ResultVar> resultVars = body.getResultVariables();
        ExecutionContext ec = body.getEc();

        if (childBlocks.isEmpty())
            return PARFORBODY_BEGIN + PARFORBODY_END;

        Program prog = childBlocks.get(0).getProgram();

        StringBuilder out = new StringBuilder();
        out.append(PARFORBODY_BEGIN).append(NEWLINE);

        //DMLScript UUID (propagate original uuid for writing to scratch space)
        out.append(DMLScript.getUUID()).append(COMPONENTS_DELIM).append(NEWLINE);

        //DML config
        out.append(ConfigurationManager.getDMLConfig().serializeDMLConfig()).append(COMPONENTS_DELIM)
                .append(NEWLINE);

        //additional configurations
        out.append(CONF_STATS + "=" + ConfigurationManager.isStatistics()).append(COMPONENTS_DELIM)
                .append(NEWLINE);

        //program
        out.append(PROG_BEGIN).append(NEWLINE).append(serializeProgram(prog, childBlocks, clsMap))
                .append(PROG_END).append(NEWLINE).append(COMPONENTS_DELIM).append(NEWLINE);

        //result variable names (no newline after the delimiter here)
        out.append(serializeResultVariables(resultVars)).append(COMPONENTS_DELIM);

        //execution context
        //note: this includes also the symbol table (serialize only the top-level variable map,
        //      symbol tables for nested/child blocks are created at parse time, on the remote side)
        out.append(EC_BEGIN).append(serializeExecutionContext(ec)).append(EC_END).append(NEWLINE)
                .append(COMPONENTS_DELIM).append(NEWLINE);

        //program blocks (no trailing component delimiter)
        out.append(PBS_BEGIN).append(NEWLINE).append(rSerializeProgramBlocks(childBlocks, clsMap))
                .append(PBS_END).append(NEWLINE);

        out.append(PARFORBODY_END);
        return out.toString();
    }

    /**
     * Serializes the function program blocks of a program that are reachable from the
     * given program blocks.
     * <p>
     * A program contains variables, program blocks and function program blocks; in order
     * to avoid redundancy, only function program blocks are serialized here.
     *
     * @param prog program providing the function program blocks
     * @param pbs program blocks searched for function calls
     * @param clsMap map passed on to the program block serializers to collect generated class data
     * @return serialized function program blocks
     */
    private static String serializeProgram(Program prog, ArrayList<ProgramBlock> pbs,
            HashMap<String, byte[]> clsMap) {
        HashMap<String, FunctionProgramBlock> functions = prog.getFunctionProgramBlocks();
        HashSet<String> candidates = new HashSet<>();
        rFindSerializationCandidates(pbs, candidates);
        return rSerializeFunctionProgramBlocks(functions, candidates, clsMap);
    }

    /**
     * Recursively collects the keys of all functions called from the given program blocks.
     * <p>
     * Control-flow blocks (while, for/parfor, if) are descended into via their child blocks;
     * for all other blocks the instructions are scanned for function call instructions.
     * The bodies of called functions are searched as well, so chains of function calls
     * are followed.
     *
     * @param pbs program blocks to search
     * @param cand set of function keys found so far; extended in place
     */
    private static void rFindSerializationCandidates(ArrayList<ProgramBlock> pbs, HashSet<String> cand) {
        for (ProgramBlock block : pbs) {
            if (block instanceof WhileProgramBlock) {
                rFindSerializationCandidates(((WhileProgramBlock) block).getChildBlocks(), cand);
                continue;
            }
            if (block instanceof ForProgramBlock || block instanceof ParForProgramBlock) {
                rFindSerializationCandidates(((ForProgramBlock) block).getChildBlocks(), cand);
                continue;
            }
            if (block instanceof IfProgramBlock) {
                IfProgramBlock ifBlock = (IfProgramBlock) block;
                rFindSerializationCandidates(ifBlock.getChildBlocksIfBody(), cand);
                ArrayList<ProgramBlock> elseBody = ifBlock.getChildBlocksElseBody();
                if (elseBody != null)
                    rFindSerializationCandidates(elseBody, cand);
                continue;
            }

            //all generic program blocks
            for (Instruction inst : block.getInstructions()) {
                if (!(inst instanceof FunctionCallCPInstruction))
                    continue;
                FunctionCallCPInstruction call = (FunctionCallCPInstruction) inst;
                String fkey = DMLProgram.constructFunctionKey(call.getNamespace(), call.getFunctionName());
                //memoization for multiple calls and recursion: add returns false for known keys
                if (!cand.add(fkey))
                    continue;
                //investigate chains of function calls
                FunctionProgramBlock callee = block.getProgram().getFunctionProgramBlock(call.getNamespace(),
                        call.getFunctionName());
                rFindSerializationCandidates(callee.getChildBlocks(), cand);
            }
        }
    }

    /**
     * Serializes a variable map, enclosed in the variables begin/end markers.
     *
     * @param vars variable map to serialize
     * @return serialized variable map
     */
    private static String serializeVariables(LocalVariableMap vars) {
        return new StringBuilder()
                .append(VARS_BEGIN)
                .append(vars.serialize())
                .append(VARS_END)
                .toString();
    }

    /**
     * Serializes a single named data object.
     * <p>
     * SCHEMA: &lt;name&gt;|&lt;datatype&gt;|&lt;valuetype&gt;|value, followed by optional metadata
     * fields and, for lists, the serialized list elements. Scalars are serialized by value,
     * matrices by file name plus metadata, and lists by a reference marker plus their
     * recursively serialized elements (named &lt;listName&gt;-&lt;index&gt;).
     *
     * @param key name under which the object is serialized
     * @param dat data object to serialize
     * @return serialized data object
     * @throws DMLRuntimeException if the data type is neither scalar, matrix, nor list
     */
    public static String serializeDataObject(String key, Data dat) {
        DataType datatype = dat.getDataType();
        ValueType valuetype = dat.getValueType();
        String value = null;
        String[] metaData = null;
        String[] listData = null;

        //prepare data for serialization
        switch (datatype) {
        case SCALAR: {
            value = ((ScalarObject) dat).getStringValue();
            break;
        }
        case MATRIX: {
            MatrixObject mo = (MatrixObject) dat;
            MetaDataFormat md = (MetaDataFormat) dat.getMetaData();
            MatrixCharacteristics mc = md.getMatrixCharacteristics();
            value = mo.getFileName();
            PartitionFormat partFormat = (mo.getPartitionFormat() != null)
                    ? new PartitionFormat(mo.getPartitionFormat(), mo.getPartitionSize())
                    : PartitionFormat.NONE;
            metaData = new String[] {
                    String.valueOf(mc.getRows()),
                    String.valueOf(mc.getCols()),
                    String.valueOf(mc.getRowsPerBlock()),
                    String.valueOf(mc.getColsPerBlock()),
                    String.valueOf(mc.getNonZeros()),
                    InputInfo.inputInfoToString(md.getInputInfo()),
                    OutputInfo.outputInfoToString(md.getOutputInfo()),
                    String.valueOf(partFormat),
                    String.valueOf(mo.getUpdateType()),
                    String.valueOf(mo.isHDFSFileExists()),
                    String.valueOf(mo.isCleanupEnabled()) };
            break;
        }
        case LIST: {
            // SCHEMA: <name>|<datatype>|<valuetype>|value|<metadata>|<tab>element1<tab>element2<tab>element3 (this is the list)
            //         (for the element1) <listName-index>|<datatype>|<valuetype>|value
            //         (for the element2) <listName-index>|<datatype>|<valuetype>|value
            ListObject lo = (ListObject) dat;
            value = REF;
            metaData = new String[] {
                    String.valueOf(lo.getLength()),
                    lo.getNames() == null ? EMPTY : serializeList(lo.getNames(), ELEMENT_DELIM2) };
            listData = new String[lo.getLength()];
            for (int pos = 0; pos < listData.length; pos++)
                listData[pos] = serializeDataObject(key + DASH + pos, lo.slice(pos));
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to serialize datatype " + datatype);
        }

        //serialize data
        StringBuilder out = new StringBuilder();
        out.append(key).append(DATA_FIELD_DELIM);
        out.append(datatype).append(DATA_FIELD_DELIM);
        out.append(valuetype).append(DATA_FIELD_DELIM);
        out.append(value);
        if (metaData != null) {
            for (String field : metaData)
                out.append(DATA_FIELD_DELIM).append(field);
        }
        if (listData != null) {
            out.append(DATA_FIELD_DELIM);
            for (String element : listData)
                out.append(LIST_ELEMENT_DELIM).append(element);
        }
        return out.toString();
    }

    /**
     * Serializes the variables of an execution context.
     *
     * @param ec execution context, may be null
     * @return serialized variables, or the EMPTY marker if no execution context is given
     */
    private static String serializeExecutionContext(ExecutionContext ec) {
        if (ec == null)
            return EMPTY;
        return serializeVariables(ec.getVariables());
    }

    /**
     * Serializes a list of instructions, separated by the element delimiter.
     * <p>
     * Only CP instructions and external function invocation instructions are accepted.
     * For generated (spoof) operators, the class data of the operator class is
     * additionally registered in the given class map under the class name.
     *
     * @param inst instructions to serialize
     * @param clsMap map of class name to class data; extended in place
     * @return serialized instructions
     * @throws DMLRuntimeException if an unsupported instruction type is encountered
     */
    @SuppressWarnings("all")
    private static String serializeInstructions(ArrayList<Instruction> inst, HashMap<String, byte[]> clsMap) {
        StringBuilder out = new StringBuilder();
        boolean first = true;
        for (Instruction current : inst) {
            //check that only cp instruction are transmitted
            boolean supported = current instanceof CPInstruction
                    || current instanceof ExternalFunctionInvocationInstruction;
            if (!supported) {
                throw new DMLRuntimeException(
                        NOT_SUPPORTED_MR_INSTRUCTION + " " + current.getClass().getName() + "\n" + current);
            }

            //obtain serialized version of generated classes
            if (current instanceof SpoofCPInstruction) {
                Class<?> operatorClass = ((SpoofCPInstruction) current).getOperatorClass();
                clsMap.put(operatorClass.getName(), CodegenUtils.getClassData(operatorClass.getName()));
            }

            if (!first)
                out.append(ELEMENT_DELIM);
            first = false;

            out.append(checkAndReplaceLiterals(current.toString()));
        }
        return out.toString();
    }

    /**
     * Replacement of internal delimiters occurring in literals of instructions
     * in order to ensure robustness of serialization and parsing.
     * (e.g. print( "a,b" ) would break the parsing of instruction that internally
     * are separated with a "," )
     * <p>
     * All replacements are literal (non-regex) substitutions, so the delimiter
     * constants may contain characters with a special meaning in regular
     * expressions without requiring any escaping. A warning is logged for every
     * kind of delimiter that had to be replaced.
     * 
     * @param instStr instruction string
     * @return instruction string with replacements
     */
    private static String checkAndReplaceLiterals(String instStr) {
        String tmp = instStr;

        //1) check own delimiters (very unlikely due to special characters)
        if (tmp.contains(COMPONENTS_DELIM)) {
            tmp = tmp.replace(COMPONENTS_DELIM, ".");
            LOG.warn("Replaced special literal character sequence " + COMPONENTS_DELIM + " with '.'");
        }

        if (tmp.contains(ELEMENT_DELIM)) {
            tmp = tmp.replace(ELEMENT_DELIM, ".");
            LOG.warn("Replaced special literal character sequence " + ELEMENT_DELIM + " with '.'");
        }

        if (tmp.contains(LEVELIN)) {
            tmp = tmp.replace(LEVELIN, "("); //literal replace, no regex escaping required
            LOG.warn("Replaced special literal character sequence " + LEVELIN + " with '('");
        }

        if (tmp.contains(LEVELOUT)) {
            tmp = tmp.replace(LEVELOUT, ")");
            LOG.warn("Replaced special literal character sequence " + LEVELOUT + " with ')'");
        }

        //NOTE: DATA_FIELD_DELIM and KEY_VALUE_DELIM not required
        //because those literals cannot occur in critical places.

        //2) check end tag of CDATA
        if (tmp.contains(CDATA_END)) {
            tmp = tmp.replace(CDATA_END, "."); //prevent XML parsing issues in job.xml
            LOG.warn("Replaced special literal character sequence " + CDATA_END + " with '.'");
        }

        return tmp;
    }

    /**
     * Serializes a string map as a list of key/value pairs, each pair joined by the
     * key-value delimiter.
     *
     * @param vars map to serialize
     * @return serialized map
     */
    private static String serializeStringHashMap(HashMap<String, String> vars) {
        List<String> pairs = new ArrayList<>();
        for (Entry<String, String> entry : vars.entrySet())
            pairs.add(entry.getKey() + KEY_VALUE_DELIM + entry.getValue());
        return serializeList(pairs);
    }

    /**
     * Serializes result variables as a list of names; the name of an accumulator
     * variable is suffixed with "+".
     *
     * @param vars result variables to serialize
     * @return serialized result variables
     */
    public static String serializeResultVariables(List<ResultVar> vars) {
        List<String> names = new ArrayList<>();
        for (ResultVar var : vars) {
            if (var._isAccum)
                names.add(var._name + "+");
            else
                names.add(var._name);
        }
        return serializeList(names);
    }

    /**
     * Serializes a list of strings, separated by the default element delimiter.
     *
     * @param elements strings to join
     * @return joined string
     */
    public static String serializeList(List<String> elements) {
        String joined = serializeList(elements, ELEMENT_DELIM);
        return joined;
    }

    /**
     * Serializes a list of strings, separated by the given delimiter.
     *
     * @param elements strings to join
     * @param delim delimiter placed between consecutive elements
     * @return joined string
     */
    public static String serializeList(List<String> elements, String delim) {
        String joined = StringUtils.join(elements, delim);
        return joined;
    }

    /**
     * Serializes a list of data identifiers, one serialized identifier per list element.
     *
     * @param vars data identifiers to serialize
     * @return serialized data identifiers
     */
    private static String serializeDataIdentifiers(List<DataIdentifier> vars) {
        List<String> serialized = new ArrayList<>();
        for (DataIdentifier identifier : vars)
            serialized.add(serializeDataIdentifier(identifier));
        return serializeList(serialized);
    }

    /**
     * Serializes a single data identifier.
     * <p>
     * SCHEMA: &lt;name&gt;|&lt;datatype&gt;|&lt;valuetype&gt;
     *
     * @param dat data identifier to serialize
     * @return serialized data identifier
     */
    private static String serializeDataIdentifier(DataIdentifier dat) {
        return new StringBuilder()
                .append(dat.getName())
                .append(DATA_FIELD_DELIM)
                .append(dat.getDataType())
                .append(DATA_FIELD_DELIM)
                .append(dat.getValueType())
                .toString();
    }

    /**
     * Serializes those function program blocks whose key is contained in the candidate set.
     * <p>
     * Each function is written as key, key-value delimiter and serialized block;
     * consecutive functions are separated by the element delimiter and a newline, and
     * the output always ends with a newline.
     *
     * @param pbs function program blocks by function key
     * @param cand keys of the functions to serialize
     * @param clsMap map passed on to the program block serializer to collect generated class data
     * @return serialized function program blocks
     */
    private static String rSerializeFunctionProgramBlocks(HashMap<String, FunctionProgramBlock> pbs,
            HashSet<String> cand, HashMap<String, byte[]> clsMap) {
        StringBuilder out = new StringBuilder();
        boolean first = true;
        for (Entry<String, FunctionProgramBlock> function : pbs.entrySet()) {
            //skip function not included in the parfor body
            if (!cand.contains(function.getKey()))
                continue;
            if (!first)
                out.append(ELEMENT_DELIM).append(NEWLINE);
            first = false;
            out.append(function.getKey()).append(KEY_VALUE_DELIM)
                    .append(rSerializeProgramBlock(function.getValue(), clsMap));
        }
        out.append(NEWLINE);
        return out.toString();
    }

    /**
     * Serializes a list of program blocks; consecutive blocks are separated by the
     * element delimiter and a newline.
     *
     * @param pbs program blocks to serialize
     * @param clsMap map passed on to the program block serializer to collect generated class data
     * @return serialized program blocks
     */
    private static String rSerializeProgramBlocks(ArrayList<ProgramBlock> pbs, HashMap<String, byte[]> clsMap) {
        StringBuilder out = new StringBuilder();
        boolean first = true;
        for (ProgramBlock block : pbs) {
            if (!first)
                out.append(ELEMENT_DELIM).append(NEWLINE);
            first = false;
            out.append(rSerializeProgramBlock(block, clsMap));
        }
        return out.toString();
    }

    /**
     * Recursively serializes a single program block.
     * <p>
     * The output starts with a type-specific header (while, for, parfor, if, function,
     * external function, or generic), followed by the type-specific components separated
     * by the component delimiter, and ends with the common program block end marker.
     * Instruction lists are enclosed in instruction begin/end markers and nested program
     * blocks in program-blocks begin/end markers.
     *
     * @param pb program block to serialize
     * @param clsMap map passed on to the instruction serializer to collect generated class data
     * @return serialized program block
     * @throws DMLRuntimeException for a nested parfor with execution mode REMOTE_MR, or for an
     *         external function program block that is not a CP external function program block
     */
    private static String rSerializeProgramBlock(ProgramBlock pb, HashMap<String, byte[]> clsMap) {
        StringBuilder out = new StringBuilder();

        //header and body per block type (subclasses are tested before their superclasses)
        if (pb instanceof WhileProgramBlock) {
            WhileProgramBlock wpb = (WhileProgramBlock) pb;
            out.append(PB_WHILE);
            out.append(INST_BEGIN).append(serializeInstructions(wpb.getPredicate(), clsMap)).append(INST_END)
                    .append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(wpb.getExitInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(PBS_BEGIN).append(rSerializeProgramBlocks(wpb.getChildBlocks(), clsMap)).append(PBS_END);
        } else if (pb instanceof ParForProgramBlock) {
            ParForProgramBlock pfpb = (ParForProgramBlock) pb;
            out.append(PB_PARFOR);

            //check for nested remote ParFOR
            PExecMode mode = PExecMode.valueOf(pfpb.getParForParams().get(ParForStatementBlock.EXEC_MODE));
            if (mode == PExecMode.REMOTE_MR)
                throw new DMLRuntimeException(NOT_SUPPORTED_MR_PARFOR);

            out.append(pfpb.getIterVar()).append(COMPONENTS_DELIM);
            out.append(serializeResultVariables(pfpb.getResultVariables())).append(COMPONENTS_DELIM);
            //parameters of nested parfor
            out.append(serializeStringHashMap(pfpb.getParForParams())).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(pfpb.getFromInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(pfpb.getToInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(pfpb.getIncrementInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(pfpb.getExitInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(PBS_BEGIN).append(rSerializeProgramBlocks(pfpb.getChildBlocks(), clsMap))
                    .append(PBS_END);
        } else if (pb instanceof ForProgramBlock) {
            ForProgramBlock fpb = (ForProgramBlock) pb;
            out.append(PB_FOR);
            out.append(fpb.getIterVar()).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(fpb.getFromInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(fpb.getToInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(fpb.getIncrementInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(fpb.getExitInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(PBS_BEGIN).append(rSerializeProgramBlocks(fpb.getChildBlocks(), clsMap)).append(PBS_END);
        } else if (pb instanceof IfProgramBlock) {
            IfProgramBlock ipb = (IfProgramBlock) pb;
            out.append(PB_IF);
            out.append(INST_BEGIN).append(serializeInstructions(ipb.getPredicate(), clsMap)).append(INST_END)
                    .append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(ipb.getExitInstructions(), clsMap))
                    .append(INST_END).append(COMPONENTS_DELIM);
            out.append(PBS_BEGIN).append(rSerializeProgramBlocks(ipb.getChildBlocksIfBody(), clsMap))
                    .append(PBS_END).append(COMPONENTS_DELIM);
            out.append(PBS_BEGIN).append(rSerializeProgramBlocks(ipb.getChildBlocksElseBody(), clsMap))
                    .append(PBS_END);
        } else if (pb instanceof ExternalFunctionProgramBlock) {
            out.append(PB_EFC);
            if (!(pb instanceof ExternalFunctionProgramBlockCP))
                throw new DMLRuntimeException(NOT_SUPPORTED_EXTERNALFUNCTION_PB);
            ExternalFunctionProgramBlockCP efpb = (ExternalFunctionProgramBlockCP) pb;

            out.append(serializeDataIdentifiers(efpb.getInputParams())).append(COMPONENTS_DELIM);
            out.append(serializeDataIdentifiers(efpb.getOutputParams())).append(COMPONENTS_DELIM);
            out.append(serializeStringHashMap(efpb.getOtherParams())).append(COMPONENTS_DELIM);
            out.append(efpb.getBaseDir()).append(COMPONENTS_DELIM);

            //instructions are left empty (create on construction anyway)
            out.append(INST_BEGIN).append(INST_END).append(COMPONENTS_DELIM);
            out.append(PBS_BEGIN).append(rSerializeProgramBlocks(efpb.getChildBlocks(), clsMap))
                    .append(PBS_END);
        } else if (pb instanceof FunctionProgramBlock) {
            FunctionProgramBlock fpb = (FunctionProgramBlock) pb;
            out.append(PB_FC);
            out.append(serializeDataIdentifiers(fpb.getInputParams())).append(COMPONENTS_DELIM);
            out.append(serializeDataIdentifiers(fpb.getOutputParams())).append(COMPONENTS_DELIM);
            out.append(INST_BEGIN).append(serializeInstructions(fpb.getInstructions(), clsMap)).append(INST_END)
                    .append(COMPONENTS_DELIM);
            //note: function blocks carry a trailing component delimiter after the child blocks
            out.append(PBS_BEGIN).append(rSerializeProgramBlocks(fpb.getChildBlocks(), clsMap)).append(PBS_END)
                    .append(COMPONENTS_DELIM);
        } else {
            //all generic program blocks
            out.append(PB_BEGIN);
            out.append(INST_BEGIN).append(serializeInstructions(pb.getInstructions(), clsMap)).append(INST_END);
        }

        //handle end
        out.append(PB_END);
        return out.toString();
    }

    ////////////////////////////////
    // PARSING 
    ////////////////////////////////

    /**
     * Parses a serialized Spark parameter-server body.
     * <p>
     * As a side effect, the DMLScript UUID, the local DML/compiler configuration and the
     * additional configurations are set from the serialized input. The parsed program
     * blocks are added to the parsed program, which is attached to the execution context
     * of the returned body.
     *
     * @param in serialized body
     * @param id id passed on to the program and program block parsers
     * @return parsed body with its execution context set
     */
    public static SparkPSBody parseSparkPSBody(String in, int id) {
        SparkPSBody body = new SparkPSBody();

        //normalization and removal of the start/end markers
        String normalized = in.replaceAll(NEWLINE, "");
        String content = normalized.substring(PSBODY_BEGIN.length(), normalized.length() - PSBODY_END.length());
        HierarchyAwareStringTokenizer tokens = new HierarchyAwareStringTokenizer(content, COMPONENTS_DELIM);

        //DMLScript UUID (NOTE: set directly in DMLScript)
        //(master UUID is used for all nodes (in order to simplify cleanup))
        DMLScript.setUUID(tokens.nextToken());

        //DML config (NOTE: set directly in ConfigurationManager)
        handleDMLConfig(tokens.nextToken());

        //additional configs
        parseAndSetAdditionalConfigurations(tokens.nextToken());

        //program
        Program prog = parseProgram(tokens.nextToken(), id);

        //execution context
        ExecutionContext ec = parseExecutionContext(tokens.nextToken(), prog);
        ec.setProgram(prog);

        //program blocks
        ArrayList<ProgramBlock> blocks = rParseProgramBlocks(tokens.nextToken(), prog, id);
        prog.getProgramBlocks().addAll(blocks);

        body.setEc(ec);
        return body;
    }

    /**
     * Parses a serialized parfor body with the {@code inSpark} flag set to false.
     *
     * @param in serialized parfor body
     * @param id id passed on to the program and program block parsers
     * @return parsed parfor body
     */
    public static ParForBody parseParForBody(String in, int id) {
        final boolean inSpark = false;
        return parseParForBody(in, id, inSpark);
    }

    /**
     * Parses a serialized parfor body.
     * <p>
     * As a side effect, the DMLScript UUID is set from the input. The serialized DML config
     * is applied (and the internal parfor configuration initialized) only if the cached job
     * configuration is not in local mode; the additional configurations are applied only
     * if {@code inSpark} is false.
     *
     * @param in serialized parfor body
     * @param id id passed on to the program and program block parsers
     * @param inSpark if true, the additional configurations are read but not applied
     * @return parsed parfor body with result variables, child blocks and execution context set
     */
    public static ParForBody parseParForBody(String in, int id, boolean inSpark) {
        ParForBody body = new ParForBody();

        //normalization and removal of the start/end markers
        String normalized = in.replaceAll(NEWLINE, "");
        String content = normalized.substring(PARFORBODY_BEGIN.length(),
                normalized.length() - PARFORBODY_END.length());
        HierarchyAwareStringTokenizer tokens = new HierarchyAwareStringTokenizer(content, COMPONENTS_DELIM);

        //DMLScript UUID (NOTE: set directly in DMLScript)
        //(master UUID is used for all nodes (in order to simplify cleanup))
        DMLScript.setUUID(tokens.nextToken());

        //DML config (NOTE: set directly in ConfigurationManager)
        String serializedConf = tokens.nextToken();
        JobConf job = ConfigurationManager.getCachedJobConf();
        if (!InfrastructureAnalyzer.isLocalMode(job)) {
            handleDMLConfig(serializedConf);
            //init internal configuration w/ parsed or default config
            ParForProgramBlock.initInternalConfigurations(ConfigurationManager.getDMLConfig());
        }

        //additional configs (the token is always consumed, even if not applied)
        String additionalConf = tokens.nextToken();
        if (!inSpark)
            parseAndSetAdditionalConfigurations(additionalConf);

        //program
        Program prog = parseProgram(tokens.nextToken(), id);

        //result variable names
        ArrayList<ResultVar> resultVars = parseResultVariables(tokens.nextToken());
        body.setResultVariables(resultVars);

        //execution context
        ExecutionContext ec = parseExecutionContext(tokens.nextToken(), prog);

        //program blocks
        ArrayList<ProgramBlock> blocks = rParseProgramBlocks(tokens.nextToken(), prog, id);

        body.setChildBlocks(blocks);
        body.setEc(ec);
        return body;
    }

    /**
     * Parses a serialized DML config and installs it, together with the compiler config
     * constructed from it, as local configuration. Null or blank input is ignored.
     *
     * @param confStr serialized DML config, may be null or blank
     */
    private static void handleDMLConfig(String confStr) {
        if (confStr == null || confStr.trim().isEmpty())
            return;

        DMLConfig dmlConf = DMLConfig.parseDMLConfig(confStr);
        CompilerConfig compilerConf = OptimizerUtils.constructCompilerConfig(dmlConf);
        ConfigurationManager.setLocalConfig(dmlConf);
        ConfigurationManager.setLocalConfig(compilerConf);
    }

    /**
     * Parses a serialized program, i.e., its function program blocks.
     * <p>
     * Each parsed function key is split at the program key delimiter into namespace and
     * function name, under which the function program block is registered in a new program.
     *
     * @param in serialized program, enclosed in the program begin/end markers
     * @param id id passed on to the program block parsers
     * @return new program containing the parsed function program blocks
     */
    public static Program parseProgram(String in, int id) {
        String content = in.substring(PROG_BEGIN.length(), in.length() - PROG_END.length()).trim();
        Program prog = new Program();
        HashMap<String, FunctionProgramBlock> functions = parseFunctionProgramBlocks(content, prog, id);
        functions.forEach((key, function) -> {
            //key layout: <namespace><KEY_DELIM><name>
            String[] parts = key.split(Program.KEY_DELIM);
            prog.addFunctionProgramBlock(parts[0], parts[1], function);
        });
        return prog;
    }

    /**
     * Parses a serialized variable map enclosed in the variables begin/end markers.
     *
     * @param in serialized variable map including the begin/end markers
     * @return deserialized variable map, or an empty map if the input is not longer than
     *         the two markers together
     */
    private static LocalVariableMap parseVariables(String in) {
        //empty input symbol table
        if (in.length() <= VARS_BEGIN.length() + VARS_END.length())
            return new LocalVariableMap();

        String content = in.substring(VARS_BEGIN.length(), in.length() - VARS_END.length()).trim();
        return LocalVariableMap.deserialize(content);
    }

    /**
     * Parses serialized function program blocks.
     * <p>
     * The input is tokenized at the element delimiter; each token is split at the first
     * occurrence of the key-value delimiter into the function key and the serialized
     * function program block.
     *
     * @param in serialized function program blocks
     * @param prog program passed on to the program block parser
     * @param id id passed on to the program block parser
     * @return parsed function program blocks by function key
     */
    private static HashMap<String, FunctionProgramBlock> parseFunctionProgramBlocks(String in, Program prog,
            int id) {
        HashMap<String, FunctionProgramBlock> functions = new HashMap<>();
        HierarchyAwareStringTokenizer tokens = new HierarchyAwareStringTokenizer(in, ELEMENT_DELIM);
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            int split = token.indexOf(KEY_VALUE_DELIM);
            String key = token.substring(0, split);
            String serializedBlock = token.substring(split + 1);
            FunctionProgramBlock function = (FunctionProgramBlock) rParseProgramBlock(serializedBlock, prog, id);
            functions.put(key, function);
        }
        return functions;
    }

    /**
     * Parses a serialized list of program blocks.
     *
     * @param in serialized blocks, enclosed by PBS_BEGIN and PBS_END
     * @param prog program the parsed blocks are created for
     * @param id id handed down to the block parser
     * @return parsed program blocks in serialized order
     */
    private static ArrayList<ProgramBlock> rParseProgramBlocks(String in, Program prog, int id) {
        String body = in.substring(PBS_BEGIN.length(), in.length() - PBS_END.length());
        HierarchyAwareStringTokenizer tokens = new HierarchyAwareStringTokenizer(body, ELEMENT_DELIM);
        ArrayList<ProgramBlock> blocks = new ArrayList<>();
        while (tokens.hasMoreTokens()) {
            String serializedBlock = tokens.nextToken();
            blocks.add(rParseProgramBlock(serializedBlock, prog, id));
        }
        return blocks;
    }

    /**
     * Dispatches a serialized program block to the parser matching its leading marker.
     *
     * @param in serialized program block
     * @param prog program the parsed block is created for
     * @param id id handed down to the specific parser
     * @return parsed program block
     * @throws DMLRuntimeException if the input starts with none of the known block markers
     */
    private static ProgramBlock rParseProgramBlock(String in, Program prog, int id) {
        //NOTE(review): the markers are tested in a fixed order with the generic PB_BEGIN last;
        //do not reorder without checking whether the marker constants share prefixes
        if (in.startsWith(PB_WHILE))
            return rParseWhileProgramBlock(in, prog, id);
        if (in.startsWith(PB_FOR))
            return rParseForProgramBlock(in, prog, id);
        if (in.startsWith(PB_PARFOR))
            return rParseParForProgramBlock(in, prog, id);
        if (in.startsWith(PB_IF))
            return rParseIfProgramBlock(in, prog, id);
        if (in.startsWith(PB_FC))
            return rParseFunctionProgramBlock(in, prog, id);
        if (in.startsWith(PB_EFC))
            return rParseExternalFunctionProgramBlock(in, prog, id);
        if (in.startsWith(PB_BEGIN))
            return rParseGenericProgramBlock(in, prog, id);

        throw new DMLRuntimeException(NOT_SUPPORTED_PB + " " + in);
    }

    /**
     * Parses a serialized while program block.
     *
     * @param in serialized block, enclosed by PB_WHILE and PB_END
     * @param prog program the block is created for
     * @param id id handed down to instruction and child block parsing
     * @return parsed while program block
     */
    private static WhileProgramBlock rParseWhileProgramBlock(String in, Program prog, int id) {
        String body = in.substring(PB_WHILE.length(), in.length() - PB_END.length());
        HierarchyAwareStringTokenizer parts = new HierarchyAwareStringTokenizer(body, COMPONENTS_DELIM);

        //components in serialized order: predicate, exit instructions, child blocks
        ArrayList<Instruction> predicate = parseInstructions(parts.nextToken(), id);
        ArrayList<Instruction> exitInsts = parseInstructions(parts.nextToken(), id);
        ArrayList<ProgramBlock> children = rParseProgramBlocks(parts.nextToken(), prog, id);

        WhileProgramBlock result = new WhileProgramBlock(prog, predicate);
        result.setExitInstructions2(exitInsts);
        result.setChildBlocks(children);
        return result;
    }

    /**
     * Parses a serialized for program block.
     *
     * @param in serialized block, enclosed by PB_FOR and PB_END
     * @param prog program the block is created for
     * @param id id handed down to instruction and child block parsing
     * @return parsed for program block
     */
    private static ForProgramBlock rParseForProgramBlock(String in, Program prog, int id) {
        String body = in.substring(PB_FOR.length(), in.length() - PB_END.length());
        HierarchyAwareStringTokenizer parts = new HierarchyAwareStringTokenizer(body, COMPONENTS_DELIM);

        //components in serialized order:
        //iteration variable, from, to, increment, exit instructions, child blocks
        String loopVar = parts.nextToken();
        ArrayList<Instruction> fromInsts = parseInstructions(parts.nextToken(), id);
        ArrayList<Instruction> toInsts = parseInstructions(parts.nextToken(), id);
        ArrayList<Instruction> incrInsts = parseInstructions(parts.nextToken(), id);
        ArrayList<Instruction> exitInsts = parseInstructions(parts.nextToken(), id);
        ArrayList<ProgramBlock> children = rParseProgramBlocks(parts.nextToken(), prog, id);

        ForProgramBlock result = new ForProgramBlock(prog, loopVar);
        result.setFromInstructions(fromInsts);
        result.setToInstructions(toInsts);
        result.setIncrementInstructions(incrInsts);
        result.setExitInstructions(exitInsts);
        result.setChildBlocks(children);
        return result;
    }

    /**
     * Parses a serialized parfor program block. Instructions and child blocks are parsed
     * with id 0, while the given id is only passed to the ParForProgramBlock constructor.
     *
     * @param in serialized block, enclosed by PB_PARFOR and PB_END
     * @param prog program the block is created for
     * @param id id passed to the created parfor program block
     * @return parsed parfor program block with optimization disabled
     */
    private static ParForProgramBlock rParseParForProgramBlock(String in, Program prog, int id) {
        String body = in.substring(PB_PARFOR.length(), in.length() - PB_END.length());
        HierarchyAwareStringTokenizer parts = new HierarchyAwareStringTokenizer(body, COMPONENTS_DELIM);

        //inputs: iteration variable, result variables, parameters
        String loopVar = parts.nextToken();
        ArrayList<ResultVar> results = parseResultVariables(parts.nextToken());
        HashMap<String, String> parforParams = parseStringHashMap(parts.nextToken());

        //from, to, increment and exit instructions
        ArrayList<Instruction> fromInsts = parseInstructions(parts.nextToken(), 0);
        ArrayList<Instruction> toInsts = parseInstructions(parts.nextToken(), 0);
        ArrayList<Instruction> incrInsts = parseInstructions(parts.nextToken(), 0);
        ArrayList<Instruction> exitInsts = parseInstructions(parts.nextToken(), 0);

        //child blocks //reset id to preinit state, replaced during exec
        ArrayList<ProgramBlock> children = rParseProgramBlocks(parts.nextToken(), prog, 0);

        ParForProgramBlock result = new ParForProgramBlock(id, prog, loopVar, parforParams, results);
        result.disableOptimization(); //already done in top-level parfor
        result.setFromInstructions(fromInsts);
        result.setToInstructions(toInsts);
        result.setIncrementInstructions(incrInsts);
        result.setExitInstructions(exitInsts);
        result.setChildBlocks(children);
        return result;
    }

    /**
     * Parses a serialized if program block.
     *
     * @param in serialized block, enclosed by PB_IF and PB_END
     * @param prog program the block is created for
     * @param id id handed down to instruction and child block parsing
     * @return parsed if program block
     */
    private static IfProgramBlock rParseIfProgramBlock(String in, Program prog, int id) {
        String body = in.substring(PB_IF.length(), in.length() - PB_END.length());
        HierarchyAwareStringTokenizer parts = new HierarchyAwareStringTokenizer(body, COMPONENTS_DELIM);

        //components in serialized order: predicate, exit instructions, if body, else body
        ArrayList<Instruction> predicate = parseInstructions(parts.nextToken(), id);
        ArrayList<Instruction> exitInsts = parseInstructions(parts.nextToken(), id);
        ArrayList<ProgramBlock> ifBody = rParseProgramBlocks(parts.nextToken(), prog, id);
        ArrayList<ProgramBlock> elseBody = rParseProgramBlocks(parts.nextToken(), prog, id);

        IfProgramBlock result = new IfProgramBlock(prog, predicate);
        result.setExitInstructions2(exitInsts);
        result.setChildBlocksIfBody(ifBody);
        result.setChildBlocksElseBody(elseBody);
        return result;
    }

    /**
     * Parses a serialized function program block.
     *
     * @param in serialized block, enclosed by PB_FC and PB_END
     * @param prog program the block is created for
     * @param id id handed down to instruction and child block parsing
     * @return parsed function program block
     */
    private static FunctionProgramBlock rParseFunctionProgramBlock(String in, Program prog, int id) {
        String body = in.substring(PB_FC.length(), in.length() - PB_END.length());
        HierarchyAwareStringTokenizer parts = new HierarchyAwareStringTokenizer(body, COMPONENTS_DELIM);

        //components in serialized order: two data identifier lists, instructions, child blocks
        ArrayList<DataIdentifier> firstIds = parseDataIdentifiers(parts.nextToken());
        ArrayList<DataIdentifier> secondIds = parseDataIdentifiers(parts.nextToken());
        ArrayList<Instruction> insts = parseInstructions(parts.nextToken(), id);
        ArrayList<ProgramBlock> children = rParseProgramBlocks(parts.nextToken(), prog, id);

        //the block receives its own copies of both identifier lists
        ArrayList<DataIdentifier> firstCopy = new ArrayList<>(firstIds);
        ArrayList<DataIdentifier> secondCopy = new ArrayList<>(secondIds);
        FunctionProgramBlock result = new FunctionProgramBlock(prog, firstCopy, secondCopy);
        result.setInstructions(insts);
        result.setChildBlocks(children);
        return result;
    }

    /**
     * Parses a serialized external function program block. The result is always created as
     * an ExternalFunctionProgramBlockCP.
     *
     * @param in serialized block, enclosed by PB_EFC and PB_END
     * @param prog program the block is created for
     * @param id id handed down to instruction and child block parsing
     * @return parsed external function program block
     */
    private static ExternalFunctionProgramBlock rParseExternalFunctionProgramBlock(String in, Program prog,
            int id) {
        String body = in.substring(PB_EFC.length(), in.length() - PB_END.length());
        HierarchyAwareStringTokenizer parts = new HierarchyAwareStringTokenizer(body, COMPONENTS_DELIM);

        //two data identifier lists, parameters and base directory
        ArrayList<DataIdentifier> firstIds = parseDataIdentifiers(parts.nextToken());
        ArrayList<DataIdentifier> secondIds = parseDataIdentifiers(parts.nextToken());
        HashMap<String, String> otherParams = parseStringHashMap(parts.nextToken());
        String baseDir = parts.nextToken();

        //instructions (required for removing INST BEGIN, END); the parsed result is not used
        parseInstructions(parts.nextToken(), id);

        //child blocks
        ArrayList<ProgramBlock> children = rParseProgramBlocks(parts.nextToken(), prog, id);

        //the block receives its own copies of both identifier lists
        ArrayList<DataIdentifier> firstCopy = new ArrayList<>(firstIds);
        ArrayList<DataIdentifier> secondCopy = new ArrayList<>(secondIds);

        //only CP external functions, because no nested MR jobs for reblocks
        ExternalFunctionProgramBlockCP result = new ExternalFunctionProgramBlockCP(prog, firstCopy, secondCopy,
                otherParams, baseDir);
        result.setChildBlocks(children);
        return result;
    }

    /**
     * Parses a serialized generic program block, which carries instructions only.
     *
     * @param in serialized block, enclosed by PB_BEGIN and PB_END
     * @param prog program the block is created for
     * @param id id handed down to instruction parsing
     * @return parsed program block
     */
    private static ProgramBlock rParseGenericProgramBlock(String in, Program prog, int id) {
        String body = in.substring(PB_BEGIN.length(), in.length() - PB_END.length());
        StringTokenizer parts = new StringTokenizer(body, COMPONENTS_DELIM);
        ProgramBlock result = new ProgramBlock(prog);
        //only the first component is read
        ArrayList<Instruction> insts = parseInstructions(parts.nextToken(), id);
        result.setInstructions(insts);
        return result;
    }

    /**
     * Parses a serialized instruction list and rewrites the root thread id of each
     * instruction to the child thread id derived from the given id.
     *
     * @param in serialized instructions, enclosed by INST_BEGIN and INST_END
     * @param id id appended to Lop.CP_CHILD_THREAD to form the replacement thread id
     * @return parsed instructions in serialized order
     * @throws DMLRuntimeException if any single instruction cannot be parsed
     */
    private static ArrayList<Instruction> parseInstructions(String in, int id) {
        String body = in.substring(INST_BEGIN.length(), in.length() - INST_END.length());
        StringTokenizer tokens = new StringTokenizer(body, ELEMENT_DELIM);
        final String childThreadId = Lop.CP_CHILD_THREAD + id;

        ArrayList<Instruction> result = new ArrayList<>();
        while (tokens.hasMoreTokens()) {
            //Note that at this point only CP instructions and External function instruction can occur
            String serialized = tokens.nextToken();
            try {
                Instruction parsed = CPInstructionParser.parseSingleInstruction(serialized);
                result.add(saveReplaceThreadID(parsed, Lop.CP_ROOT_THREAD_ID, childThreadId));
            } catch (Exception ex) {
                throw new DMLRuntimeException("Failed to parse instruction: " + serialized, ex);
            }
        }
        return result;
    }

    /**
     * Parses a serialized list of result variables. A trailing "+" on a name marks the
     * variable as accumulator and is stripped from the name.
     *
     * @param in serialized result variable names
     * @return parsed result variables in serialized order
     */
    private static ArrayList<ResultVar> parseResultVariables(String in) {
        ArrayList<ResultVar> result = new ArrayList<>();
        for (String entry : parseStringArrayList(in)) {
            String name = entry;
            boolean accumulate = false;
            if (entry.endsWith("+")) {
                name = entry.substring(0, entry.length() - 1);
                accumulate = true;
            }
            result.add(new ResultVar(name, accumulate));
        }
        return result;
    }

    /**
     * Parses serialized "key KEY_VALUE_DELIM value" pairs separated by ELEMENT_DELIM.
     *
     * @param in serialized key-value pairs
     * @return map of the parsed pairs; later duplicates of a key overwrite earlier ones
     */
    private static HashMap<String, String> parseStringHashMap(String in) {
        HashMap<String, String> result = new HashMap<>();
        for (StringTokenizer pairs = new StringTokenizer(in, ELEMENT_DELIM); pairs.hasMoreTokens();) {
            String pair = pairs.nextToken();
            //split at the first key-value delimiter
            int split = pair.indexOf(KEY_VALUE_DELIM);
            result.put(pair.substring(0, split), pair.substring(split + 1));
        }
        return result;
    }

    /**
     * Splits the input at ELEMENT_DELIM.
     *
     * @param in serialized string list
     * @return list of the contained tokens
     */
    private static ArrayList<String> parseStringArrayList(String in) {
        final String delim = ELEMENT_DELIM;
        return parseStringArrayList(in, delim);
    }

    /**
     * Splits the input with a StringTokenizer, i.e., every character of delim acts as a
     * separator and empty tokens are not returned.
     *
     * @param in serialized string list
     * @param delim delimiter characters
     * @return list of the contained tokens in input order
     */
    private static ArrayList<String> parseStringArrayList(String in, String delim) {
        StringTokenizer tokens = new StringTokenizer(in, delim);
        final int numTokens = tokens.countTokens();
        ArrayList<String> result = new ArrayList<>(numTokens);
        for (int i = 0; i < numTokens; i++)
            result.add(tokens.nextToken());
        return result;
    }

    /**
     * Parses a serialized list of data identifiers separated by ELEMENT_DELIM.
     *
     * @param in serialized data identifiers
     * @return parsed data identifiers in serialized order
     */
    private static ArrayList<DataIdentifier> parseDataIdentifiers(String in) {
        ArrayList<DataIdentifier> result = new ArrayList<>();
        for (StringTokenizer tokens = new StringTokenizer(in, ELEMENT_DELIM); tokens.hasMoreTokens();)
            result.add(parseDataIdentifier(tokens.nextToken()));
        return result;
    }

    /**
     * Parses a single serialized data identifier consisting of the fields name, data type
     * and value type, separated by DATA_FIELD_DELIM.
     *
     * @param in serialized data identifier
     * @return data identifier with name, data type and value type set
     */
    private static DataIdentifier parseDataIdentifier(String in) {
        StringTokenizer fields = new StringTokenizer(in, DATA_FIELD_DELIM);
        DataIdentifier result = new DataIdentifier(fields.nextToken());
        DataType dataType = DataType.valueOf(fields.nextToken());
        result.setDataType(dataType);
        ValueType valueType = ValueType.valueOf(fields.nextToken());
        result.setValueType(valueType);
        return result;
    }

    /**
     * Parses a single serialized data object (scalar, matrix, or list) into its name and
     * its in-memory representation. The fields are separated by DATA_FIELD_DELIM and are
     * consumed strictly in order: name, data type, value type, an optional value string,
     * and then the type-specific fields read in the switch below.
     * 
     * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
     * related symbol tables can be hierarchically structured.
     * 
     * @param in data object as string
     * @return array of two objects: the name at index 0 and the parsed Data object at index 1
     * @throws DMLRuntimeException if the data type, or for scalars the value type, is not supported
     */
    public static Object[] parseDataObject(String in) {
        Object[] ret = new Object[2];

        //common header fields shared by all data types
        StringTokenizer st = new StringTokenizer(in, DATA_FIELD_DELIM);
        String name = st.nextToken();
        DataType datatype = DataType.valueOf(st.nextToken());
        ValueType valuetype = ValueType.valueOf(st.nextToken());
        //the value field is optional; if no token is left, the empty string is used
        String valString = st.hasMoreTokens() ? st.nextToken() : "";
        Data dat = null;
        switch (datatype) {
        case SCALAR: {
            //scalars: valString holds the literal value, interpreted according to the value type
            switch (valuetype) {
            case INT:
                dat = new IntObject(Long.parseLong(valString));
                break;
            case DOUBLE:
                dat = new DoubleObject(Double.parseDouble(valString));
                break;
            case BOOLEAN:
                dat = new BooleanObject(Boolean.parseBoolean(valString));
                break;
            case STRING:
                dat = new StringObject(valString);
                break;
            default:
                throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
            }
            break;
        }
        case MATRIX: {
            //valString is handed to the MatrixObject constructor
            //NOTE(review): presumably the file name of the matrix - confirm against MatrixObject
            MatrixObject mo = new MatrixObject(valuetype, valString);
            //remaining fields in serialized order: rows, cols, block rows, block cols, nnz,
            //input info, output info, partition format, update type
            long rows = Long.parseLong(st.nextToken());
            long cols = Long.parseLong(st.nextToken());
            int brows = Integer.parseInt(st.nextToken());
            int bcols = Integer.parseInt(st.nextToken());
            long nnz = Long.parseLong(st.nextToken());
            InputInfo iin = InputInfo.stringToInputInfo(st.nextToken());
            OutputInfo oin = OutputInfo.stringToOutputInfo(st.nextToken());
            PartitionFormat partFormat = PartitionFormat.valueOf(st.nextToken());
            UpdateType inplace = UpdateType.valueOf(st.nextToken());
            MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, brows, bcols, nnz);
            MetaDataFormat md = new MetaDataFormat(mc, oin, iin);
            mo.setMetaData(md);
            //partitioning is only set if a partition format other than NONE was serialized
            if (partFormat._dpf != PDataPartitionFormat.NONE)
                mo.setPartitioned(partFormat._dpf, partFormat._N);
            mo.setUpdateType(inplace);
            //the last two fields are boolean flags: hdfs-file-exists, then cleanup-enabled
            mo.setHDFSFileExists(Boolean.valueOf(st.nextToken()));
            mo.enableCleanup(Boolean.valueOf(st.nextToken()));
            dat = mo;
            break;
        }
        case LIST:
            //lists: number of elements, element names, then the serialized elements
            //(this case has no braces, so its locals live in the scope of the whole switch)
            int size = Integer.parseInt(st.nextToken());
            String namesStr = st.nextToken();
            //the EMPTY marker stands for an unnamed list (names == null)
            List<String> names = namesStr.equals(EMPTY) ? null : parseStringArrayList(namesStr, ELEMENT_DELIM2);
            List<Data> data = new ArrayList<>(size);
            //switch the tokenizer to LIST_ELEMENT_DELIM for this and all following calls;
            //the token returned by this call is discarded
            st.nextToken(LIST_ELEMENT_DELIM);
            for (int i = 0; i < size; i++) {
                //each element is itself a serialized data object and is parsed recursively;
                //only its Data part (index 1) is kept
                String dataStr = st.nextToken();
                Object[] obj = parseDataObject(dataStr);
                data.add((Data) obj[1]);
            }
            dat = new ListObject(data, names);
            break;
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
        }

        ret[0] = name;
        ret[1] = dat;
        return ret;
    }

    /**
     * Parses a serialized execution context.
     *
     * @param in serialized execution context, enclosed by EC_BEGIN and EC_END
     * @param prog program the execution context is created for
     * @return execution context with the deserialized variables, or null if the
     *         serialized content equals the EMPTY marker
     */
    private static ExecutionContext parseExecutionContext(String in, Program prog) {
        String body = in.substring(EC_BEGIN.length(), in.length() - EC_END.length()).trim();
        if (body.equals(EMPTY))
            return null;

        LocalVariableMap symbols = parseVariables(body);
        ExecutionContext context = ExecutionContextFactory.createContext(false, prog);
        context.setVariables(symbols);
        return context;
    }

    /**
     * Parses a "key=value" configuration string and sets the statistics flag of the
     * ConfigurationManager to the boolean value found after the first "=".
     *
     * @param conf serialized additional configuration
     */
    private static void parseAndSetAdditionalConfigurations(String conf) {
        String flagValue = conf.split("=")[1];
        boolean statistics = Boolean.parseBoolean(flagValue);
        ConfigurationManager.setStatistics(statistics);
    }

    //////////
    // CUSTOM SAFE LITERAL REPLACEMENT

    /**
     * In-place replacement of thread ids in filenames, functions names etc. Only
     * MRJobInstruction and VariableCPInstruction instances are updated; all other
     * instructions are returned untouched.
     * 
     * @param inst instruction
     * @param pattern thread id string to be replaced
     * @param replacement string replacement
     * @return the given instruction (same object)
     */
    private static Instruction saveReplaceThreadID(Instruction inst, String pattern, String replacement) {
        //currently known, relevant instructions: createvar, rand, seq, extfunct,
        if (inst instanceof MRJobInstruction) {
            //update dims file, and internal string representations of rand/seq instructions
            ((MRJobInstruction) inst).updateInstructionThreadID(pattern, replacement);
            return inst;
        }
        if (inst instanceof VariableCPInstruction) {
            //createvar, setfilename: update in-memory representation
            inst.updateInstructionThreadID(pattern, replacement);
        }
        //NOTE> //Rand, seq in CP not required
        return inst;
    }

    /**
     * Replaces the last occurrence of the given pattern in a file name.
     *
     * Only the last match is replaced in order to account for the possibility that read
     * variables have our prefix in the absolute path: (1) we have at most one _t0 and
     * (2) it is always concatenated to the end.
     *
     * @param fname file name
     * @param pattern string to be replaced
     * @param replace replacement string
     * @return file name with the last match replaced, or the unchanged file name if there is no match
     */
    public static String saveReplaceFilenameThreadID(String fname, String pattern, String replace) {
        final int lastMatch = fname.lastIndexOf(pattern);
        if (lastMatch < 0)
            return fname;

        String head = fname.substring(0, lastMatch);
        String tail = fname.substring(lastMatch + pattern.length());
        return head + replace + tail;
    }

    //////////
    // CUSTOM HIERARCHICAL TOKENIZER

    /**
     * Custom StringTokenizer for splitting strings of hierarchies. The basic idea is to
     * search for delim-Strings on the same hierarchy level, while delims of lower hierarchy
     * levels (i.e., between a LEVELIN and its matching LEVELOUT) are skipped.
     * 
     * In contrast to java.util.StringTokenizer, the delimiter is matched as a whole string
     * and nextToken() returns an empty string instead of throwing if nothing is left.
     */
    private static class HierarchyAwareStringTokenizer //extends StringTokenizer
    {
        private String _str; //not yet consumed remainder of the input
        private final String _del;
        private final int _delLen;

        public HierarchyAwareStringTokenizer(String in, String delim) {
            _str = in;
            _del = delim;
            _delLen = delim.length();
        }

        /**
         * @return true if unconsumed input is left
         */
        public boolean hasMoreTokens() {
            return !_str.isEmpty();
        }

        /**
         * Returns the input up to the next delimiter on the top hierarchy level and consumes
         * it together with that delimiter. If no such delimiter exists, the entire remainder
         * is returned and the tokenizer is exhausted.
         * 
         * @return next token
         */
        public String nextToken() {
            int nextDelim = determineNextSameLevelIndexOf(_str, _del);
            if (nextDelim < 0) {
                //no further delimiter: the remainder is the last token
                String last = _str;
                _str = "";
                return last;
            }
            String token = _str.substring(0, nextDelim);
            _str = _str.substring(nextDelim + _delLen);
            return token;
        }

        /**
         * Finds the first occurrence of the pattern that is not nested inside a
         * LEVELIN/LEVELOUT pair.
         * 
         * @param data string to search
         * @param pattern delimiter to search for
         * @return index of the first top-level occurrence, or -1 if the pattern does not
         *         occur (anymore) behind the scanned position
         */
        private static int determineNextSameLevelIndexOf(String data, String pattern) {
            int pos = 0; //start of the not yet investigated part of data
            int depth = 0; //current nesting level

            while (true) {
                int iPattern = data.indexOf(pattern, pos);
                if (iPattern < 0)
                    return -1; //no pattern found at all

                int iIn = data.indexOf(LEVELIN, pos);
                int iOut = data.indexOf(LEVELOUT, pos);

                //position of the closest marker; min >= pos by definition
                int min = iPattern;
                if (iIn >= 0)
                    min = Math.min(min, iIn);
                if (iOut >= 0)
                    min = Math.min(min, iOut);

                //stack maintenance
                if (iPattern == min && depth == 0)
                    return iPattern;
                else if (iIn == min) {
                    depth++;
                    pos = min + LEVELIN.length();
                } else if (iOut == min) {
                    depth--;
                    pos = min + LEVELOUT.length();
                } else {
                    //pattern on a nested level: skip exactly this occurrence (previously the
                    //length of the last level marker was reused here, which is only correct
                    //if all markers have the same length); advance by at least one character
                    //to guarantee progress
                    pos = min + Math.max(1, pattern.length());
                }
            }
        }
    }
}