com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext.java Source code

Introduction

Here is the source code for com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext.java
Source

/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
*/

package com.ibm.bi.dml.runtime.controlprogram.context;

import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.storage.StorageLevel;

import scala.Tuple2;

import com.ibm.bi.dml.api.DMLScript;
import com.ibm.bi.dml.api.MLContext;
import com.ibm.bi.dml.api.MLContextProxy;
import com.ibm.bi.dml.hops.OptimizerUtils;
import com.ibm.bi.dml.lops.Checkpoint;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException;
import com.ibm.bi.dml.runtime.controlprogram.Program;
import com.ibm.bi.dml.runtime.controlprogram.caching.MatrixObject;
import com.ibm.bi.dml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import com.ibm.bi.dml.runtime.instructions.spark.SPInstruction;
import com.ibm.bi.dml.runtime.instructions.spark.data.BroadcastObject;
import com.ibm.bi.dml.runtime.instructions.spark.data.LineageObject;
import com.ibm.bi.dml.runtime.instructions.spark.data.PartitionedBroadcastMatrix;
import com.ibm.bi.dml.runtime.instructions.spark.data.PartitionedMatrixBlock;
import com.ibm.bi.dml.runtime.instructions.spark.data.RDDObject;
import com.ibm.bi.dml.runtime.instructions.spark.functions.CopyBinaryCellFunction;
import com.ibm.bi.dml.runtime.instructions.spark.functions.CopyBlockPairFunction;
import com.ibm.bi.dml.runtime.instructions.spark.functions.CopyTextInputFunction;
import com.ibm.bi.dml.runtime.instructions.spark.utils.RDDAggregateUtils;
import com.ibm.bi.dml.runtime.instructions.spark.utils.SparkUtils;
import com.ibm.bi.dml.runtime.matrix.data.InputInfo;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.matrix.data.MatrixCell;
import com.ibm.bi.dml.runtime.matrix.data.MatrixIndexes;
import com.ibm.bi.dml.runtime.matrix.data.OutputInfo;
import com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration;
import com.ibm.bi.dml.runtime.util.MapReduceTool;
import com.ibm.bi.dml.utils.Statistics;

public class SparkExecutionContext extends ExecutionContext {

    //logger of this execution context
    private static final Log LOG = LogFactory.getLog(SparkExecutionContext.class.getName());

    //internal configurations 
    //LAZY_SPARKCTX_CREATION: in hybrid execution mode the constructor skips the spark context
    //  creation and getSparkContext() creates the context on first use
    //ASYNCHRONOUS_VAR_DESTROY: controls whether broadcast variables are destroyed
    //  asynchronously on cleanup (see cleanupBroadcastVariable)
    //FAIR_SCHEDULER_MODE: sets spark.scheduler.mode=FAIR in the default cluster setup
    private static boolean LAZY_SPARKCTX_CREATION = true;
    private static boolean ASYNCHRONOUS_VAR_DESTROY = true;
    private static boolean FAIR_SCHEDULER_MODE = true;

    //executor memory and relative fractions as obtained from the spark configuration
    //(a negative value means 'not yet analyzed' and triggers analyzeSparkConfiguation on access)
    private static long _memExecutors = -1; //mem per executors (parsed from spark.executor.memory)
    private static double _memRatioData = -1; //spark.storage.memoryFraction
    private static double _memRatioShuffle = -1; //spark.shuffle.memoryFraction
    private static int _numExecutors = -1; //total executors
    private static int _defaultPar = -1; //total vcores  
    private static boolean _confOnly = false; //infrastructure info based on config

    // Only one SparkContext may be active per JVM. You must stop() the active SparkContext before creating a new one. 
    // This limitation may eventually be removed; see SPARK-2243 for more details.
    // Hence, the context is kept as a static singleton that is shared by all execution contexts.
    private static JavaSparkContext _spctx = null;

    /**
     * Creates a spark execution context for the given program; delegates to
     * {@link #SparkExecutionContext(boolean, Program)} with allocateVars=true.
     * 
     * @param prog program handed to the two-argument constructor
     */
    protected SparkExecutionContext(Program prog) {
        //protected constructor to force use of ExecutionContextFactory
        this(true, prog);
    }

    protected SparkExecutionContext(boolean allocateVars, Program prog) {
        //protected constructor to force use of ExecutionContextFactory
        super(allocateVars, prog);

        //spark context creation via internal initializer
        if (!(LAZY_SPARKCTX_CREATION && OptimizerUtils.isHybridExecutionMode())) {
            initSparkContext();
        }
    }

    /**
     * Returns the singleton spark context. With lazy spark context creation
     * enabled, the context is created on demand by this call, i.e., the call
     * blocks until the spark context exists (lazy instead of asynchronous
     * creation avoids waiting for an uninitialized spark context on close).
     * 
     * @return the shared spark context; may be null if lazy creation is
     *         disabled and no context has been created or it has been closed
     */
    public JavaSparkContext getSparkContext() {
        //on-demand creation (no-op if the context already exists)
        if (isLazySparkContextCreation()) {
            initSparkContext();
        }

        return _spctx;
    }

    /**
     * Static access to the singleton spark context. In contrast to
     * getSparkContext(), this always triggers the spark context
     * initialization (which is a no-op if the context already exists).
     * 
     * @return the shared spark context
     */
    public static JavaSparkContext getSparkContextStatic() {
        initSparkContext();
        return _spctx;
    }

    /**
     * Stops the singleton spark context, if existing, and resets the reference
     * to make sure a stopped context is never used again. Synchronized on the
     * class, i.e., on the same monitor as the static spark context initialization.
     */
    public void close() {
        synchronized (SparkExecutionContext.class) {
            //nothing to do if no context has been created
            if (_spctx == null) {
                return;
            }

            _spctx.stop();
            _spctx = null;
        }
    }

    /**
     * Indicates if the spark context is created lazily on first access.
     * 
     * @return value of the internal LAZY_SPARKCTX_CREATION configuration
     */
    public static boolean isLazySparkContextCreation() {
        return LAZY_SPARKCTX_CREATION;
    }

    /**
     * Creates the singleton spark context if it does not exist yet. There are
     * three setups: (1) reuse of the spark context of an active MLContext,
     * (2) a local configuration (master local[*], ui disabled) if
     * DMLScript.USE_LOCAL_SPARK_CONFIG is set, and (3) the default cluster
     * setup, where master, appname, etc refer to system properties as given
     * in the spark configuration or during spark-submit.
     * 
     * Afterwards, the binaryblock serialization framework is registered on
     * the hadoop configuration of the context (if enabled) and the context
     * creation time is recorded (if statistics are enabled).
     */
    private synchronized static void initSparkContext() {
        //check for redundant spark context init
        if (_spctx != null) {
            return;
        }

        final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;

        MLContext activeMLCtx = MLContextProxy.getActiveMLContext();
        if (activeMLCtx != null) {
            // This is when DML is called through spark shell
            // Will clean the passing of static variables later as this involves minimal change to DMLScript
            _spctx = new JavaSparkContext(activeMLCtx.getSparkContext());
        } else if (DMLScript.USE_LOCAL_SPARK_CONFIG) {
            //local setup for integration testing
            SparkConf localConf = new SparkConf().setMaster("local[*]").setAppName("My local integration test app");
            // Allowing multiple contexts is discouraged in spark and hence intentionally not configured here:
            // conf.set("spark.driver.allowMultipleContexts", "true");
            localConf.set("spark.ui.enabled", "false");
            _spctx = new JavaSparkContext(localConf);
        } else {
            //default cluster setup with systemml-preferred spark configuration (w/o user choice)
            SparkConf clusterConf = new SparkConf();

            //always set unlimited result size (required for cp collect)
            clusterConf.set("spark.driver.maxResultSize", "0");

            //always use the fair scheduler (for single jobs, it's equivalent to fifo
            //but for concurrent jobs in parfor it ensures better data locality because
            //round robin assignment mitigates the problem of 'sticky slots')
            if (FAIR_SCHEDULER_MODE) {
                clusterConf.set("spark.scheduler.mode", "FAIR");
            }

            _spctx = new JavaSparkContext(clusterConf);
        }

        //globally add binaryblock serialization framework for all hdfs read/write operations
        //TODO if spark context passed in from outside (mlcontext), we need to clean this up at the end 
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) {
            MRJobConfiguration.addBinaryBlockSerializationFramework(_spctx.hadoopConfiguration());
        }

        //statistics maintenance
        if (DMLScript.STATISTICS) {
            Statistics.setSparkCtxCreateTime(System.nanoTime() - startNanos);
        }
    }

    /**
     * Returns the binary-block RDD handle of the given variable. Spark
     * instructions should call this for all matrix inputs except broadcast
     * variables.
     * 
     * @param varname name of the matrix variable
     * @return rdd handle obtained with the binary block input info, cast to
     *         index-block pairs (unchecked)
     * @throws DMLRuntimeException
     * @throws DMLUnsupportedOperationException
     */
    @SuppressWarnings("unchecked")
    public JavaPairRDD<MatrixIndexes, MatrixBlock> getBinaryBlockRDDHandleForVariable(String varname)
            throws DMLRuntimeException, DMLUnsupportedOperationException {
        JavaPairRDD<?, ?> handle = getRDDHandleForVariable(varname, InputInfo.BinaryBlockInputInfo);
        return (JavaPairRDD<MatrixIndexes, MatrixBlock>) handle;
    }

    /**
     * Returns the RDD handle of the given variable by looking up its matrix
     * object and delegating to getRDDHandleForMatrixObject.
     * 
     * @param varname name of the matrix variable
     * @param inputInfo input format used if the rdd needs to be created from file
     * @return rdd handle of the matrix object
     * @throws DMLRuntimeException
     * @throws DMLUnsupportedOperationException
     */
    public JavaPairRDD<?, ?> getRDDHandleForVariable(String varname, InputInfo inputInfo)
            throws DMLRuntimeException, DMLUnsupportedOperationException {
        return getRDDHandleForMatrixObject(getMatrixObject(varname), inputInfo);
    }

    /**
     * Returns an RDD handle for a given matrix object. Depending on the state
     * of the matrix object, this (1) reuses an existing rdd handle, (2)
     * parallelizes the in-memory matrix block, or (3) creates an rdd over the
     * file of the matrix object. In cases 2 and 3, the newly created rdd is
     * registered as rdd handle of the matrix object for future operations.
     * 
     * @param mo matrix object
     * @param inputInfo input format of the file (only used for case 3)
     * @return rdd handle of the matrix object
     * @throws DMLRuntimeException if the input format is not supported in case 3
     * @throws DMLUnsupportedOperationException 
     */
    @SuppressWarnings("unchecked")
    public JavaPairRDD<?, ?> getRDDHandleForMatrixObject(MatrixObject mo, InputInfo inputInfo)
            throws DMLRuntimeException, DMLUnsupportedOperationException {

        //NOTE: MB this logic should be integrated into MatrixObject
        //However, for now we cannot assume that spark libraries are 
        //always available and hence only store generic references in 
        //matrix object while all the logic is in the SparkExecContext

        //CASE 1: rdd already existing (reuse if checkpoint or trigger
        //pending rdd operations if not yet cached but prevent to re-evaluate 
        //rdd operations if already executed and cached
        if (mo.getRDDHandle() != null && (mo.getRDDHandle().isCheckpointRDD() || !mo.isCached(false))) {
            //hand out the existing rdd (w/o input format change)
            return mo.getRDDHandle().getRDD();
        }

        //CASE 2: dirty in memory data or cached result of rdd operations
        if (mo.isDirty() || mo.isCached(false)) {
            //pin the matrix in memory, parallelize it, and unpin it again
            MatrixBlock pinned = mo.acquireRead();
            JavaPairRDD<?, ?> memRdd = toJavaPairRDD(getSparkContext(), pinned, (int) mo.getNumRowsPerBlock(),
                    (int) mo.getNumColumnsPerBlock());
            mo.release();

            //keep rdd handle for future operations on it
            mo.setRDDHandle(new RDDObject(memRdd, mo.getVarName()));
            return memRdd;
        }

        //CASE 3: non-dirty (file exists on HDFS), parallelize hdfs-resident file
        //note: the copy of all key/value pairs is still required in Spark 1.4 because spark hands out 
        //whatever the inputformat recordreader returns; the javadoc explicitly recommends this copy
        JavaPairRDD<?, ?> fileRdd;
        if (inputInfo == InputInfo.BinaryBlockInputInfo) {
            // For binary block, these are: SequenceFileInputFormat.class, MatrixIndexes.class, MatrixBlock.class
            fileRdd = getSparkContext().hadoopFile(mo.getFileName(), inputInfo.inputFormatClass,
                    inputInfo.inputKeyClass, inputInfo.inputValueClass);
            fileRdd = ((JavaPairRDD<MatrixIndexes, MatrixBlock>) fileRdd).mapToPair(new CopyBlockPairFunction());
        } else if (inputInfo == InputInfo.TextCellInputInfo || inputInfo == InputInfo.CSVInputInfo
                || inputInfo == InputInfo.MatrixMarketInputInfo) {
            fileRdd = getSparkContext().hadoopFile(mo.getFileName(), inputInfo.inputFormatClass,
                    inputInfo.inputKeyClass, inputInfo.inputValueClass);
            fileRdd = ((JavaPairRDD<LongWritable, Text>) fileRdd).mapToPair(new CopyTextInputFunction());
        } else if (inputInfo == InputInfo.BinaryCellInputInfo) {
            fileRdd = getSparkContext().hadoopFile(mo.getFileName(), inputInfo.inputFormatClass,
                    inputInfo.inputKeyClass, inputInfo.inputValueClass);
            fileRdd = ((JavaPairRDD<MatrixIndexes, MatrixCell>) fileRdd).mapToPair(new CopyBinaryCellFunction());
        } else {
            throw new DMLRuntimeException("Incorrect input format in getRDDHandleForVariable");
        }

        //keep rdd handle (flagged as hdfs file) for future operations on it
        RDDObject hdfsHandle = new RDDObject(fileRdd, mo.getVarName());
        hdfsHandle.setHDFSFile(true);
        mo.setRDDHandle(hdfsHandle);

        return fileRdd;
    }

    /**
     * Returns the partitioned broadcast of the given matrix variable. A valid
     * existing broadcast handle of the matrix object is reused; otherwise the
     * matrix is read into memory, split into coarse-grained partitions of
     * blocks, each partition is broadcast individually, and the result is
     * registered as broadcast handle of the matrix object.
     * 
     * Note: the created broadcasts are destroyed via the lineage-based cleanup
     * triggered by cleanupMatrixObject.
     * 
     * @param varname name of the matrix variable
     * @return partitioned broadcast of the matrix
     * @throws DMLRuntimeException
     * @throws DMLUnsupportedOperationException
     */
    @SuppressWarnings("unchecked")
    public PartitionedBroadcastMatrix getBroadcastForVariable(String varname)
            throws DMLRuntimeException, DMLUnsupportedOperationException {
        MatrixObject mo = getMatrixObject(varname);

        //reuse existing broadcast handle if still valid
        if (mo.getBroadcastHandle() != null && mo.getBroadcastHandle().isValid()) {
            return mo.getBroadcastHandle().getBroadcast();
        }

        //obtain meta data for matrix 
        int brlen = (int) mo.getNumRowsPerBlock();
        int bclen = (int) mo.getNumColumnsPerBlock();

        //create partitioned matrix block and release memory consumed by input
        MatrixBlock input = mo.acquireRead();
        PartitionedMatrixBlock pmb = new PartitionedMatrixBlock(input, brlen, bclen);
        mo.release();

        //determine coarse-grained partitioning
        int numPerPart = PartitionedBroadcastMatrix.computeBlocksPerPartition(mo.getNumRows(),
                mo.getNumColumns(), brlen, bclen);
        int numParts = (int) Math.ceil((double) pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() / numPerPart);
        Broadcast<PartitionedMatrixBlock>[] parts = new Broadcast[numParts];

        if (numParts <= 1) {
            //single partition: broadcast the partitioned block as is
            parts[0] = getSparkContext().broadcast(pmb);
        } else {
            //multiple partitions: broadcast consecutive ranges of blocks
            for (int i = 0; i < numParts; i++) {
                int offset = i * numPerPart;
                int numBlks = Math.min(numPerPart, pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() - offset);
                parts[i] = getSparkContext().broadcast(pmb.createPartition(offset, numBlks));
            }
        }

        //register the new broadcast at the matrix object for reuse
        PartitionedBroadcastMatrix result = new PartitionedBroadcastMatrix(parts);
        mo.setBroadcastHandle(new BroadcastObject(result, varname));

        return result;
    }

    /**
     * Registers the output rdd of a spark operation as meta data (rdd handle)
     * of the matrix object of the given variable in the symbol table.
     * 
     * Spark instructions should call this for all matrix outputs.
     * 
     * @param varname name of the output matrix variable
     * @param rdd output rdd to attach
     * @throws DMLRuntimeException 
     */
    public void setRDDHandleForVariable(String varname, JavaPairRDD<MatrixIndexes, ?> rdd)
            throws DMLRuntimeException {
        getMatrixObject(varname).setRDDHandle(new RDDObject(rdd, varname));
    }

    /**
     * Utility method for creating an RDD out of an in-memory matrix block.
     * A matrix that fits into a single block is handed over as is with block
     * index (1,1); otherwise the matrix is sliced into blocks of at most
     * brlen x bclen with 1-based block indexes, in row-major block order.
     * 
     * @param sc spark context used to parallelize the blocks
     * @param src in-memory matrix block
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     * @return rdd of index-block pairs
     * @throws DMLUnsupportedOperationException 
     * @throws DMLRuntimeException 
     */
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> toJavaPairRDD(JavaSparkContext sc, MatrixBlock src,
            int brlen, int bclen) throws DMLRuntimeException, DMLUnsupportedOperationException {
        LinkedList<Tuple2<MatrixIndexes, MatrixBlock>> pairs = new LinkedList<Tuple2<MatrixIndexes, MatrixBlock>>();

        final int numRows = src.getNumRows();
        final int numCols = src.getNumColumns();

        if (numRows <= brlen && numCols <= bclen) {
            //single block: no slicing required
            pairs.add(new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(1, 1), src));
        } else {
            final boolean sparse = src.isInSparseFormat();
            final int numRowBlocks = (int) Math.ceil(numRows / (double) brlen);
            final int numColBlocks = (int) Math.ceil(numCols / (double) bclen);

            for (int bi = 0; bi < numRowBlocks; bi++) {
                int rowStart = bi * brlen;
                //boundary blocks hold the remaining rows only
                int blockRows = Math.min(brlen, numRows - rowStart);

                for (int bj = 0; bj < numColBlocks; bj++) {
                    int colStart = bj * bclen;
                    int blockCols = Math.min(bclen, numCols - colStart);

                    //copy submatrix to block
                    MatrixBlock block = new MatrixBlock(blockRows, blockCols, sparse);
                    src.sliceOperations(rowStart, rowStart + blockRows - 1, colStart, colStart + blockCols - 1,
                            block);

                    //append block with 1-based block index
                    pairs.add(new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(bi + 1, bj + 1), block));
                }
            }
        }

        return sc.parallelizePairs(pairs);
    }

    /**
     * Generic abstraction for calls from the buffer pool: unwraps the rdd of
     * the given rdd object and delegates to
     * toMatrixBlock(JavaPairRDD, int, int, int, int, long).
     * 
     * @param rdd rdd object wrapping an rdd of index-block pairs
     * @param rlen number of rows of the matrix
     * @param clen number of columns of the matrix
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     * @param nnz number of non-zeros (negative if unknown)
     * @return matrix block assembled from the rdd
     * @throws DMLRuntimeException 
     */
    @SuppressWarnings("unchecked")
    public static MatrixBlock toMatrixBlock(RDDObject rdd, int rlen, int clen, int brlen, int bclen, long nnz)
            throws DMLRuntimeException {
        JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
        return toMatrixBlock(blocks, rlen, clen, brlen, bclen, nnz);
    }

    /**
     * Utility method for creating a single matrix block out of an RDD. Note that the collect call
     * might trigger execution of any pending transformations. 
     * 
     * NOTE: This is an unguarded utility function, which requires memory for both the output matrix
     * and its collected, blocked representation.
     * 
     * @param rdd rdd of index-block pairs
     * @param rlen number of rows of the matrix
     * @param clen number of columns of the matrix
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     * @param nnz number of non-zeros; if negative, a fully dense matrix is assumed
     *        for deciding on the sparse/dense output representation
     * @return the assembled matrix block
     * @throws DMLRuntimeException if the matrix fits into a single block but more than one block is collected
     */
    public static MatrixBlock toMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, int rlen, int clen,
            int brlen, int bclen, long nnz) throws DMLRuntimeException {
        //SINGLE BLOCK: special case without copy and nnz maintenance
        if (rlen <= brlen && clen <= bclen) {
            List<Tuple2<MatrixIndexes, MatrixBlock>> collected = rdd.collect();
            if (collected.isEmpty()) {
                //empty (e.g., after ops w/ outputEmpty=false)
                return new MatrixBlock(rlen, clen, true);
            }
            if (collected.size() == 1) {
                return collected.get(0)._2();
            }
            throw new DMLRuntimeException("Expecting no more than one result block.");
        }

        //MULTIPLE BLOCKS: determine target sparse/dense representation
        long estimatedNnz = (nnz < 0) ? (long) rlen * clen : nnz;
        boolean sparseTarget = MatrixBlock.evalSparseFormatInMemory(rlen, clen, estimatedNnz);

        //create output matrix block (w/ lazy allocation) before collecting the blocks
        MatrixBlock result = new MatrixBlock(rlen, clen, sparseTarget);

        //copy blocks one-at-a-time into output matrix block
        for (Tuple2<MatrixIndexes, MatrixBlock> entry : rdd.collect()) {
            MatrixIndexes ix = entry._1();
            MatrixBlock blk = entry._2();

            //row/column offsets of the block (block indexes are 1-based)
            int rowStart = (int) (ix.getRowIndex() - 1) * brlen;
            int colStart = (int) (ix.getColumnIndex() - 1) * bclen;

            if (sparseTarget) {
                //append block to sparse target in order to avoid shifting
                //note: this append requires a final sort of sparse rows
                result.appendToSparse(blk, rowStart, colStart);
            } else {
                result.copy(rowStart, rowStart + blk.getNumRows() - 1, colStart,
                        colStart + blk.getNumColumns() - 1, blk, false);
            }
        }

        //post-processing output matrix
        if (sparseTarget) {
            result.sortSparseRows();
        }
        result.recomputeNonZeros();
        result.examSparsity();

        return result;
    }

    /**
     * Collects the given rdd and assembles its blocks into a partitioned
     * matrix block, where each block is placed at its block row/column index.
     * 
     * @param rdd rdd of index-block pairs
     * @param rlen number of rows of the matrix
     * @param clen number of columns of the matrix
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     * @param nnz number of non-zeros (not used by this method)
     * @return partitioned matrix block holding all collected blocks
     * @throws DMLRuntimeException
     */
    public static PartitionedMatrixBlock toPartitionedMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> rdd,
            int rlen, int clen, int brlen, int bclen, long nnz) throws DMLRuntimeException {
        PartitionedMatrixBlock result = new PartitionedMatrixBlock(rlen, clen, brlen, bclen);

        //place the collected blocks one-at-a-time into the output
        for (Tuple2<MatrixIndexes, MatrixBlock> entry : rdd.collect()) {
            MatrixIndexes ix = entry._1();
            result.setMatrixBlock((int) ix.getRowIndex(), (int) ix.getColumnIndex(), entry._2());
        }

        return result;
    }

    /**
     * Writes the rdd of the given rdd object to the given path and returns
     * the number of non-zeros as obtained from SparkUtils.computeNNZFromBlocks
     * over the blocks of the rdd (computed before the file is written).
     * 
     * @param rdd rdd object wrapping an rdd of index-block pairs
     * @param path target file name
     * @param oinfo output info providing the key, value, and output format classes
     * @return nnz aggregate of all blocks
     */
    @SuppressWarnings("unchecked")
    public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
        JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();

        //recompute nnz 
        final long totalNnz = SparkUtils.computeNNZFromBlocks(blocks);

        //save file (spark action)
        blocks.saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);

        return totalNnz;
    }

    /**
     * Returns the available memory budget for broadcast variables in bytes.
     * In detail, this takes the executor memory, subtracts the fractions
     * reserved for data and shuffle, and scales the remaining memory by
     * OptimizerUtils.MEM_UTIL_FACTOR. Note, that this is a conservative
     * estimate since both data memory and shuffle memory might not be fully
     * utilized. The spark configuration is analyzed on demand.
     * 
     * @return broadcast memory budget in bytes
     */
    public static double getBroadcastMemoryBudget() {
        //lazy analysis of the spark configuration
        if (_memExecutors < 0 || _memRatioData < 0 || _memRatioShuffle < 0) {
            analyzeSparkConfiguation();
        }

        //utilization factor applied to the remaining free memory
        return OptimizerUtils.MEM_UTIL_FACTOR
                * (_memExecutors - _memExecutors * (_memRatioData + _memRatioShuffle));
    }

    /**
     * Returns the configured total data memory over all executors without
     * refreshing the number of executors; delegates to
     * getConfiguredTotalDataMemory(false).
     * 
     * @return total data memory
     */
    public static double getConfiguredTotalDataMemory() {
        return getConfiguredTotalDataMemory(false);
    }

    /**
     * Returns the configured total data memory, i.e., the executor memory
     * times the data memory fraction times the number of executors. The
     * spark configuration is analyzed on demand.
     * 
     * @param refresh if true (and the infrastructure info is not purely
     *        config-based), the current number of executors is obtained
     *        from the spark context instead of the cached number
     * @return total data memory
     */
    public static double getConfiguredTotalDataMemory(boolean refresh) {
        if (_memExecutors < 0 || _memRatioData < 0) {
            analyzeSparkConfiguation();
        }

        //no refresh requested or possible: rely on the cached number of executors
        if (!refresh || _confOnly) {
            return (_memExecutors * _memRatioData * _numExecutors);
        }

        //always get the current num executors on refresh because this might 
        //change if not all executors are initially allocated and it is plan-relevant
        //(correct for the driver, but assume at least one executor)
        int currentExecutors = Math.max(getSparkContextStatic().sc().getExecutorMemoryStatus().size() - 1, 1);
        return _memExecutors * _memRatioData * currentExecutors;
    }

    /**
     * Returns the cached total number of executors; the spark configuration
     * is analyzed on demand if this number is not yet known.
     * 
     * @return total number of executors
     */
    public static int getNumExecutors() {
        boolean notYetAnalyzed = (_numExecutors < 0);
        if (notYetAnalyzed) {
            analyzeSparkConfiguation();
        }

        return _numExecutors;
    }

    /**
     * Returns the default parallelism without refresh; delegates to
     * getDefaultParallelism(false).
     * 
     * @return default parallelism
     */
    public static int getDefaultParallelism() {
        return getDefaultParallelism(false);
    }

    /**
     * Returns the default parallelism (total number of vcores).
     * 
     * @param refresh if true (and the infrastructure info is not purely
     *        config-based), the current default parallelism is obtained from
     *        the spark context; otherwise the cached value is returned, which
     *        is determined by analyzing the spark configuration on demand
     * @return default parallelism
     */
    public static int getDefaultParallelism(boolean refresh) {
        //lazy analysis is only required if we rely on the cached value
        if (!refresh && _defaultPar < 0) {
            analyzeSparkConfiguation();
        }

        //always get the current default parallelism on refresh because this might 
        //change if not all executors are initially allocated and it is plan-relevant
        return (refresh && !_confOnly) ? getSparkContextStatic().defaultParallelism() : _defaultPar;
    }

    /**
     * Analyzes the spark configuration and caches the executor memory (in
     * bytes), the data and shuffle memory fractions, the number of executors,
     * and the default parallelism. If the number of executors and the
     * parallelism (or cores per executor) are explicitly configured, only the
     * configuration is used; otherwise the spark context is consulted (which
     * creates the context if necessary).
     * 
     * @throws NumberFormatException if spark.executor.memory is not a valid
     *         memory size
     */
    public static void analyzeSparkConfiguation() {
        SparkConf sconf = new SparkConf();

        //parse absolute executor memory (in bytes)
        _memExecutors = parseMemorySizeInBytes(sconf.get("spark.executor.memory", "512m"));

        //get data and shuffle memory ratios (defaults not specified in job conf)
        _memRatioData = sconf.getDouble("spark.storage.memoryFraction", 0.6); //default 60%
        _memRatioShuffle = sconf.getDouble("spark.shuffle.memoryFraction", 0.2); //default 20%

        int numExecutors = sconf.getInt("spark.executor.instances", -1);
        int numCoresPerExec = sconf.getInt("spark.executor.cores", -1);
        int defaultPar = sconf.getInt("spark.default.parallelism", -1);

        if (numExecutors > 1 && (defaultPar > 1 || numCoresPerExec > 1)) {
            _numExecutors = numExecutors;
            _defaultPar = (defaultPar > 1) ? defaultPar : numExecutors * numCoresPerExec;
            _confOnly = true;
        } else {
            //get default parallelism (total number of executors and cores)
            //note: spark context provides this information while conf does not
            //(for num executors we need to correct for driver and local mode)
            JavaSparkContext jsc = getSparkContextStatic();
            _numExecutors = Math.max(jsc.sc().getExecutorMemoryStatus().size() - 1, 1);
            _defaultPar = jsc.defaultParallelism();
            _confOnly = false; //implies env info refresh w/ spark context 
        }

        //note: required time for infrastructure analysis on 5 node cluster: ~5-20ms. 
    }

    /**
     * Parses a memory size string such as "512m", "2g", "64k", "1t",
     * "512mb", or a plain number of bytes such as "1073741824" into bytes.
     * The unit suffix is case-insensitive, binary (multiples of 1024), and
     * may be followed by an optional 'b'.
     * 
     * @param str memory size string
     * @return memory size in bytes
     * @throws NumberFormatException if the numeric part cannot be parsed
     */
    private static long parseMemorySizeInBytes(String str) {
        String size = str.trim();
        int end = size.length();

        //strip optional trailing 'b' (e.g., "512mb" or "1024b")
        if (end > 0 && Character.toLowerCase(size.charAt(end - 1)) == 'b') {
            end--;
        }

        //determine the unit factor; no unit means plain bytes
        long factor = 1;
        if (end > 0) {
            switch (Character.toLowerCase(size.charAt(end - 1))) {
            case 'k':
                factor = 1024L;
                end--;
                break;
            case 'm':
                factor = 1024L * 1024;
                end--;
                break;
            case 'g':
                factor = 1024L * 1024 * 1024;
                end--;
                break;
            case 't':
                factor = 1024L * 1024 * 1024 * 1024;
                end--;
                break;
            default:
                //no unit suffix, keep all remaining characters as number
                break;
            }
        }

        return Long.parseLong(size.substring(0, end).trim()) * factor;
    }

    /**
     * Logs a validation warning if the JDK version is less than JDK8 and the
     * executors are multi-threaded, i.e., the default parallelism exceeds the
     * number of executors. Obtaining these numbers may trigger the analysis
     * of the spark configuration.
     */
    public void checkAndRaiseValidationWarningJDKVersion() {
        //check for jdk version less than jdk8
        boolean isLtJDK8 = InfrastructureAnalyzer.isJavaVersionLessThanJDK8();

        //check multi-threaded executors (more cores than executors)
        int numExecutors = getNumExecutors();
        int numCores = getDefaultParallelism();
        boolean multiThreaded = (numCores > numExecutors);

        //nothing to report for jdk8+ or single-threaded executors
        if (!(isLtJDK8 && multiThreaded)) {
            return;
        }

        String jreVersion = System.getProperty("java.version");

        LOG.warn("########################################################################################");
        LOG.warn("### WARNING: Multi-threaded text reblock may lead to thread contention on JRE < 1.8 ####");
        LOG.warn("### java.version = " + jreVersion);
        LOG.warn("### total number of executors = " + numExecutors);
        LOG.warn("### total number of cores = " + numCores);
        LOG.warn("### JDK-7032154: Performance tuning of sun.misc.FloatingDecimal/FormattedFloatingDecimal");
        LOG.warn("### Workaround: Convert text to binary w/ changed configuration of one executor per core");
        LOG.warn("########################################################################################");
    }

    ///////////////////////////////////////////
    // Cleanup of RDDs and Broadcast variables
    ///////

    /**
     * Adds the rdd handle of the child variable as lineage child to the rdd
     * handle of the parent variable.
     * 
     * @param varParent name of the parent matrix variable
     * @param varChild name of the child matrix variable
     * @throws DMLRuntimeException
     */
    public void addLineageRDD(String varParent, String varChild) throws DMLRuntimeException {
        RDDObject parentHandle = getMatrixObject(varParent).getRDDHandle();
        parentHandle.addLineageChild(getMatrixObject(varChild).getRDDHandle());
    }

    /**
     * Adds the broadcast handle of the child variable as lineage child to the
     * rdd handle of the parent variable.
     * 
     * @param varParent name of the parent matrix variable
     * @param varChild name of the child matrix variable
     * @throws DMLRuntimeException
     */
    public void addLineageBroadcast(String varParent, String varChild) throws DMLRuntimeException {
        RDDObject parentHandle = getMatrixObject(varParent).getRDDHandle();
        parentHandle.addLineageChild(getMatrixObject(varChild).getBroadcastHandle());
    }

    /**
     * Cleans up the given matrix object if cleanup is enabled and the object
     * is no longer referenced by any variable: cached data is cleared, the
     * file and its .mtd meta data file are deleted (if a file exists), and
     * rdd and broadcast variables are cleaned up recursively along the lineage.
     * 
     * NOTE: this method overwrites the default behavior of cleanupMatrixObject
     * and hence is transparently used by rmvar instructions and other users. The
     * core difference is the lineage-based cleanup of RDD and broadcast variables.
     * 
     * @param mo matrix object to clean up
     * @throws DMLRuntimeException wrapping any exception raised during cleanup
     */
    @Override
    public void cleanupMatrixObject(MatrixObject mo) throws DMLRuntimeException {
        try {
            if (!mo.isCleanupEnabled()) {
                return;
            }

            //compute ref count only if matrix cleanup actually necessary
            if (getVariables().hasReferences(mo)) {
                return;
            }

            //clean cached data   
            mo.clearData();

            //clean hdfs data
            if (mo.isFileExists()) {
                String fname = mo.getFileName();
                if (fname != null) {
                    MapReduceTool.deleteFileIfExistOnHDFS(fname);
                    MapReduceTool.deleteFileIfExistOnHDFS(fname + ".mtd");
                }
            }

            //cleanup RDD and broadcast variables (recursive)
            //note: requires that mo.clearData already removed back references
            if (mo.getRDDHandle() != null) {
                rCleanupLineageObject(mo.getRDDHandle());
            }
            if (mo.getBroadcastHandle() != null) {
                rCleanupLineageObject(mo.getBroadcastHandle());
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    private void rCleanupLineageObject(LineageObject lob) {
        //stop the recursion if the object still has consumers, or if it is still
        //reachable through a matrix object (back references are used for robustness
        //in function calls and to avoid repeated scans of the symbol table)
        if (lob.getNumReferences() > 0 || lob.hasBackReference()) {
            return;
        }

        //release the data behind this lineage object (driver/executors)
        if (lob instanceof RDDObject) {
            final RDDObject rddHandle = (RDDObject) lob;
            cleanupRDDVariable(rddHandle.getRDD());
        } else if (lob instanceof BroadcastObject) {
            //a partitioned broadcast consists of multiple broadcast variables
            final PartitionedBroadcastMatrix partitioned = ((BroadcastObject) lob).getBroadcast();
            for (Broadcast<PartitionedMatrixBlock> part : partitioned.getBroadcasts()) {
                cleanupBroadcastVariable(part);
            }
        }

        //this object no longer consumes its children: release one reference
        //per child and continue the cleanup there
        for (LineageObject child : lob.getLineageChilds()) {
            child.decrementNumReferences();
            rCleanupLineageObject(child);
        }
    }

    /**
     * This call destroys a broadcast variable at all executors and the driver.
     * Hence, it is intended to be used on rmvar only. Depending on the
     * ASYNCHRONOUS_VAR_DESTROY configuration, this is asynchronous or not.
     * 
     * @param bvar broadcast variable to destroy
     */
    public void cleanupBroadcastVariable(Broadcast<?> bvar) {
        //in comparison to 'unpersist' (which would only delete the broadcast from the executors),
        //this call also deletes related data from the driver.
        if (bvar.isValid()) {
            //destroy expects a 'blocking' flag, i.e., the inverse of asynchronous
            //(assumes ASYNCHRONOUS_VAR_DESTROY == true requests asynchronous destroy,
            //as stated by the method documentation)
            bvar.destroy(!ASYNCHRONOUS_VAR_DESTROY);
        }
    }

    /**
     * This call removes an rdd variable from executor memory and disk if required.
     * Hence, it is intended to be used on rmvar only. Depending on the
     * ASYNCHRONOUS_VAR_DESTROY configuration, this is asynchronous or not.
     * 
     * @param rvar
     */
    public void cleanupRDDVariable(JavaPairRDD<?, ?> rvar) {
        //only rdds with an assigned storage level hold cached blocks; compare by
        //value because equal storage levels are not guaranteed to be the same instance
        if (!StorageLevel.NONE().equals(rvar.getStorageLevel())) {
            //unpersist expects a 'blocking' flag, i.e., the inverse of asynchronous
            //(assumes ASYNCHRONOUS_VAR_DESTROY == true requests asynchronous cleanup,
            //as stated by the method documentation)
            rvar.unpersist(!ASYNCHRONOUS_VAR_DESTROY);
        }
    }

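    /**
     * Repartitions the binary-block rdd of the given matrix variable via a merge by key,
     * persists the result with the default checkpoint storage level, triggers its
     * evaluation, and installs it as the new rdd handle of the matrix object (the
     * previous rdd handle is kept as lineage child).
     * 
     * @param var name of the matrix variable
     * @throws DMLRuntimeException 
     * @throws DMLUnsupportedOperationException 
     */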
    @SuppressWarnings("unchecked")
    public void repartitionAndCacheMatrixObject(String var)
            throws DMLRuntimeException, DMLUnsupportedOperationException {
        //look up the matrix object and obtain its rdd in binary block format
        final MatrixObject matrix = getMatrixObject(var);
        final JavaPairRDD<MatrixIndexes, MatrixBlock> source =
                (JavaPairRDD<MatrixIndexes, MatrixBlock>) getRDDHandleForMatrixObject(
                        matrix, InputInfo.BinaryBlockInputInfo);

        //merge by key in order to force the creation of a shuffled (repartitioned) rdd
        final JavaPairRDD<MatrixIndexes, MatrixBlock> merged = RDDAggregateUtils.mergeByKey(source);

        //persist with the default checkpoint storage level and run a count right
        //away to trigger caching (prevents contention)
        final JavaPairRDD<MatrixIndexes, MatrixBlock> persisted =
                merged.persist(Checkpoint.DEFAULT_STORAGE_LEVEL);
        persisted.count();

        //replace the rdd handle of the matrix object in-place
        final RDDObject previous = matrix.getRDDHandle(); //guaranteed to exist (see above)
        final RDDObject replacement = new RDDObject(merged, var);
        replacement.setCheckpointRDD(true); //mark as checkpointed
        replacement.addLineageChild(previous); //keep lineage to prevent cycles on cleanup
        matrix.setRDDHandle(replacement);
    }

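    /**
     * Triggers the evaluation of the binary-block rdd of the given matrix variable
     * by running a count on it, in order to trigger caching up front.
     * 
     * @param var name of the matrix variable
     * @throws DMLRuntimeException
     * @throws DMLUnsupportedOperationException
     */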
    @SuppressWarnings("unchecked")
    public void cacheMatrixObject(String var) throws DMLRuntimeException, DMLUnsupportedOperationException {
        //look up the matrix object and obtain its rdd in binary block format
        final MatrixObject matrix = getMatrixObject(var);
        final JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
                (JavaPairRDD<MatrixIndexes, MatrixBlock>) getRDDHandleForMatrixObject(
                        matrix, InputInfo.BinaryBlockInputInfo);

        //run a count to force evaluation of the rdd (trigger caching to prevent
        //contention); note that no persist call is issued here
        blocks.count();
    }

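    /**
     * Sets the Spark local property "spark.scheduler.pool" to the given pool name.
     * This is a no-op unless FAIR_SCHEDULER_MODE is enabled.
     * 
     * @param poolName name of the scheduler pool
     */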
    public void setThreadLocalSchedulerPool(String poolName) {
        //scheduler pools are only assigned in fair scheduler mode
        if (!FAIR_SCHEDULER_MODE)
            return;

        getSparkContext().sc().setLocalProperty("spark.scheduler.pool", poolName);
    }

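    /**
     * Resets the Spark local property "spark.scheduler.pool" to null.
     * This is a no-op unless FAIR_SCHEDULER_MODE is enabled.
     */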
    public void cleanupThreadLocalSchedulerPool() {
        //scheduler pools are only assigned (and hence reset) in fair scheduler mode
        if (!FAIR_SCHEDULER_MODE)
            return;

        //a null value clears the previously assigned pool
        getSparkContext().sc().setLocalProperty("spark.scheduler.pool", null);
    }

    ///////////////////////////////////////////
    // Debug String Handling (see explain); TODO to be removed
    ///////

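    /**
     * Registers the output rdd of the given variable for monitoring of the given
     * instruction and records its lineage explain string, where up to two rdd
     * lineage children are treated as named inputs. Returns without any effect
     * if the output variable has no rdd handle or no rdd.
     * 
     * @param inst spark instruction that produced the output
     * @param outputVarName name of the output matrix variable
     * @throws DMLRuntimeException if more than two rdd lineage children exist or if
     *         no active MLContext with monitoring is available
     */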
    public void setDebugString(SPInstruction inst, String outputVarName) throws DMLRuntimeException {
        RDDObject parentLineage = getMatrixObject(outputVarName).getRDDHandle();

        //nothing to explain without an output rdd
        if (parentLineage == null || parentLineage.getRDD() == null)
            return;

        //fetch the output rdd once and reuse it for monitoring and explain
        JavaPairRDD<?, ?> out = parentLineage.getRDD();
        MLContextProxy.addRDDForInstructionForMonitoring(inst, out.id());

        //collect up to two rdd inputs (lineage children); children that are not
        //rdd objects (e.g., broadcasts) are ignored
        JavaPairRDD<?, ?> in1 = null;
        JavaPairRDD<?, ?> in2 = null;
        String input1VarName = null;
        String input2VarName = null;
        if (parentLineage.getLineageChilds() != null) {
            for (LineageObject child : parentLineage.getLineageChilds()) {
                if (!(child instanceof RDDObject))
                    continue;

                if (in1 == null) {
                    in1 = ((RDDObject) child).getRDD();
                    input1VarName = child.getVarName();
                } else if (in2 == null) {
                    in2 = ((RDDObject) child).getRDD();
                    input2VarName = child.getVarName();
                } else {
                    //the lineage children are the inputs of the instruction
                    throw new DMLRuntimeException(
                            "PRINT_EXPLAIN_WITH_LINEAGE not yet supported for more than two inputs");
                }
            }
        }
        setLineageInfoForExplain(inst, out, in1, input1VarName, in2, input2VarName);
    }

    // The most expensive operation here is rdd.toDebugString(), which can be a major performance hit because
    // it unrolls Spark's lazy evaluation. Hence, this method is guarded by the flag 'PRINT_EXPLAIN_WITH_LINEAGE', which is
    // enabled only through MLContext. This way, it does not affect our performance evaluation through the non-MLContext path.
    private void setLineageInfoForExplain(SPInstruction inst, JavaPairRDD<?, ?> out, JavaPairRDD<?, ?> in1,
            String in1Name, JavaPairRDD<?, ?> in2, String in2Name) throws DMLRuntimeException {

        // Fail fast: without an active MLContext (and its monitoring util) the result
        // could not be stored, so do not pay for the toDebugString calls below.
        MLContext mlContext = MLContextProxy.getActiveMLContext();
        if (mlContext == null || mlContext.getMonitoringUtil() == null) {
            throw new DMLRuntimeException(
                    "The method setLineageInfoForExplain should be called only through MLContext");
        }

        final String lineBreak = "\\r?\\n";

        // First fetch the start lines and line counts of the input RDD debug strings
        String startLine1 = null;
        String startLine2 = null;
        int i1length = 0, i2length = 0;
        if (in1 != null) {
            String[] lines = in1.toDebugString().split(lineBreak);
            startLine1 = SparkUtils.getStartLineFromSparkDebugInfo(lines[0]);
            i1length = lines.length;
        }
        if (in2 != null) {
            String[] lines = in2.toDebugString().split(lineBreak);
            startLine2 = SparkUtils.getStartLineFromSparkDebugInfo(lines[0]);
            i2length = lines.length;
        }

        // Now process the output RDD and replace each input RDD debug string by the
        // matrix variable name; every emitted line is preceded by a newline.
        StringBuilder outDebugString = new StringBuilder();
        int skip = 0; // number of remaining lines of an already replaced input
        String[] outLines = out.toDebugString().split(lineBreak);
        for (String outLine : outLines) {
            if (skip > 0) {
                skip--;
            } else if (startLine1 != null && outLine.contains(startLine1)) {
                String prefix = SparkUtils.getPrefixFromSparkDebugInfo(outLine);
                outDebugString.append("\n").append(prefix).append("[[").append(in1Name).append("]]");
                skip = i1length - 1;
            } else if (startLine2 != null && outLine.contains(startLine2)) {
                String prefix = SparkUtils.getPrefixFromSparkDebugInfo(outLine);
                outDebugString.append("\n").append(prefix).append("[[").append(in2Name).append("]]");
                skip = i2length - 1;
            } else {
                outDebugString.append("\n").append(outLine);
            }
        }

        mlContext.getMonitoringUtil().setLineageInfo(inst, outDebugString.toString());
    }
}