com.ibm.bi.dml.runtime.controlprogram.caching.MatrixObject.java Source code

Introduction

Here is the source code for com.ibm.bi.dml.runtime.controlprogram.caching.MatrixObject.java

Source

/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
*/

package com.ibm.bi.dml.runtime.controlprogram.caching;

import java.io.IOException;
import java.lang.ref.SoftReference;

import org.apache.commons.lang.mutable.MutableBoolean;

import com.ibm.bi.dml.api.DMLScript;
import com.ibm.bi.dml.api.DMLScript.RUNTIME_PLATFORM;
import com.ibm.bi.dml.hops.OptimizerUtils;
import com.ibm.bi.dml.lops.Lop;
import com.ibm.bi.dml.parser.DMLTranslator;
import com.ibm.bi.dml.parser.Expression.DataType;
import com.ibm.bi.dml.parser.Expression.ValueType;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext;
import com.ibm.bi.dml.runtime.instructions.spark.data.BroadcastObject;
import com.ibm.bi.dml.runtime.instructions.spark.data.RDDObject;
import com.ibm.bi.dml.runtime.instructions.spark.data.RDDProperties;
import com.ibm.bi.dml.runtime.matrix.MatrixCharacteristics;
import com.ibm.bi.dml.runtime.matrix.MatrixDimensionsMetaData;
import com.ibm.bi.dml.runtime.matrix.MatrixFormatMetaData;
import com.ibm.bi.dml.runtime.matrix.MetaData;
import com.ibm.bi.dml.runtime.matrix.data.FileFormatProperties;
import com.ibm.bi.dml.runtime.matrix.data.InputInfo;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.matrix.data.NumItemsByEachReducerMetaData;
import com.ibm.bi.dml.runtime.matrix.data.OutputInfo;
import com.ibm.bi.dml.runtime.util.DataConverter;
import com.ibm.bi.dml.runtime.util.IndexRange;
import com.ibm.bi.dml.runtime.util.MapReduceTool;

/**
 * Represents a matrix in the control program. This class contains methods to
 * read matrices from HDFS and to convert them to a specific format/representation.
 * It is also able to write several formats/representations of matrices to HDFS.
 *
 * IMPORTANT: Preserve the one-to-one correspondence between {@link MatrixObject}
 * and {@link MatrixBlock} objects, for cache purposes. Do not change a
 * {@link MatrixBlock} object without informing its {@link MatrixObject} object.
 * 
 */
public class MatrixObject extends CacheableData {
    private static final long serialVersionUID = 6374712373206495637L;

    /**
     * Current state of pinned variables, required for guarded collect.
     */
    private static ThreadLocal<Long> sizePinned = new ThreadLocal<Long>() {
        @Override
        protected Long initialValue() {
            return 0L;
        }
    };
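
    // Note: updateStatusPinned(true/false) adjusts this per-thread counter on
    // acquire/release, and readMatrixFromRDD() consults it when checking whether
    // a guarded RDD collect fits into the memory budget
    // (OptimizerUtils.checkSparkCollectMemoryBudget).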

    /**
     * Cache for actual data, evicted by garbage collector.
     */
    private SoftReference<MatrixBlock> _cache = null;

    /**
     * Container object that holds the actual data.
     */
    private MatrixBlock _data = null;

    /**
     * The name of HDFS file in which the data is backed up.
     */
    private String _hdfsFileName = null; // file name and path

    /** 
     * Flag that indicates whether or not the HDFS file exists.
     * It is used to improve the performance of the "rmvar" instruction:
     * when its value is <code>false</code>, one can skip invocations of
     * potentially expensive utility functions such as
     * MapReduceTool.deleteFileIfExistOnHDFS().
     */
    private boolean _hdfsFileExists = false;

    /**
     * <code>true</code> if the in-memory or evicted matrix may be different from
     * the matrix located at {@link #_hdfsFileName}; <code>false</code> if the two
     * matrices should be the same.
     */
    private boolean _dirtyFlag = false;

    /**
     * Object that holds the metadata associated with the matrix, which includes:
     * 1) matrix dimensions, if available; 2) number of non-zeros, if available;
     * 3) block dimensions, if applicable; 4) InputInfo -- subsequent operations
     * that use this matrix expect it to be in this format.
     * 
     * When the matrix is written to HDFS (and possibly the local file system),
     * one must use the OutputInfo that matches the InputInfo stored inside _metaData.
     */
    private MetaData _metaData = null;

    //additional names and flags
    private String _varName = ""; //plan variable name
    private String _cacheFileName = null; //local eviction file name
    private boolean _requiresLocalWrite = false; //flag if local write for read obj
    private boolean _isAcquireFromEmpty = false; //flag if read from status empty 
    private boolean _cleanupFlag = true; //flag if obj unpinned (cleanup enabled)
    private boolean _updateInPlaceFlag = false; //flag if in-place update

    //spark-specific handles
    //note: we use the abstraction of LineageObjects for two reasons: (1) to keep track of cleanup
    //for lazily evaluated RDDs, and (2) as abstraction for environments that do not necessarily have spark libraries available
    private RDDObject _rddHandle = null; //RDD handle
    private BroadcastObject _bcHandle = null; //Broadcast handle
    private RDDProperties _rddProperties = null;

    /**
     * Information relevant to partitioned matrices.
     */
    private boolean _partitioned = false; //indicates if obj partitioned
    private PDataPartitionFormat _partitionFormat = null; //indicates how obj partitioned
    private int _partitionSize = -1; //indicates n for BLOCKWISE_N
    private String _partitionCacheName = null; //name of cache block
    private MatrixBlock _partitionInMemory = null;

    /**
     * Information relevant to specific external file formats.
     */
    FileFormatProperties _formatProperties = null;

    public RDDProperties getRddProperties() {
        return _rddProperties;
    }

    public void setRddProperties(RDDProperties rddProperties) {
        this._rddProperties = rddProperties;
    }

    /**
     * Constructor that takes only the HDFS filename.
     */
    public MatrixObject(ValueType vt, String file) {
        this(vt, file, null); //HDFS file path
    }

    /**
     * Constructor that takes both HDFS filename and associated metadata.
     */
    public MatrixObject(ValueType vt, String file, MetaData mtd) {
        super(DataType.MATRIX, vt);
        _metaData = mtd;
        _hdfsFileName = file;

        _cache = null;
        _data = null;
    }

    /**
     * Copy constructor that copies meta data but NO data.
     * 
     * @param mo the matrix object to copy the meta data from
     */
    public MatrixObject(MatrixObject mo) {
        super(mo.getDataType(), mo.getValueType());

        _hdfsFileName = mo._hdfsFileName;
        _hdfsFileExists = mo._hdfsFileExists;

        MatrixFormatMetaData metaOld = (MatrixFormatMetaData) mo.getMetaData();
        _metaData = new MatrixFormatMetaData(new MatrixCharacteristics(metaOld.getMatrixCharacteristics()),
                metaOld.getOutputInfo(), metaOld.getInputInfo());

        _varName = mo._varName;
        _cleanupFlag = mo._cleanupFlag;
        _updateInPlaceFlag = mo._updateInPlaceFlag;
        _partitioned = mo._partitioned;
        _partitionFormat = mo._partitionFormat;
        _partitionSize = mo._partitionSize;
        _partitionCacheName = mo._partitionCacheName;
    }

    public void setVarName(String s) {
        _varName = s;
    }

    public String getVarName() {
        return _varName;
    }

    @Override
    public void setMetaData(MetaData md) {
        _metaData = md;
    }

    @Override
    public MetaData getMetaData() {
        return _metaData;
    }

    @Override
    public void removeMetaData() {
        _metaData = null;
    }

    @Override
    public void updateMatrixCharacteristics(MatrixCharacteristics mc) {
        ((MatrixDimensionsMetaData) _metaData).setMatrixCharacteristics(mc);
    }

    /**
     * Makes the matrix metadata consistent with the in-memory matrix data.
     * 
     * @throws CacheException if no data or no meta data exists
     */
    public void refreshMetaData() throws CacheException {
        if (_data == null || _metaData == null) //refresh only for existing data
            throw new CacheException("Cannot refresh meta data because there is no data or meta data. ");
        //we need to throw an exception, otherwise input/output format cannot be inferred

        MatrixCharacteristics mc = ((MatrixDimensionsMetaData) _metaData).getMatrixCharacteristics();
        mc.setDimension(_data.getNumRows(), _data.getNumColumns());
        mc.setNonZeros(_data.getNonZeros());
    }

    public void setFileFormatProperties(FileFormatProperties formatProperties) {
        _formatProperties = formatProperties;
    }

    public FileFormatProperties getFileFormatProperties() {
        return _formatProperties;
    }

    public boolean isFileExists() {
        return _hdfsFileExists;
    }

    public void setFileExists(boolean flag) {
        _hdfsFileExists = flag;
    }

    public String getFileName() {
        return _hdfsFileName;
    }

    public synchronized void setFileName(String file) {
        if (!_hdfsFileName.equals(file)) {
            _hdfsFileName = file;
            if (!isEmpty(true))
                _dirtyFlag = true;
        }
    }

    /**
     * @return the number of rows, according to the matrix meta data
     */
    public long getNumRows() {
        MatrixDimensionsMetaData meta = (MatrixDimensionsMetaData) _metaData;
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
        return mc.getRows();
    }

    /**
     * @return the number of columns, according to the matrix meta data
     */
    public long getNumColumns() {
        MatrixDimensionsMetaData meta = (MatrixDimensionsMetaData) _metaData;
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
        return mc.getCols();
    }

    /**
     * @return the number of rows per block, according to the matrix meta data
     */
    public long getNumRowsPerBlock() {
        MatrixDimensionsMetaData meta = (MatrixDimensionsMetaData) _metaData;
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
        return mc.getRowsPerBlock();
    }

    /**
     * @return the number of columns per block, according to the matrix meta data
     */
    public long getNumColumnsPerBlock() {
        MatrixDimensionsMetaData meta = (MatrixDimensionsMetaData) _metaData;
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
        return mc.getColsPerBlock();
    }

    /**
     * @return the number of non-zeros, according to the matrix meta data
     */
    public long getNnz() {
        MatrixDimensionsMetaData meta = (MatrixDimensionsMetaData) _metaData;
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
        return mc.getNonZeros();
    }

    /**
     * @return the sparsity, i.e., nnz/(rows*cols), according to the matrix meta data
     */
    public double getSparsity() {
        MatrixDimensionsMetaData meta = (MatrixDimensionsMetaData) _metaData;
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();

        return ((double) mc.getNonZeros()) / mc.getRows() / mc.getCols();
    }

    /**
     * @return the matrix characteristics (dimensions, blocking, non-zeros) of this object
     */
    public MatrixCharacteristics getMatrixCharacteristics() {
        MatrixDimensionsMetaData meta = (MatrixDimensionsMetaData) _metaData;
        return meta.getMatrixCharacteristics();
    }

    /**
     * <code>true</code> if the in-memory or evicted matrix may be different from
     * the matrix located at {@link #_hdfsFileName}; <code>false</code> if the two
     * matrices are supposed to be the same.
     */
    public boolean isDirty() {
        return _dirtyFlag;
    }

    @Override
    public String toString() {
        StringBuilder str = new StringBuilder();
        str.append("Matrix: ");
        str.append(_hdfsFileName + ", ");
        if (_metaData instanceof NumItemsByEachReducerMetaData) {
            str.append("NumItemsByEachReducerMetaData");
        } else {
            try {
                MatrixFormatMetaData md = (MatrixFormatMetaData) _metaData;
                if (md != null) {
                    MatrixCharacteristics mc = ((MatrixDimensionsMetaData) _metaData).getMatrixCharacteristics();
                    str.append(mc.toString());

                    InputInfo ii = md.getInputInfo();
                    if (ii == null)
                        str.append("null");
                    else {
                        str.append(", ");
                        str.append(InputInfo.inputInfoToString(ii));
                    }
                } else {
                    str.append("null, null");
                }
            } catch (Exception ex) {
                LOG.error(ex);
            }
        }
        str.append(", ");
        str.append(isDirty() ? "dirty" : "not-dirty");

        return str.toString();
    }

    public RDDObject getRDDHandle() {
        return _rddHandle;
    }

    public void setRDDHandle(RDDObject rdd) {
        //cleanup potential old back reference
        if (_rddHandle != null)
            _rddHandle.setBackReference(null);

        //add new rdd handle
        _rddHandle = rdd;
        if (_rddHandle != null)
            rdd.setBackReference(this);
    }

    public BroadcastObject getBroadcastHandle() {
        return _bcHandle;
    }

    public void setBroadcastHandle(BroadcastObject bc) {
        //cleanup potential old back reference
        if (_bcHandle != null)
            _bcHandle.setBackReference(null);

        //add new broadcast handle
        _bcHandle = bc;
        if (_bcHandle != null)
            bc.setBackReference(this);
    }

    // *********************************************
    // ***                                       ***
    // ***    HIGH-LEVEL METHODS THAT SPECIFY    ***
    // ***   THE LOCKING AND CACHING INTERFACE   ***
    // ***                                       ***
    // *********************************************
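
    // Typical lock/caching protocol (illustrative sketch, not part of this class):
    //
    //   MatrixBlock mb = mo.acquireRead();   // pin: EMPTY/EVICTED/EVICTABLE -> READ(+1)
    //   try { /* read-only access to mb */ }
    //   finally { mo.release(); }            // unpin: READ(-1) -> EVICTABLE or EMPTY
    //
    //   mo.acquireModify(newBlock);          // exclusive lock: -> MODIFY (marks dirty)
    //   mo.release();                        // -> EVICTABLE (may spill via LazyWriteBuffer)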

    /**
     * Acquires a shared "read-only" lock, produces the reference to the matrix data,
     * restores the matrix to main memory, and reads from HDFS if needed.
     * 
     * Synchronized because there might be parallel threads (parfor local) that
     * access the same MatrixObject (in case it was created before the loop).
     * 
     * In-Status:  EMPTY, EVICTABLE, EVICTED, READ;
     * Out-Status: READ(+1).
     * 
     * @return the matrix data reference
     * @throws CacheException 
     */
    public synchronized MatrixBlock acquireRead() throws CacheException {
        if (LOG.isTraceEnabled())
            LOG.trace("Acquire read " + _varName);
        long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

        if (!isAvailableToRead())
            throw new CacheStatusException("MatrixObject not available to read.");

        //get object from cache
        if (_data == null)
            getCache();

        //read data from HDFS/RDD if required
        //(probe data for cache_nowrite / jvm_reuse)  
        if (isEmpty(true) && _data == null) {
            try {
                if (DMLScript.STATISTICS)
                    CacheStatistics.incrementHDFSHits();

                if (getRDDHandle() == null || getRDDHandle().allowsShortCircuitRead()) {
                    //check filename
                    if (_hdfsFileName == null)
                        throw new CacheException("Cannot read matrix for empty filename.");

                    //read matrix from hdfs
                    _data = readMatrixFromHDFS(_hdfsFileName);

                    //mark for initial local write despite read operation
                    _requiresLocalWrite = CACHING_WRITE_CACHE_ON_READ;
                } else {
                    //read matrix from rdd (incl execute pending rdd operations)
                    MutableBoolean writeStatus = new MutableBoolean();
                    _data = readMatrixFromRDD(getRDDHandle(), writeStatus);

                    //mark for initial local write (prevent repeated execution of rdd operations)
                    if (writeStatus.booleanValue())
                        _requiresLocalWrite = CACHING_WRITE_CACHE_ON_READ;
                    else
                        _requiresLocalWrite = true;
                }

                _dirtyFlag = false;
            } catch (IOException e) {
                throw new CacheIOException("Reading of " + _hdfsFileName + " (" + _varName + ") failed.", e);
            }

            _isAcquireFromEmpty = true;
        } else if (DMLScript.STATISTICS) {
            if (_data != null)
                CacheStatistics.incrementMemHits();
        }

        //cache status maintenance
        super.acquire(false, _data == null);
        updateStatusPinned(true);

        if (DMLScript.STATISTICS) {
            long t1 = System.nanoTime();
            CacheStatistics.incrementAcquireRTime(t1 - t0);
        }

        return _data;
    }

    /**
     * Acquires the exclusive "write" lock for a thread that wants to change matrix
     * cell values.  Produces the reference to the matrix data, restores the matrix
     * to main memory, reads from HDFS if needed.
     * 
     * In-Status:  EMPTY, EVICTABLE, EVICTED;
     * Out-Status: MODIFY.
     * 
     * @return the matrix data reference
     * @throws CacheException 
     */
    public synchronized MatrixBlock acquireModify() throws CacheException {
        if (LOG.isTraceEnabled())
            LOG.trace("Acquire modify " + _varName);
        long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

        if (!isAvailableToModify())
            throw new CacheStatusException("MatrixObject not available to modify.");

        //get object from cache
        if (_data == null)
            getCache();

        //read data from HDFS if required
        if (isEmpty(true) && _data == null) {
            //check filename
            if (_hdfsFileName == null)
                throw new CacheException("Cannot read matrix for empty filename.");

            //load data
            try {
                _data = readMatrixFromHDFS(_hdfsFileName);
            } catch (IOException e) {
                throw new CacheIOException("Reading of " + _hdfsFileName + " (" + _varName + ") failed.", e);
            }
        }

        //cache status maintenance
        super.acquire(true, _data == null);
        updateStatusPinned(true);
        _dirtyFlag = true;
        _isAcquireFromEmpty = false;

        if (DMLScript.STATISTICS) {
            long t1 = System.nanoTime();
            CacheStatistics.incrementAcquireMTime(t1 - t0);
        }

        return _data;
    }

    /**
     * Acquires the exclusive "write" lock for a thread that wants to throw away the
     * old matrix data and link up with new matrix data.  Abandons the old matrix data
     * without reading it.  Sets the new matrix data reference.
     * 
     * In-Status:  EMPTY, EVICTABLE, EVICTED;
     * Out-Status: MODIFY.
     * 
     * @param newData the new matrix data reference
     * @return the matrix data reference, which is the same as the argument
     * @throws CacheException 
     */
    public synchronized MatrixBlock acquireModify(MatrixBlock newData) throws CacheException {
        if (LOG.isTraceEnabled())
            LOG.trace("Acquire modify newdata " + _varName);
        long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

        if (!isAvailableToModify())
            throw new CacheStatusException("MatrixObject not available to modify.");

        //clear old data 
        clearData();

        //cache status maintenance
        super.acquire(true, false); //no need to load evicted matrix
        _dirtyFlag = true;
        _isAcquireFromEmpty = false;

        //set references to new data
        if (newData == null)
            throw new CacheException("acquireModify with empty matrix block.");
        _data = newData;
        updateStatusPinned(true);

        if (DMLScript.STATISTICS) {
            long t1 = System.nanoTime();
            CacheStatistics.incrementAcquireMTime(t1 - t0);
        }

        return _data;
    }

    /**
     * Releases the shared ("read-only") or exclusive ("write") lock.  Updates
     * the matrix size, last-access time, metadata, etc.
     * 
     * Synchronized because there might be parallel threads (parfor local) that
     * access the same MatrixObject (in case it was created before the loop).
     * 
     * In-Status:  READ, MODIFY;
     * Out-Status: READ(-1), EVICTABLE, EMPTY.
     * 
     * @throws CacheException
     */
    public synchronized void release() throws CacheException {
        if (LOG.isTraceEnabled())
            LOG.trace("Release " + _varName);
        long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

        boolean write = false;
        if (isModify()) {
            //set flags for write
            write = true;
            _dirtyFlag = true;

            //update meta data
            refreshMetaData();
        }

        //compact empty in-memory block 
        if (_data.isEmptyBlock(false) && _data.isAllocated())
            _data.cleanupBlock(true, true);

        //cache status maintenance (pass cacheNoWrite flag)
        super.release(_isAcquireFromEmpty && !_requiresLocalWrite);
        updateStatusPinned(false);

        if (isCachingActive() //only if caching is enabled (otherwise keep everything in mem)
                && isCached(true) //not empty and not read/modify
                && !isUpdateInPlace() //pinned result variable
                && !isBelowCachingThreshold()) //min size for caching
        {
            if (write || _requiresLocalWrite) {
                //evict blob
                String filePath = getCacheFilePathAndName();
                try {
                    writeMatrix(filePath);
                } catch (Exception e) {
                    throw new CacheException("Eviction to local path " + filePath + " (" + _varName + ") failed.",
                            e);
                }
                _requiresLocalWrite = false;
            }

            //create cache
            createCache();
            _data = null;
        } else if (LOG.isTraceEnabled()) {
            LOG.trace("Var " + _varName + " not subject to caching: rows=" + _data.getNumRows() + ", cols="
                    + _data.getNumColumns() + ", state=" + getStatusAsString());
        }

        if (DMLScript.STATISTICS) {
            long t1 = System.nanoTime();
            CacheStatistics.incrementReleaseTime(t1 - t0);
        }
    }

    /**
     * Sets the matrix data reference to <code>null</code>, abandons the old matrix.
     * Makes the "envelope" empty.  Run it to finalize the matrix (otherwise the
     * evicted matrix file may remain undeleted).
     * 
     * In-Status:  EMPTY, EVICTABLE, EVICTED;
     * Out-Status: EMPTY.
     * @throws CacheException 
     */
    public synchronized void clearData() throws CacheException {
        if (LOG.isTraceEnabled())
            LOG.trace("Clear data " + _varName);

        // check if cleanup enabled and possible 
        if (!_cleanupFlag)
            return; // do nothing
        if (!isAvailableToModify())
            throw new CacheStatusException("MatrixObject (" + this.getDebugName()
                    + ") not available to modify. Status = " + this.getStatusAsString() + ".");

        // clear existing WB / FS representation (but prevent unnecessary probes)
        if (!(isEmpty(true) || (_data != null && isBelowCachingThreshold())
                || (_data != null && !isCachingActive()))) //additional condition for JMLC
            freeEvictedBlob();

        // clear the in-memory data
        _data = null;
        clearCache();

        // clear rdd/broadcast back refs
        if (_rddHandle != null)
            _rddHandle.setBackReference(null);
        if (_bcHandle != null)
            _bcHandle.setBackReference(null);

        // change object state EMPTY
        _dirtyFlag = false;
        setEmpty();
    }

    public synchronized void exportData() throws CacheException {
        exportData(-1);
    }

    /**
     * Writes, or flushes, the matrix data to HDFS.
     * 
     * In-Status:  EMPTY, EVICTABLE, EVICTED, READ;
     * Out-Status: EMPTY, EVICTABLE, EVICTED, READ.
     * 
     * @param replication the HDFS replication factor (-1 for the default)
     * @throws CacheException 
     */
    public synchronized void exportData(int replication) throws CacheException {
        exportData(_hdfsFileName, null, replication, null);
        _hdfsFileExists = true;
    }

    /**
     * @param fName target file name on HDFS
     * @param outputFormat output format (null to use the format from the meta data)
     * @param formatProperties format-specific properties
     * @throws CacheException
     */
    public synchronized void exportData(String fName, String outputFormat, FileFormatProperties formatProperties)
            throws CacheException {
        exportData(fName, outputFormat, -1, formatProperties);
    }

    /**
     * @param fName target file name on HDFS
     * @param outputFormat output format (null to use the format from the meta data)
     * @throws CacheException
     */
    public synchronized void exportData(String fName, String outputFormat) throws CacheException {
        exportData(fName, outputFormat, -1, null);
    }

    /**
     * Synchronized because there might be parallel threads (parfor local) that
     * access the same MatrixObject (in case it was created before the loop).
     * If all threads exported the same data object concurrently, it would result in
     * errors because they would all write to the same file. Efficiency for loops and
     * parallel threads is achieved by checking if the in-memory matrix block is dirty.
     * 
     * NOTE: MB: we do not use dfs copy from local (evicted) to HDFS because this would
     * ignore the output format and, most importantly, would bypass reblocking during
     * write (which affects the potential degree of parallelism). However, we do copy
     * files on HDFS if certain criteria are met.
     * 
     * @param fName target file name on HDFS
     * @param outputFormat output format (null to use the format from the meta data)
     * @param replication the HDFS replication factor (-1 for the default)
     * @param formatProperties format-specific properties
     * @throws CacheException
     */
    public synchronized void exportData(String fName, String outputFormat, int replication,
            FileFormatProperties formatProperties) throws CacheException {
        if (LOG.isTraceEnabled())
            LOG.trace("Export data " + _varName + " " + fName);
        long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

        //prevent concurrent modifications
        if (!isAvailableToRead())
            throw new CacheStatusException("MatrixObject not available to read.");

        LOG.trace("Exporting " + this.getDebugName() + " to " + fName + " in format " + outputFormat);

        boolean pWrite = false; //persistent write flag
        if (fName.equals(_hdfsFileName)) {
            _hdfsFileExists = true;
            pWrite = false;
        } else {
            pWrite = true; // i.e., export is called from "write" instruction
        }

        //actual export (note: no direct transfer of local copy in order to ensure blocking (and hence, parallelism))
        if (isDirty() || //use dirty for skipping parallel exports
                (pWrite && !isEqualOutputFormat(outputFormat))) {
            // CASE 1: dirty in-mem matrix or pWrite w/ different format (write matrix to fname; load into memory if evicted)
            // a) get the matrix      
            if (isEmpty(true)) {
                //read data from HDFS if required (never read before), this applies only to pWrite w/ different output formats
                //note: for large rdd outputs, we compile dedicated Spark write instructions (no need to handle this here) 
                try {
                    if (getRDDHandle() == null || getRDDHandle().allowsShortCircuitRead())
                        _data = readMatrixFromHDFS(_hdfsFileName);
                    else
                        _data = readMatrixFromRDD(getRDDHandle(), new MutableBoolean());
                    _dirtyFlag = false;
                } catch (IOException e) {
                    throw new CacheIOException("Reading of " + _hdfsFileName + " (" + _varName + ") failed.", e);
                }
            }
            //get object from cache
            if (_data == null)
                getCache();
            super.acquire(false, _data == null); //incl. read matrix if evicted   

            // b) write the matrix 
            try {
                writeMetaData(fName, outputFormat, formatProperties);
                writeMatrixToHDFS(fName, outputFormat, replication, formatProperties);
                if (!pWrite)
                    _dirtyFlag = false;
            } catch (Exception e) {
                throw new CacheIOException("Export to " + fName + " failed.", e);
            } finally {
                release();
            }
        } else if (pWrite) // pwrite with same output format
        {
            //CASE 2: matrix already in same format but different file on hdfs (copy matrix to fname)
            try {
                MapReduceTool.deleteFileIfExistOnHDFS(fName);
                MapReduceTool.deleteFileIfExistOnHDFS(fName + ".mtd");
                if (getRDDHandle() == null || getRDDHandle().allowsShortCircuitRead())
                    MapReduceTool.copyFileOnHDFS(_hdfsFileName, fName);
                else //write might trigger rdd operations and nnz maintenance
                    writeMatrixFromRDDtoHDFS(getRDDHandle(), fName, outputFormat);
                writeMetaData(fName, outputFormat, formatProperties);
            } catch (Exception e) {
                throw new CacheIOException("Export to " + fName + " failed.", e);
            }
        } else if (getRDDHandle() != null && //pending rdd operation
                !getRDDHandle().allowsShortCircuitRead()) {
            //CASE 3: pending rdd operation (other than checkpoints)
            try {
                writeMatrixFromRDDtoHDFS(getRDDHandle(), fName, outputFormat);
                writeMetaData(fName, outputFormat, formatProperties);
            } catch (Exception e) {
                throw new CacheIOException("Export to " + fName + " failed.", e);
            }
        } else {
            //CASE 4: data already in hdfs (do nothing, no need for export)
            LOG.trace(this.getDebugName() + ": Skip export to hdfs since data already exists.");
        }

        if (DMLScript.STATISTICS) {
            long t1 = System.nanoTime();
            CacheStatistics.incrementExportTime(t1 - t0);
        }
    }

    /**
     * @param fName target file name on HDFS
     * @param outputFormat output format (null to use the format from the meta data)
     * @return true if the data was exported or renamed, false otherwise
     * @throws CacheIOException
     */
    public synchronized boolean moveData(String fName, String outputFormat) throws CacheIOException {
        boolean ret = false;

        try {
            //ensure input file is persistent on hdfs (pending RDD operations), 
            //file might have been written during export or collect via write/read
            if (getRDDHandle() != null && !MapReduceTool.existsFileOnHDFS(_hdfsFileName)) {
                writeMatrixFromRDDtoHDFS(getRDDHandle(), _hdfsFileName, outputFormat);
            }

            //export or rename to target file on hdfs
            if (isDirty() || (!isEqualOutputFormat(outputFormat) && isEmpty(true))) {
                exportData(fName, outputFormat);
                ret = true;
            } else if (isEqualOutputFormat(outputFormat)) {
                MapReduceTool.deleteFileIfExistOnHDFS(fName);
                MapReduceTool.deleteFileIfExistOnHDFS(fName + ".mtd");
                MapReduceTool.renameFileOnHDFS(_hdfsFileName, fName);
                writeMetaData(fName, outputFormat, null);
                ret = true;
            }
        } catch (Exception e) {
            throw new CacheIOException("Move to " + fName + " failed.", e);
        }

        return ret;
    }

    // *********************************************
    // ***                                       ***
    // ***       HIGH-LEVEL PUBLIC METHODS       ***
    // ***     FOR PARTITIONED MATRIX ACCESS     ***
    // ***   (all other methods still usable)    ***
    // ***                                       ***
    // *********************************************
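
    // Typical partitioned access (illustrative sketch; the index values are hypothetical):
    //
    //   mo.setPartitioned(PDataPartitionFormat.ROW_WISE, -1);
    //   MatrixBlock row = mo.readMatrixPartition(new IndexRange(7, 7, 1, mo.getNumColumns()));
    //
    // Partitioning assumes a read-only matrix whose partitions were previously
    // written as individual files under _hdfsFileName (see getPartitionFileName).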

    /**
     * @param format the data partition format
     * @param n the block size n for BLOCKWISE_N partition formats
     */
    public void setPartitioned(PDataPartitionFormat format, int n) {
        _partitioned = true;
        _partitionFormat = format;
        _partitionSize = n;
    }

    public void unsetPartitioned() {
        _partitioned = false;
        _partitionFormat = null;
        _partitionSize = -1;
    }

    /**
     * @return true if this matrix object is partitioned
     */
    public boolean isPartitioned() {
        return _partitioned;
    }

    public PDataPartitionFormat getPartitionFormat() {
        return _partitionFormat;
    }

    public int getPartitionSize() {
        return _partitionSize;
    }

    public synchronized void setInMemoryPartition(MatrixBlock block) {
        _partitionInMemory = block;
    }

    /**
     * NOTE: for reading matrix partitions, we could cache (in its real sense) the read block
     * with soft references (no need for eviction, as partitioning is only applied to read-only
     * matrices). However, since we currently only support row- and column-wise partitioning,
     * caching is not applied yet. This could be changed once we also support column-block-wise
     * and row-block-wise partitioning. Furthermore, as we refuse to partition vectors and
     * support only full row or column indexing, no metadata (apart from the partition flag)
     * is required.
     * 
     * @param pred index range of the requested partition
     * @return the matrix block of the requested partition
     * @throws CacheException
     */
    public synchronized MatrixBlock readMatrixPartition(IndexRange pred) throws CacheException {
        if (LOG.isTraceEnabled())
            LOG.trace("Acquire partition " + _varName + " " + pred);
        long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

        if (!_partitioned)
            throw new CacheStatusException("MatrixObject not available to indexed read.");

        //return static partition if set from outside of the program
        if (_partitionInMemory != null)
            return _partitionInMemory;

        MatrixBlock mb = null;

        try {
            boolean blockwise = (_partitionFormat == PDataPartitionFormat.ROW_BLOCK_WISE
                    || _partitionFormat == PDataPartitionFormat.COLUMN_BLOCK_WISE);

            //preparations for block wise access
            MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
            MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
            int brlen = mc.getRowsPerBlock();
            int bclen = mc.getColsPerBlock();

            //get filename depending on format
            String fname = getPartitionFileName(pred, brlen, bclen);

            //probe cache
            if (blockwise && _partitionCacheName != null && _partitionCacheName.equals(fname)) {
                mb = _cache.get(); //try getting block from cache
            }

            if (mb == null) //block not in cache
            {
                //get rows and cols
                long rows = -1;
                long cols = -1;
                switch (_partitionFormat) {
                case ROW_WISE:
                    rows = 1;
                    cols = mc.getCols();
                    break;
                case ROW_BLOCK_WISE:
                    rows = brlen;
                    cols = mc.getCols();
                    break;
                case COLUMN_WISE:
                    rows = mc.getRows();
                    cols = 1;
                    break;
                case COLUMN_BLOCK_WISE:
                    rows = mc.getRows();
                    cols = bclen;
                    break;
                default:
                    throw new CacheException("Unsupported partition format: " + _partitionFormat);
                }

                //read the partition block (or create an empty block if the file does not exist)
                if (MapReduceTool.existsFileOnHDFS(fname))
                    mb = readMatrixFromHDFS(fname, rows, cols);
                else {
                    mb = new MatrixBlock((int) rows, (int) cols, true);
                    LOG.warn("Reading empty matrix partition " + fname);
                }
            }

            //post processing
            if (blockwise) {
                //put block into cache
                _partitionCacheName = fname;
                _cache = new SoftReference<MatrixBlock>(mb);

                if (_partitionFormat == PDataPartitionFormat.ROW_BLOCK_WISE) {
                    int rix = (int) ((pred.rowStart - 1) % brlen);
                    mb = mb.sliceOperations(rix, rix, (int) (pred.colStart - 1), (int) (pred.colEnd - 1),
                            new MatrixBlock());
                }
                if (_partitionFormat == PDataPartitionFormat.COLUMN_BLOCK_WISE) {
                    int cix = (int) ((pred.colStart - 1) % bclen);
                    mb = mb.sliceOperations((int) (pred.rowStart - 1), (int) (pred.rowEnd - 1), cix, cix,
                            new MatrixBlock());
                }
            }

            //NOTE: currently no special treatment of non-existing partitions necessary 
            //      because empty blocks are written anyway
        } catch (Exception ex) {
            throw new CacheException(ex);
        }

        if (DMLScript.STATISTICS) {
            long t1 = System.nanoTime();
            CacheStatistics.incrementAcquireRTime(t1 - t0);
        }

        return mb;
    }

    /**
     * @param pred index range of the requested partition
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     * @return the partition file name for the given index range
     * @throws CacheStatusException 
     */
    public String getPartitionFileName(IndexRange pred, int brlen, int bclen) throws CacheStatusException {
        if (!_partitioned)
            throw new CacheStatusException("MatrixObject not available to indexed read.");

        StringBuilder sb = new StringBuilder();
        sb.append(_hdfsFileName);

        switch (_partitionFormat) {
        case ROW_WISE:
            sb.append(Lop.FILE_SEPARATOR);
            sb.append(pred.rowStart);
            break;
        case ROW_BLOCK_WISE:
            sb.append(Lop.FILE_SEPARATOR);
            sb.append((pred.rowStart - 1) / brlen + 1);
            break;
        case COLUMN_WISE:
            sb.append(Lop.FILE_SEPARATOR);
            sb.append(pred.colStart);
            break;
        case COLUMN_BLOCK_WISE:
            sb.append(Lop.FILE_SEPARATOR);
            sb.append((pred.colStart - 1) / bclen + 1);
            break;
        default:
            throw new CacheStatusException("MatrixObject not available to indexed read.");
        }

        return sb.toString();
    }

    // *********************************************
    // ***                                       ***
    // ***      LOW-LEVEL PROTECTED METHODS      ***
    // ***         EXTEND CACHEABLE DATA         ***
    // ***     ONLY CALLED BY THE SUPERCLASS     ***
    // ***                                       ***
    // *********************************************

    @Override
    protected boolean isBlobPresent() {
        return (_data != null);
    }

    @Override
    protected void evictBlobFromMemory(MatrixBlock mb) throws CacheIOException {
        throw new CacheIOException("Redundant explicit eviction.");
    }

    @Override
    protected void restoreBlobIntoMemory() throws CacheIOException {
        long begin = 0;

        if (LOG.isTraceEnabled()) {
            LOG.trace("RESTORE of Matrix " + _varName + ", " + _hdfsFileName);
            begin = System.currentTimeMillis();
        }

        String filePath = getCacheFilePathAndName();

        if (LOG.isTraceEnabled())
            LOG.trace("CACHE: Restoring matrix...  " + _varName + "  HDFS path: "
                    + (_hdfsFileName == null ? "null" : _hdfsFileName) + ", Restore from path: " + filePath);

        if (_data != null)
            throw new CacheIOException(filePath + " : Cannot restore on top of existing in-memory data.");

        try {
            _data = readMatrix(filePath);
        } catch (IOException e) {
            throw new CacheIOException(filePath + " : Restore failed.", e);
        }

        //check for success
        if (_data == null)
            throw new CacheIOException(filePath + " : Restore failed.");

        if (LOG.isTraceEnabled())
            LOG.trace("Restoring matrix - COMPLETED ... " + (System.currentTimeMillis() - begin) + " msec.");
    }

    @Override
    protected void freeEvictedBlob() {
        String cacheFilePathAndName = getCacheFilePathAndName();
        long begin = 0;
        if (LOG.isTraceEnabled()) {
            LOG.trace("CACHE: Freeing evicted matrix...  " + _varName + "  HDFS path: "
                    + (_hdfsFileName == null ? "null" : _hdfsFileName) + " Eviction path: " + cacheFilePathAndName);
            begin = System.currentTimeMillis();
        }

        LazyWriteBuffer.deleteMatrix(cacheFilePathAndName);

        if (LOG.isTraceEnabled())
            LOG.trace("Freeing evicted matrix - COMPLETED ... " + (System.currentTimeMillis() - begin) + " msec.");
    }

    @Override
    protected boolean isBelowCachingThreshold() {
        long rlen = _data.getNumRows();
        long clen = _data.getNumColumns();
        long nnz = _data.getNonZeros();

        //get in-memory size (assume dense, if nnz unknown)
        double sparsity = OptimizerUtils.getSparsity(rlen, clen, nnz);
        double size = MatrixBlock.estimateSizeInMemory(rlen, clen, sparsity);

        return (!_data.isAllocated() || size <= CACHING_THRESHOLD);
    }

    // *******************************************
    // ***                                     ***
    // ***      LOW-LEVEL PRIVATE METHODS      ***
    // ***           FOR MATRIX I/O            ***
    // ***                                     ***
    // *******************************************
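
    // Four read paths exist for this object: the soft-reference cache (getCache),
    // the local eviction file (readMatrix via LazyWriteBuffer), HDFS
    // (readMatrixFromHDFS), and a guarded collect from a live RDD (readMatrixFromRDD).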

    private boolean isUpdateInPlace() {
        return _updateInPlaceFlag;
    }

    /**
     * @return the local eviction file path and name (lazily constructed from the
     *         configured eviction directory and the unique cache ID)
     */
    private String getCacheFilePathAndName() {
        if (_cacheFileName == null) {
            StringBuilder sb = new StringBuilder();
            sb.append(CacheableData.cacheEvictionLocalFilePath);
            sb.append(CacheableData.cacheEvictionLocalFilePrefix);
            sb.append(String.format("%09d", getUniqueCacheID()));
            sb.append(CacheableData.cacheEvictionLocalFileExtension);

            _cacheFileName = sb.toString();
        }

        return _cacheFileName;
    }

    /**
     * @param filePathAndName local eviction file to read from
     * @return the matrix block read via the lazy write buffer
     * @throws IOException
     */
    private MatrixBlock readMatrix(String filePathAndName) throws IOException {
        return LazyWriteBuffer.readMatrix(filePathAndName);
    }

    /**
     * @param filePathAndName HDFS file to read from
     * @return the matrix block read from HDFS, with dimensions taken from the meta data
     * @throws IOException
     */
    private MatrixBlock readMatrixFromHDFS(String filePathAndName) throws IOException {
        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
        return readMatrixFromHDFS(filePathAndName, mc.getRows(), mc.getCols());
    }

    /**
     * @param rdd RDD handle to read from (may trigger pending RDD operations)
     * @param writeStatus output parameter; set to true if the RDD was written to
     *        HDFS instead of being collected (no cache-write required on read)
     * @return the matrix block obtained from the RDD
     * @throws IOException 
     */
    private MatrixBlock readMatrixFromRDD(RDDObject rdd, MutableBoolean writeStatus) throws IOException {
        //note: the read of a matrix block from an RDD might trigger
        //lazy evaluation of pending transformations.
        RDDObject lrdd = rdd;

        //prepare return status (by default only collect)
        writeStatus.setValue(false);

        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
        MatrixBlock mb = null;
        try {
            //prevent unnecessary collect through rdd checkpoint
            if (rdd.allowsShortCircuitCollect()) {
                lrdd = (RDDObject) rdd.getLineageChilds().get(0);
            }

            //obtain matrix block from RDD
            int rlen = (int) mc.getRows();
            int clen = (int) mc.getCols();
            int brlen = (int) mc.getRowsPerBlock();
            int bclen = (int) mc.getColsPerBlock();
            long nnz = mc.getNonZeros();

            //guarded rdd collect 
            if (!OptimizerUtils.checkSparkCollectMemoryBudget(rlen, clen, brlen, bclen, nnz, sizePinned.get())) {
                //write RDD to hdfs and read to prevent invalid collect mem consumption 
                //note: lazy, partition-at-a-time collect (toLocalIterator) was significantly slower
                if (!MapReduceTool.existsFileOnHDFS(_hdfsFileName)) { //prevent overwrite existing file
                    long newnnz = SparkExecutionContext.writeRDDtoHDFS(lrdd, _hdfsFileName, iimd.getOutputInfo());
                    ((MatrixDimensionsMetaData) _metaData).getMatrixCharacteristics().setNonZeros(newnnz);
                    ((RDDObject) rdd).setHDFSFile(true); //mark rdd as hdfs file (for restore)
                    writeStatus.setValue(true); //mark for no cache-write on read
                }
                mb = readMatrixFromHDFS(_hdfsFileName);
            } else {
                //collect matrix block from RDD
                mb = SparkExecutionContext.toMatrixBlock(lrdd, rlen, clen, brlen, bclen, nnz);
            }
        } catch (DMLRuntimeException ex) {
            throw new IOException(ex);
        }

        //sanity check correct output
        if (mb == null) {
            throw new IOException("Unable to load matrix from rdd: " + lrdd.getVarName());
        }

        return mb;
    }

    /**
     * @param rdd RDD handle to write (may trigger pending RDD operations)
     * @param fname target file name on HDFS
     * @param outputFormat output format (null to use the format from the meta data)
     * @throws DMLRuntimeException 
     */
    private void writeMatrixFromRDDtoHDFS(RDDObject rdd, String fname, String outputFormat)
            throws DMLRuntimeException {
        //prepare output info
        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        OutputInfo oinfo = (outputFormat != null ? OutputInfo.stringToOutputInfo(outputFormat)
                : InputInfo.getMatchingOutputInfo(iimd.getInputInfo()));

        //note: the write of an RDD to HDFS might trigger
        //lazy evaluation of pending transformations.            
        long newnnz = SparkExecutionContext.writeRDDtoHDFS(rdd, fname, oinfo);
        ((MatrixDimensionsMetaData) _metaData).getMatrixCharacteristics().setNonZeros(newnnz);
    }

    /**
     * @param filePathAndName HDFS file to read from
     * @param rlen number of rows to read
     * @param clen number of columns to read
     * @return the matrix block read from HDFS
     * @throws IOException
     */
    private MatrixBlock readMatrixFromHDFS(String filePathAndName, long rlen, long clen) throws IOException {
        long begin = 0;

        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();

        if (LOG.isTraceEnabled()) {
            LOG.trace("Reading matrix from HDFS...  " + _varName + "  Path: " + filePathAndName + ", dimensions: ["
                    + mc.getRows() + ", " + mc.getCols() + ", " + mc.getNonZeros() + "]");
            begin = System.currentTimeMillis();
        }

        double sparsity = (mc.getNonZeros() >= 0 ? ((double) mc.getNonZeros()) / (mc.getRows() * mc.getCols())
                : 1.0d); //expected sparsity
        MatrixBlock newData = DataConverter.readMatrixFromHDFS(filePathAndName, iimd.getInputInfo(), rlen, clen,
                mc.getRowsPerBlock(), mc.getColsPerBlock(), sparsity, _formatProperties);

        //sanity check correct output
        if (newData == null) {
            throw new IOException("Unable to load matrix from file: " + filePathAndName);
        }

        if (LOG.isTraceEnabled())
            LOG.trace("Reading Completed: " + (System.currentTimeMillis() - begin) + " msec.");

        return newData;
    }

    /**
     * Writes the in-memory matrix to the local eviction file via the lazy write buffer.
     * 
     * @param filePathAndName local eviction file to write to
     * @throws DMLRuntimeException
     * @throws IOException
     */
    private void writeMatrix(String filePathAndName) throws DMLRuntimeException, IOException {
        LazyWriteBuffer.writeMatrix(filePathAndName, _data);
    }

    /**
     * Writes the in-memory matrix to HDFS in the specified format.
     * 
     * @param filePathAndName target file name on HDFS
     * @param outputFormat output format (null to use the format from the meta data)
     * @param replication the HDFS replication factor (-1 for the default)
     * @param formatProperties format-specific properties
     * @throws DMLRuntimeException
     * @throws IOException
     */
    private void writeMatrixToHDFS(String filePathAndName, String outputFormat, int replication,
            FileFormatProperties formatProperties) throws DMLRuntimeException, IOException {
        long begin = 0;
        if (LOG.isTraceEnabled()) {
            LOG.trace(" Writing matrix to HDFS...  " + _varName + "  Path: " + filePathAndName + ", Format: "
                    + (outputFormat != null ? outputFormat : "inferred from metadata"));
            begin = System.currentTimeMillis();
        }

        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;

        if (_data != null) {
            // Get the dimension information from the metadata stored within MatrixObject
            MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
            // Write the matrix to HDFS in requested format
            OutputInfo oinfo = (outputFormat != null ? OutputInfo.stringToOutputInfo(outputFormat)
                    : InputInfo.getMatchingOutputInfo(iimd.getInputInfo()));

            // when outputFormat is binaryblock, make sure that matrixCharacteristics has correct blocking dimensions
            // note: this is only required if singlenode (due to binarycell default) 
            if (oinfo == OutputInfo.BinaryBlockOutputInfo && DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE
                    && (mc.getRowsPerBlock() != DMLTranslator.DMLBlockSize
                            || mc.getColsPerBlock() != DMLTranslator.DMLBlockSize)) {
                DataConverter.writeMatrixToHDFS(
                        _data, filePathAndName, oinfo, new MatrixCharacteristics(mc.getRows(), mc.getCols(),
                                DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize, mc.getNonZeros()),
                        replication, formatProperties);
            } else {
                DataConverter.writeMatrixToHDFS(_data, filePathAndName, oinfo, mc, replication, formatProperties);
            }

            if (LOG.isTraceEnabled())
                LOG.trace("Writing matrix to HDFS (" + filePathAndName + ") - COMPLETED... "
                        + (System.currentTimeMillis() - begin) + " msec.");
        } else if (LOG.isTraceEnabled()) {
            LOG.trace("Writing matrix to HDFS (" + filePathAndName + ") - NOTHING TO WRITE (_data == null).");
        }

        if (DMLScript.STATISTICS)
            CacheStatistics.incrementHDFSWrites();
    }

    /**
     * Writes the .mtd metadata file for the given output file and format.
     * 
     * @param filePathAndName target file name on HDFS (without the .mtd extension)
     * @param outputFormat output format (null to use the format from the meta data)
     * @param formatProperties format-specific properties
     * @throws DMLRuntimeException
     * @throws IOException
     */
    private void writeMetaData(String filePathAndName, String outputFormat, FileFormatProperties formatProperties)
            throws DMLRuntimeException, IOException {
        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;

        if (iimd != null) {
            // Write the matrix to HDFS in requested format         
            OutputInfo oinfo = (outputFormat != null ? OutputInfo.stringToOutputInfo(outputFormat)
                    : InputInfo.getMatchingOutputInfo(iimd.getInputInfo()));

            if (oinfo != OutputInfo.MatrixMarketOutputInfo) {
                // Get the dimension information from the metadata stored within MatrixObject
                MatrixCharacteristics mc = iimd.getMatrixCharacteristics();

                // when outputFormat is binaryblock, make sure that matrixCharacteristics has correct blocking dimensions
                // note: this is only required if singlenode (due to binarycell default) 
                if (oinfo == OutputInfo.BinaryBlockOutputInfo
                        && DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE
                        && (mc.getRowsPerBlock() != DMLTranslator.DMLBlockSize
                                || mc.getColsPerBlock() != DMLTranslator.DMLBlockSize)) {
                    mc = new MatrixCharacteristics(mc.getRows(), mc.getCols(), DMLTranslator.DMLBlockSize,
                            DMLTranslator.DMLBlockSize, mc.getNonZeros());
                }
                MapReduceTool.writeMetaDataFile(filePathAndName + ".mtd", valueType, mc, oinfo, formatProperties);
            }
        } else {
            throw new DMLRuntimeException(
                    "Unexpected error while writing mtd file (" + filePathAndName + ") -- metadata is null.");
        }
    }

    /**
     * @param outputFormat output format to compare against the meta data
     * @return true if the given format (or null) matches the format of the HDFS file
     */
    private boolean isEqualOutputFormat(String outputFormat) {
        boolean ret = true;

        if (outputFormat != null) {
            try {
                MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
                OutputInfo oi1 = InputInfo.getMatchingOutputInfo(iimd.getInputInfo());
                OutputInfo oi2 = OutputInfo.stringToOutputInfo(outputFormat);
                if (oi1 != oi2) {
                    ret = false;
                }
            } catch (Exception ex) {
                ret = false;
            }
        }

        return ret;
    }

    @Override
    public synchronized String getDebugName() {
        int maxLength = 23;
        String debugNameEnding = (_hdfsFileName == null ? "null"
                : (_hdfsFileName.length() < maxLength ? _hdfsFileName
                        : "..." + _hdfsFileName.substring(_hdfsFileName.length() - maxLength + 3)));
        return _varName + " " + debugNameEnding;
    }

    // *******************************************
    // ***                                     ***
    // ***      LOW-LEVEL PRIVATE METHODS      ***
    // ***       FOR SOFTREFERENCE CACHE       ***
    // ***                                     ***
    // *******************************************
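
    // The soft-reference cache is a best-effort, GC-managed second chance:
    // release() calls createCache() before dropping the hard reference in _data,
    // and a later acquire calls getCache() to recover the block if the garbage
    // collector has not reclaimed it yet (otherwise the block is restored from
    // the local eviction file or re-read from HDFS/RDD).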

    /**
     * Creates a soft reference to the in-memory matrix block.
     */
    private void createCache() {
        _cache = new SoftReference<MatrixBlock>(_data);
    }

    /**
     * Restores the in-memory matrix block from the soft reference, if still available.
     */
    private void getCache() {
        if (_cache != null) {
            _data = _cache.get();
            clearCache();
        }
    }

    /**
     * Clears and releases the soft reference.
     */
    private void clearCache() {
        if (_cache != null) {
            _cache.clear();
            _cache = null;
        }
    }

    /**
     * Adds or subtracts the in-memory size of the matrix block from the
     * thread-local counter of pinned data (used for guarded collect).
     * 
     * @param add true to add the size, false to subtract it
     */
    private void updateStatusPinned(boolean add) {
        if (_data != null) { //data should never be null
            long size = sizePinned.get();
            size += (add ? 1 : -1) * _data.getSizeInMemory();
            sizePinned.set(Math.max(size, 0));
        }
    }

    /**
     * Enables or disables cleanup of this object; see {@link #clearData()}.
     * 
     * @param flag true to enable cleanup
     */
    public void enableCleanup(boolean flag) {
        _cleanupFlag = flag;
    }

    /**
     * Indicates if cleanup of this object is enabled; see {@link #clearData()}.
     * 
     * @return true if cleanup is enabled
     */
    public boolean isCleanupEnabled() {
        return _cleanupFlag;
    }

    /**
     * @param flag true to mark this object for in-place updates
     */
    public void enableUpdateInPlace(boolean flag) {
        _updateInPlaceFlag = flag;
    }

    /**
     * @return true if this object is marked for in-place updates
     */
    public boolean isUpdateInPlaceEnabled() {
        return _updateInPlaceFlag;
    }

    /**
     * Sets the cache status of this object to EMPTY.
     */
    public void setEmptyStatus() {
        setEmpty();
    }
}
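
Example usage

A minimal sketch of how this class is typically driven, assuming an initialized
SystemML runtime with caching enabled; the HDFS path, dimensions, and variable name
below are hypothetical:

import com.ibm.bi.dml.parser.Expression.ValueType;
import com.ibm.bi.dml.runtime.controlprogram.caching.MatrixObject;
import com.ibm.bi.dml.runtime.matrix.MatrixCharacteristics;
import com.ibm.bi.dml.runtime.matrix.MatrixFormatMetaData;
import com.ibm.bi.dml.runtime.matrix.data.InputInfo;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.matrix.data.OutputInfo;

public class MatrixObjectExample {
    public static void main(String[] args) throws Exception {
        //meta data for a 1000x1000 binary-block matrix (nnz unknown: -1)
        MatrixCharacteristics mc = new MatrixCharacteristics(1000, 1000, 1000, 1000, -1);
        MatrixFormatMetaData meta = new MatrixFormatMetaData(mc,
                OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);

        MatrixObject mo = new MatrixObject(ValueType.DOUBLE, "hdfs:/tmp/X", meta);
        mo.setVarName("X");

        //pin the matrix (reads from HDFS on first access), then unpin it
        MatrixBlock mb = mo.acquireRead();
        System.out.println("nnz: " + mb.getNonZeros());
        mo.release();

        //flush any dirty in-memory state back to its HDFS file
        mo.exportData();
    }
}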