org.apache.sysml.runtime.matrix.data.FrameBlock.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sysml.runtime.matrix.data.FrameBlock.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.matrix.data;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;
import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.io.Writable;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheBlock;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.util.IndexRange;
import org.apache.sysml.runtime.util.UtilFunctions;

@SuppressWarnings({ "rawtypes", "unchecked" }) //allow generic native arrays
public class FrameBlock implements Writable, CacheBlock, Externalizable {
    private static final long serialVersionUID = -3993450030207130665L;

    public static final int BUFFER_SIZE = 1 * 1000 * 1000; //1M elements, size of default matrix block 

    //internal configuration
    private static final boolean REUSE_RECODE_MAPS = true;

    /** The number of rows of the FrameBlock */
    private int _numRows = -1;

    /** The schema of the data frame as an ordered list of value types */
    private ValueType[] _schema = null;

    /** The column names of the data frame as an ordered list of strings, allocated on-demand */
    private String[] _colnames = null;

    private ColumnMetadata[] _colmeta = null;

    /** The data frame data as an ordered list of columns */
    private Array[] _coldata = null;

    /** Cache for recode maps from frame meta data, indexed by column 0-based */
    private Map<Integer, SoftReference<HashMap<String, Long>>> _rcdMapCache = null;

    /**
     * Creates an empty frame block with zero rows and without schema.
     * The recode map cache is only allocated if recode map reuse is
     * enabled via the internal configuration flag.
     */
    public FrameBlock() {
        _numRows = 0;
        _rcdMapCache = REUSE_RECODE_MAPS ? new HashMap<Integer, SoftReference<HashMap<String, Long>>>() : null;
    }

    /**
     * Copy constructor for frame blocks, which uses a shallow copy for
     * the schema (column types and names) but a deep copy for meta data 
     * and actual column data.
     * 
     * @param that frame block
     */
    public FrameBlock(FrameBlock that) {
        this(that.getSchema(), that.getColumnNames(false));
        copy(that);
        setColumnMetadata(that.getColumnMetadata());
    }

    public FrameBlock(int ncols, ValueType vt) {
        this();
        _schema = UtilFunctions.nCopies(ncols, vt);
        _colnames = null; //default not materialized
        _colmeta = new ColumnMetadata[ncols];
        for (int j = 0; j < ncols; j++)
            _colmeta[j] = new ColumnMetadata(0);
    }

    /**
     * Creates a frame block without rows for the given schema, where
     * the column names are not materialized.
     * 
     * @param schema schema as array of ValueTypes
     */
    public FrameBlock(ValueType[] schema) {
        this(schema, (String[]) null, new String[0][0]);
    }

    /**
     * Creates a frame block without rows for the given schema and 
     * column names.
     * 
     * @param schema schema as array of ValueTypes
     * @param names column names, stored by reference
     */
    public FrameBlock(ValueType[] schema, String[] names) {
        this(schema, names, new String[0][0]);
    }

    /**
     * Creates a frame block for the given schema and appends all given 
     * string-encoded rows; the column names are not materialized.
     * 
     * @param schema schema as array of ValueTypes
     * @param data rows of string-encoded fields
     */
    public FrameBlock(ValueType[] schema, String[][] data) {
        //null names indicate default column names, created on demand
        this(schema, (String[]) null, data);
    }

    public FrameBlock(ValueType[] schema, String[] names, String[][] data) {
        _numRows = 0; //maintained on append
        _schema = schema;
        _colnames = names;
        _colmeta = new ColumnMetadata[_schema.length];
        for (int j = 0; j < _schema.length; j++)
            _colmeta[j] = new ColumnMetadata(0);
        for (int i = 0; i < data.length; i++)
            appendRow(data[i]);
        if (REUSE_RECODE_MAPS)
            _rcdMapCache = new HashMap<Integer, SoftReference<HashMap<String, Long>>>();
    }

    /**
     * Get the number of rows of the frame block.
     * 
     * @return number of rows
     */
    public int getNumRows() {
        return this._numRows;
    }

    /**
     * Sets the number of rows of the frame block; only the row counter
     * is updated, the column data is left untouched.
     * 
     * @param numRows number of rows
     */
    public void setNumRows(int numRows) {
        this._numRows = numRows;
    }

    /**
     * Get the number of columns of the frame block, that is
     * the number of columns defined in the schema.
     * 
     * @return number of columns
     */
    public int getNumColumns() {
        //no schema means no columns
        if (_schema == null)
            return 0;
        return _schema.length;
    }

    /**
     * Returns the schema of the frame block.
     * 
     * @return schema as array of ValueTypes
     */
    public ValueType[] getSchema() {
        //returns the internal array, not a copy
        return this._schema;
    }

    /**
     * Sets the schema of the frame block.
     * 
     * @param schema schema as array of ValueTypes
     */
    public void setSchema(ValueType[] schema) {
        //the given array is stored by reference
        this._schema = schema;
    }

    /**
     * Returns the column names of the frame block. This method 
     * allocates default column names if required.
     * 
     * @return column names
     */
    public String[] getColumnNames() {
        final boolean allocateIfMissing = true;
        return getColumnNames(allocateIfMissing);
    }

    /**
     * Returns the column names of the frame block. This method 
     * allocates default column names if required.
     * 
     * @param alloc if true, create column names
     * @return array of column names
     */
    public String[] getColumnNames(boolean alloc) {
        //materialize default names only on request
        if (alloc && _colnames == null) {
            _colnames = createColNames(getNumColumns());
        }
        return _colnames;
    }

    /**
     * Returns the column name for the requested column. This 
     * method allocates default column names if required.
     * 
     * @param c column index
     * @return column name
     */
    public String getColumnName(int c) {
        String[] names = _colnames;
        if (names == null) {
            //materialize default names on first access
            names = createColNames(getNumColumns());
            _colnames = names;
        }
        return names[c];
    }

    /**
     * Sets the column names of the frame block. The given array is 
     * stored by reference.
     * 
     * @param colnames column names
     */
    public void setColumnNames(String[] colnames) {
        this._colnames = colnames;
    }

    /**
     * Returns the column metadata of all columns. The internal array
     * is returned, not a copy.
     * 
     * @return array of column metadata
     */
    public ColumnMetadata[] getColumnMetadata() {
        return this._colmeta;
    }

    /**
     * Returns the column metadata of the requested column.
     * 
     * @param c column index, 0-based
     * @return column metadata
     */
    public ColumnMetadata getColumnMetadata(int c) {
        return this._colmeta[c];
    }

    /**
     * Indicates if the metadata of all columns is in its default state.
     * 
     * @return true if every column has default metadata
     */
    public boolean isColumnMetadataDefault() {
        final int ncol = getNumColumns();
        for (int col = 0; col < ncol; col++) {
            //a single non-default column decides the result
            if (!isColumnMetadataDefault(col))
                return false;
        }
        return true;
    }

    /**
     * Indicates if the metadata of the given column is in its default
     * state, i.e., no missing-value replacement and zero distinct values.
     * 
     * @param c column index, 0-based
     * @return true if the column has default metadata
     */
    public boolean isColumnMetadataDefault(int c) {
        final ColumnMetadata meta = _colmeta[c];
        if (meta.getMvValue() != null)
            return false;
        return meta.getNumDistinct() == 0;
    }

    /**
     * Copies the references of the given metadata objects into the 
     * existing internal metadata array; the number of copied entries 
     * is the length of the internal array.
     * 
     * @param colmeta array of column metadata
     */
    public void setColumnMetadata(ColumnMetadata[] colmeta) {
        final int len = _colmeta.length;
        System.arraycopy(colmeta, 0, _colmeta, 0, len);
    }

    /**
     * Sets the column metadata of the given column.
     * 
     * @param c column index, 0-based
     * @param colmeta column metadata, stored by reference
     */
    public void setColumnMetadata(int c, ColumnMetadata colmeta) {
        this._colmeta[c] = colmeta;
    }

    /**
     * Creates a mapping from column names to column IDs, i.e., 
     * 1-based column indexes
     * 
     * @return map of column name keys and id values
     */
    public Map<String, Integer> getColumnNameIDMap() {
        final int ncol = getNumColumns();
        Map<String, Integer> nameToId = new HashMap<String, Integer>();
        //column IDs are the 1-based column positions
        for (int id = 1; id <= ncol; id++)
            nameToId.put(getColumnName(id - 1), id);
        return nameToId;
    }

    /**
     * Allocate column data structures if necessary, i.e., if schema specified
     * but not all column data structures created yet.
     * 
     * @param numRows number of rows
     */
    public void ensureAllocatedColumns(int numRows) {
        //nothing to do if there is already one column array per schema entry
        final boolean allocated = (_coldata != null && _schema.length == _coldata.length);
        if (allocated)
            return;

        //(re)create the column metadata if missing or of different width
        final boolean metaMismatch = (_colmeta == null || _schema.length != _colmeta.length);
        if (metaMismatch) {
            _colmeta = new ColumnMetadata[_schema.length];
            for (int col = 0; col < _schema.length; col++)
                _colmeta[col] = new ColumnMetadata(0);
        }

        //create one typed column of the requested length per schema entry
        _coldata = new Array[_schema.length];
        for (int col = 0; col < _schema.length; col++) {
            final ValueType vt = _schema[col];
            final Array column;
            switch (vt) {
            case STRING:
                column = new StringArray(new String[numRows]);
                break;
            case BOOLEAN:
                column = new BooleanArray(new boolean[numRows]);
                break;
            case INT:
                column = new LongArray(new long[numRows]);
                break;
            case DOUBLE:
                column = new DoubleArray(new double[numRows]);
                break;
            default:
                throw new RuntimeException("Unsupported value type: " + vt);
            }
            _coldata[col] = column;
        }
        _numRows = numRows;
    }

    /**
     * Checks for matching column sizes in case of existing columns.
     *       
     * @param newlen number of rows to compare with existing number of rows
     */
    public void ensureColumnCompatibility(int newlen) {
        //without existing columns any length is compatible
        final boolean hasColumns = (_coldata != null && _coldata.length > 0);
        if (!hasColumns || _numRows == newlen)
            return;
        throw new RuntimeException("Mismatch in number of rows: " + newlen + " (expected: " + _numRows + ")");
    }

    /**
     * Creates the default column names for the given number of 
     * columns, starting with the name of the first column.
     * 
     * @param size number of column names
     * @return array of default column names
     */
    public static String[] createColNames(int size) {
        final int noOffset = 0;
        return createColNames(noOffset, size);
    }

    /**
     * Creates default column names for the 1-based column positions
     * off+1 to off+size.
     * 
     * @param off number of preceding columns
     * @param size number of column names to create
     * @return array of default column names
     */
    public static String[] createColNames(int off, int size) {
        String[] names = new String[size];
        for (int pos = 0; pos < size; pos++)
            names[pos] = createColName(off + pos + 1);
        return names;
    }

    /**
     * Creates the default column name for the given number, which 
     * is the prefix "C" followed by the number.
     * 
     * @param i column number
     * @return default column name
     */
    public static String createColName(int i) {
        return new StringBuilder("C").append(i).toString();
    }

    /**
     * Indicates if the column names are materialized and all of them
     * equal their default names. Non-materialized column names yield
     * false.
     * 
     * @return true if all materialized column names are default names
     */
    public boolean isColNamesDefault() {
        if (_colnames == null)
            return false;
        final int ncol = getNumColumns();
        for (int col = 0; col < ncol; col++) {
            if (!isColNameDefault(col))
                return false;
        }
        return true;
    }

    /**
     * Indicates if the name of the given column is the default name 
     * "C" followed by the 1-based column position. Non-materialized
     * column names are considered default.
     * 
     * @param i column index, 0-based
     * @return true if the column name is the default name
     */
    public boolean isColNameDefault(int i) {
        if (_colnames == null)
            return true;
        final String expected = "C" + (i + 1);
        return _colnames[i].equals(expected);
    }

    /**
     * Recomputes the per-column cardinality stored in the column 
     * metadata. For every column, the number of non-null cells is 
     * counted and stored as the number of distinct values.
     */
    public void recomputeColumnCardinality() {
        final int ncol = getNumColumns();
        for (int col = 0; col < ncol; col++) {
            int nonNull = 0;
            for (int row = 0; row < getNumRows(); row++) {
                if (get(row, col) != null)
                    nonNull++;
            }
            _colmeta[col].setNumDistinct(nonNull);
        }
    }

    ///////
    // basic get and set functionality

    /**
     * Gets a boxed object of the value in position (r,c).
     * 
     * @param r   row index, 0-based
     * @param c   column index, 0-based
     * @return object of the value at specified position
     */
    public Object get(int r, int c) {
        //select the column first, then read the row within it
        final Array column = _coldata[c];
        return column.get(r);
    }

    /**
     * Sets the value in position (r,c), where the input is assumed
     * to be a boxed object consistent with the schema definition.
     * 
     * @param r row index
     * @param c column index
     * @param val value to set at specified position
     */
    public void set(int r, int c, Object val) {
        //convert the value according to the value type of the target column
        final Object converted = UtilFunctions.objectToObject(_schema[c], val);
        _coldata[c].set(r, converted);
    }

    /**
     * Resets this frame block for reuse. If requested, schema and column
     * names are dropped and all non-default column metadata objects are 
     * replaced by new default objects. In any case, the size of every 
     * existing column is set to the given number of rows; the row counter
     * of the frame block itself is not modified here.
     * 
     * @param nrow new size of the existing columns
     * @param clearMeta if true, clear schema, column names, and metadata
     */
    public void reset(int nrow, boolean clearMeta) {
        if (clearMeta) {
            _schema = null;
            _colnames = null;
            final int nmeta = (_colmeta != null) ? _colmeta.length : 0;
            for (int col = 0; col < nmeta; col++) {
                if (isColumnMetadataDefault(col))
                    continue; //already default, keep the existing object
                _colmeta[col] = new ColumnMetadata(0);
            }
        }
        if (_coldata == null)
            return;
        for (Array column : _coldata)
            column._size = nrow;
    }

    /**
     * Resets this frame block to zero-sized columns and clears the 
     * schema, column names, and column metadata.
     */
    public void reset() {
        final int noRows = 0;
        final boolean clearMeta = true;
        reset(noRows, clearMeta);
    }

    /**
     * Append a row to the end of the data frame, where all row fields
     * are boxed objects according to the schema.
     * 
     * @param row array of objects
     */
    public void appendRow(Object[] row) {
        //make sure the typed columns exist before appending
        ensureAllocatedColumns(0);
        int col = 0;
        for (Object cell : row)
            _coldata[col++].append(cell);
        _numRows++;
    }

    /**
     * Append a row to the end of the data frame, where all row fields
     * are string encoded.
     * 
     * @param row array of strings
     */
    public void appendRow(String[] row) {
        //make sure the typed columns exist before appending
        ensureAllocatedColumns(0);
        int col = 0;
        for (String cell : row)
            _coldata[col++].append(cell);
        _numRows++;
    }

    /**
     * Append a column of value type STRING as the last column of 
     * the data frame. The given array is wrapped but not copied 
     * and hence might be updated in the future.
     * 
     * @param col array of strings
     */
    public void appendColumn(String[] col) {
        ensureColumnCompatibility(col.length);
        String[] colnames = getColumnNames(); //before schema modification
        //extend the schema first such that the default name of the new column
        //is derived from its 1-based position (the new schema length); this is
        //consistent with the other appendColumn variants, avoids a duplicate of
        //the previous last default name, and works for frames without schema
        _schema = (ValueType[]) ArrayUtils.add(_schema, ValueType.STRING);
        _colnames = (String[]) ArrayUtils.add(colnames, createColName(_schema.length));
        //wrap (not copy) the given array as new last column
        _coldata = (_coldata == null) ? new Array[] { new StringArray(col) }
                : (Array[]) ArrayUtils.add(_coldata, new StringArray(col));
        _numRows = col.length;
    }

    /**
     * Append a column of value type BOOLEAN as the last column of 
     * the data frame. The given array is wrapped but not copied 
     * and hence might be updated in the future.
     * 
     * @param col array of booleans
     */
    public void appendColumn(boolean[] col) {
        ensureColumnCompatibility(col.length);
        //materialize the existing names before the schema grows
        String[] prevNames = getColumnNames();
        _schema = (ValueType[]) ArrayUtils.add(_schema, ValueType.BOOLEAN);
        //the new column is named after its 1-based position
        _colnames = (String[]) ArrayUtils.add(prevNames, createColName(_schema.length));
        if (_coldata == null)
            _coldata = new Array[] { new BooleanArray(col) };
        else
            _coldata = (Array[]) ArrayUtils.add(_coldata, new BooleanArray(col));
        _numRows = col.length;
    }

    /**
     * Append a column of value type INT as the last column of 
     * the data frame. The given array is wrapped but not copied 
     * and hence might be updated in the future.
     * 
     * @param col array of longs
     */
    public void appendColumn(long[] col) {
        ensureColumnCompatibility(col.length);
        //materialize the existing names before the schema grows
        String[] prevNames = getColumnNames();
        _schema = (ValueType[]) ArrayUtils.add(_schema, ValueType.INT);
        //the new column is named after its 1-based position
        _colnames = (String[]) ArrayUtils.add(prevNames, createColName(_schema.length));
        if (_coldata == null)
            _coldata = new Array[] { new LongArray(col) };
        else
            _coldata = (Array[]) ArrayUtils.add(_coldata, new LongArray(col));
        _numRows = col.length;
    }

    /**
     * Append a column of value type DOUBLE as the last column of 
     * the data frame. The given array is wrapped but not copied 
     * and hence might be updated in the future.
     * 
     * @param col array of doubles
     */
    public void appendColumn(double[] col) {
        ensureColumnCompatibility(col.length);
        //materialize the existing names before the schema grows
        String[] prevNames = getColumnNames();
        _schema = (ValueType[]) ArrayUtils.add(_schema, ValueType.DOUBLE);
        //the new column is named after its 1-based position
        _colnames = (String[]) ArrayUtils.add(prevNames, createColName(_schema.length));
        if (_coldata == null)
            _coldata = new Array[] { new DoubleArray(col) };
        else
            _coldata = (Array[]) ArrayUtils.add(_coldata, new DoubleArray(col));
        _numRows = col.length;
    }

    /**
     * Append a set of column of value type DOUBLE at the end of the frame
     * in order to avoid repeated allocation with appendColumns. The given 
     * array is wrapped but not copied and hence might be updated in the future.
     * 
     * @param cols 2d array of doubles
     */
    public void appendColumns(double[][] cols) {
        final int ncol = cols.length;
        final boolean empty = (_schema == null);

        //prepare schema and wrapped column arrays of the new columns
        ValueType[] newSchema = UtilFunctions.nCopies(ncol, ValueType.DOUBLE);
        Array[] newData = new Array[ncol];
        int pos = 0;
        for (double[] col : cols)
            newData[pos++] = new DoubleArray(col);

        if (empty) {
            //frame without schema: the new columns become the entire frame
            _colnames = null;
            _schema = newSchema;
            _coldata = newData;
        } else {
            //names are extended before the schema modification
            _colnames = (String[]) ArrayUtils.addAll(getColumnNames(), createColNames(getNumColumns(), ncol));
            _schema = (ValueType[]) ArrayUtils.addAll(_schema, newSchema);
            _coldata = (Array[]) ArrayUtils.addAll(_coldata, newData);
        }
        _numRows = cols[0].length;
    }

    /**
     * Returns the underlying native array of the requested column, 
     * i.e., a String[], boolean[], long[], or double[] depending on the
     * value type of the column. The internal array is returned, not a copy.
     * 
     * @param c column index, 0-based
     * @return native array of the column, or null for other value types
     */
    public Object getColumn(int c) {
        final ValueType vt = _schema[c];
        switch (vt) {
        case STRING: {
            StringArray column = (StringArray) _coldata[c];
            return column._data;
        }
        case BOOLEAN: {
            BooleanArray column = (BooleanArray) _coldata[c];
            return column._data;
        }
        case INT: {
            LongArray column = (LongArray) _coldata[c];
            return column._data;
        }
        case DOUBLE: {
            DoubleArray column = (DoubleArray) _coldata[c];
            return column._data;
        }
        default:
            return null;
        }
    }

    /**
     * Get a row iterator over the frame where all fields are encoded
     * as strings independent of their value types.  
     * 
     * @return string array iterator
     */
    public Iterator<String[]> getStringRowIterator() {
        //iterate over all rows of the frame
        Iterator<String[]> iter = new StringRowIterator(0, _numRows);
        return iter;
    }

    /**
     * Get a row iterator over the frame where all fields are encoded
     * as strings independent of their value types.  
     * 
     * @param rl lower row index
     * @param ru upper row index
     * @return string array iterator
     */
    public Iterator<String[]> getStringRowIterator(int rl, int ru) {
        //iterate over the given row range only
        Iterator<String[]> iter = new StringRowIterator(rl, ru);
        return iter;
    }

    /**
     * Get a row iterator over the frame where all fields are encoded
     * as boxed objects according to their value types.  
     * 
     * @return object array iterator
     */
    public Iterator<Object[]> getObjectRowIterator() {
        //iterate over all rows of the frame
        Iterator<Object[]> iter = new ObjectRowIterator(0, _numRows);
        return iter;
    }

    /**
     * Get a row iterator over the frame where all fields are encoded
     * as boxed objects according to their value types.  
     * 
     * @param rl lower row index
     * @param ru upper row index
     * @return object array iterator
     */
    public Iterator<Object[]> getObjectRowIterator(int rl, int ru) {
        //iterate over the given row range only
        Iterator<Object[]> iter = new ObjectRowIterator(rl, ru);
        return iter;
    }

    ///////
    // serialization / deserialization (implementation of writable and externalizable)

    /**
     * Serializes this frame block. The header consists of the number of
     * rows, the number of columns, and a flag indicating default meta data
     * (default column names and default column metadata). Per column, the
     * ordinal of the value type is written, followed by name, number of
     * distinct values, and missing-value replacement (empty string if not
     * set) in case of non-default meta data, and finally the column data.
     * 
     * @param out data output
     * @throws IOException if the underlying output fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        final boolean defaultMeta = isColNamesDefault() && isColumnMetadataDefault();
        final int ncol = getNumColumns();

        //header (rows, cols, default meta flag)
        out.writeInt(getNumRows());
        out.writeInt(ncol);
        out.writeBoolean(defaultMeta);

        //columns (value type, optional meta, data)
        for (int col = 0; col < ncol; col++) {
            out.writeByte(_schema[col].ordinal());
            if (!defaultMeta) {
                out.writeUTF(getColumnName(col));
                out.writeLong(_colmeta[col].getNumDistinct());
                String mv = _colmeta[col].getMvValue();
                out.writeUTF((mv != null) ? mv : "");
            }
            _coldata[col].write(out);
        }
    }

    /**
     * Deserializes this frame block from the format produced by write:
     * a header (number of rows, number of columns, default meta flag),
     * followed per column by the value type ordinal, optional meta data
     * (name, number of distinct values, missing-value replacement), and
     * the column data. Existing schema, name, metadata, and data arrays
     * are reused if their length matches the number of columns.
     * 
     * In case of default meta data, the column names are reconstructed as
     * default names of the 1-based column positions (C1, C2, ...), which
     * is the convention of createColNames and isColNameDefault.
     * 
     * @param in data input
     * @throws IOException if reading fails or an unsupported value type is read
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        //read head (rows, cols)
        _numRows = in.readInt();
        int numCols = in.readInt();
        boolean isDefaultMeta = in.readBoolean();
        //allocate schema/meta data arrays
        _schema = (_schema != null && _schema.length == numCols) ? _schema : new ValueType[numCols];
        _colnames = (_colnames != null && _colnames.length == numCols) ? _colnames : new String[numCols];
        _colmeta = (_colmeta != null && _colmeta.length == numCols) ? _colmeta : new ColumnMetadata[numCols];
        _coldata = (_coldata != null && _coldata.length == numCols) ? _coldata : new Array[numCols];
        //read columns (value type, meta, data)
        for (int j = 0; j < numCols; j++) {
            ValueType vt = ValueType.values()[in.readByte()];
            //default names are 1-based; j is the 0-based column index
            String name = isDefaultMeta ? createColName(j + 1) : in.readUTF();
            long ndistinct = isDefaultMeta ? 0 : in.readLong();
            String mvvalue = isDefaultMeta ? null : in.readUTF();
            Array arr = null;
            switch (vt) {
            case STRING:
                arr = new StringArray(new String[_numRows]);
                break;
            case BOOLEAN:
                arr = new BooleanArray(new boolean[_numRows]);
                break;
            case INT:
                arr = new LongArray(new long[_numRows]);
                break;
            case DOUBLE:
                arr = new DoubleArray(new double[_numRows]);
                break;
            default:
                throw new IOException("Unsupported value type: " + vt);
            }
            arr.readFields(in);
            _schema[j] = vt;
            _colnames[j] = name;
            //an empty string encodes a missing-value replacement that is not set
            _colmeta[j] = new ColumnMetadata(ndistinct, (mvvalue == null || mvvalue.isEmpty()) ? null : mvvalue);
            _coldata[j] = arr;
        }
    }

    /**
     * Externalizable serialization, which delegates to the writable 
     * implementation.
     * 
     * @param out object output
     * @throws IOException if the underlying output fails
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        //an ObjectOutput is also a DataOutput
        final DataOutput target = out;
        write(target);
    }

    /**
     * Externalizable deserialization, which delegates to the writable 
     * implementation.
     * 
     * @param in object input
     * @throws IOException if the underlying input fails
     * @throws ClassNotFoundException declared by the interface
     */
    @Override
    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
        //an ObjectInput is also a DataInput
        final DataInput source = in;
        readFields(source);
    }

    ////////
    // CacheBlock implementation

    /**
     * Estimates the in-memory size of this frame block in bytes as the
     * sum of the block header, the schema, column name, metadata, and 
     * data arrays, and the per-cell sizes of all columns. All products
     * are computed in long arithmetic to avoid int overflows for large
     * numbers of rows or columns.
     * 
     * @return estimated in-memory size in bytes
     */
    @Override
    public long getInMemorySize() {
        //frame block header
        long size = 16 + 4; //object, num rows

        //schema array (overhead and int entries)
        int clen = getNumColumns();
        size += 8 + 32 + 4L * clen;

        //colname array (overhead and string entries)
        size += 8 + ((_colnames != null) ? 32 : 0);
        for (int j = 0; j < clen && _colnames != null; j++)
            size += getInMemoryStringSize(getColumnName(j));

        //meta data array (overhead and entries)
        size += 8 + 32;
        for (int j = 0; j < clen; j++) {
            size += 16 + 8 + 8 //object, long num distinct, ref mv 
                    + getInMemoryStringSize(_colmeta[j].getMvValue());
        }

        //data array (overhead and entries)
        size += 8 + 32 + (long) clen * (16 + 4 + 8 + 32);
        for (int j = 0; j < clen; j++) {
            switch (_schema[j]) {
            case BOOLEAN:
                size += _numRows;
                break;
            case INT:
            case DOUBLE:
                size += 8L * _numRows; //long multiplication, no int overflow
                break;
            case STRING:
                StringArray arr = (StringArray) _coldata[j];
                for (int i = 0; i < _numRows; i++)
                    size += getInMemoryStringSize(arr.get(i));
                break;
            default: //not applicable   
            }
        }

        return size;
    }

    /**
     * Computes the serialized size of this frame block in bytes, following
     * the layout of write: a 9 byte header, and per column one byte for 
     * the value type, the optional meta data (name, number of distinct 
     * values, missing-value replacement), and the column data. Products 
     * are computed in long arithmetic to avoid int overflows for large 
     * numbers of rows.
     * 
     * @return serialized size in bytes
     */
    @Override
    public long getExactSerializedSize() {
        //header: 2xint, boolean
        long size = 9;

        //column sizes
        boolean isDefaultMeta = isColNamesDefault() && isColumnMetadataDefault();
        for (int j = 0; j < getNumColumns(); j++) {
            size += 1; //column schema
            if (!isDefaultMeta) {
                size += IOUtilFunctions.getUTFSize(getColumnName(j));
                size += 8; //long num distinct
                size += IOUtilFunctions.getUTFSize(_colmeta[j].getMvValue());
            }
            switch (_schema[j]) {
            case BOOLEAN:
                size += _numRows;
                break;
            case INT:
            case DOUBLE:
                size += 8L * _numRows; //long multiplication, no int overflow
                break;
            case STRING:
                StringArray arr = (StringArray) _coldata[j];
                for (int i = 0; i < _numRows; i++)
                    size += IOUtilFunctions.getUTFSize(arr.get(i));
                break;
            default: //not applicable   
            }
        }

        return size;
    }

    /**
     * Indicates if this frame block is shallow serialized, which is 
     * the case if the schema does not contain any STRING column.
     * 
     * @return true if no column is of value type STRING
     */
    @Override
    public boolean isShallowSerialize() {
        //a frame block is always dense but strings have large array
        //overhead per cell, so a single string column rules it out
        for (ValueType vt : _schema) {
            if (vt == ValueType.STRING)
                return false;
        }
        return true;
    }

    /**
     * No-op for frame blocks.
     */
    @Override
    public void compactEmptyBlock() {
        //intentionally empty
    }

    /**
     * Returns the in-memory size in bytes of the given string value. 
     * 
     * @param value string value
     * @return in-memory size of string value
     */
    private long getInMemoryStringSize(String value) {
        if (value != null) {
            int header = 16 + 4 + 8; //object, hash, array ref
            int bytes = header + 32 + value.length(); //char array
            return bytes;
        }
        //null references do not occupy additional memory
        return 0;
    }

    ///////
    // indexing and append operations

    /**
     * Left indexing operations over an index range, which delegates to 
     * the variant with explicit row and column bounds after narrowing 
     * the range fields to int.
     * 
     * @param rhsFrame frame block to write into the indexed range
     * @param ixrange index range of rows and columns
     * @param ret frame block to return, can be null
     * @return frame block
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public FrameBlock leftIndexingOperations(FrameBlock rhsFrame, IndexRange ixrange, FrameBlock ret)
            throws DMLRuntimeException {
        final int rl = (int) ixrange.rowStart;
        final int ru = (int) ixrange.rowEnd;
        final int cl = (int) ixrange.colStart;
        final int cu = (int) ixrange.colEnd;
        return leftIndexingOperations(rhsFrame, rl, ru, cl, cu, ret);
    }

    /**
     * Left indexing operations, which create an output frame with cloned
     * columns of this frame, where the cells of the given inclusive, 
     * 0-based row and column range are overwritten by the right-hand-side 
     * frame. This frame block is not modified.
     * 
     * @param rhsFrame frame block to write into the indexed range
     * @param rl row lower index, inclusive, 0-based
     * @param ru row upper index, inclusive, 0-based
     * @param cl column lower index, inclusive, 0-based
     * @param cu column upper index, inclusive, 0-based
     * @param ret frame block to return, can be null
     * @return frame block
     * @throws DMLRuntimeException if the indexes are out of bounds or the 
     *         right-hand-side frame is larger than the indexed range
     */
    public FrameBlock leftIndexingOperations(FrameBlock rhsFrame, int rl, int ru, int cl, int cu, FrameBlock ret)
            throws DMLRuntimeException {
        final int nrow = getNumRows();
        final int ncol = getNumColumns();

        //check the validity of bounds
        final boolean validRows = (rl >= 0 && rl < nrow && ru >= rl && ru < nrow);
        final boolean validCols = (cl >= 0 && cu >= cl && cu < ncol);
        if (!(validRows && validCols)) {
            throw new DMLRuntimeException("Invalid values for frame indexing: [" + (rl + 1) + ":" + (ru + 1) + ","
                    + (cl + 1) + ":" + (cu + 1) + "] " + "must be within frame dimensions [" + nrow + ","
                    + ncol + "].");
        }

        //the rhs frame must not exceed the indexed range
        final int rangeRows = ru - rl + 1;
        final int rangeCols = cu - cl + 1;
        if (rhsFrame.getNumRows() > rangeRows || rhsFrame.getNumColumns() > rangeCols) {
            throw new DMLRuntimeException("Invalid values for frame indexing: " + "dimensions of the source frame ["
                    + rhsFrame.getNumRows() + "x" + rhsFrame.getNumColumns() + "] "
                    + "do not match the shape of the frame specified by indices [" + (rl + 1) + ":" + (ru + 1)
                    + ", " + (cl + 1) + ":" + (cu + 1) + "].");
        }

        //allocate output frame (incl cloned schema, names, and metadata array)
        FrameBlock out = (ret != null) ? ret : new FrameBlock();
        out._numRows = _numRows;
        out._schema = _schema.clone();
        out._colnames = (_colnames != null) ? _colnames.clone() : null;
        out._colmeta = _colmeta.clone();
        out._coldata = new Array[ncol];

        //clone every column and overwrite the indexed columns w/ rhs
        for (int col = 0; col < ncol; col++) {
            Array copy = _coldata[col].clone();
            boolean indexed = (col >= cl && col <= cu);
            if (indexed)
                copy.set(rl, ru, rhsFrame._coldata[col - cl]);
            out._coldata[col] = copy;
        }

        return out;
    }

    /**
     * Right indexing operations over an index range, which delegates to 
     * the variant with explicit row and column bounds after narrowing 
     * the range fields to int.
     * 
     * @param ixrange index range of rows and columns
     * @param ret frame block to return, can be null
     * @return frame block
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public FrameBlock sliceOperations(IndexRange ixrange, FrameBlock ret) throws DMLRuntimeException {
        final int rl = (int) ixrange.rowStart;
        final int ru = (int) ixrange.rowEnd;
        final int cl = (int) ixrange.colStart;
        final int cu = (int) ixrange.colEnd;
        return sliceOperations(rl, ru, cl, cu, ret);
    }

    /**
     * Right indexing operations to slice a subframe out of this frame block. 
     * Note that the existing column value types are preserved.
     * 
     * @param rl row lower index, inclusive, 0-based
     * @param ru row upper index, inclusive, 0-based
     * @param cl column lower index, inclusive, 0-based
     * @param cu column upper index, inclusive, 0-based
     * @param retCache cache block
     * @return frame block
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public FrameBlock sliceOperations(int rl, int ru, int cl, int cu, CacheBlock retCache)
            throws DMLRuntimeException {
        FrameBlock out = (FrameBlock) retCache;
        final int nrow = getNumRows();
        final int ncol = getNumColumns();

        //check the validity of bounds
        final boolean validRows = (rl >= 0 && rl < nrow && ru >= rl && ru < nrow);
        final boolean validCols = (cl >= 0 && cu >= cl && cu < ncol);
        if (!(validRows && validCols)) {
            throw new DMLRuntimeException("Invalid values for frame indexing: [" + (rl + 1) + ":" + (ru + 1) + ","
                    + (cl + 1) + ":" + (cu + 1) + "] " + "must be within frame dimensions [" + nrow + ","
                    + ncol + "]");
        }

        //allocate a new output frame or reset the given one for reuse
        final int sliceRows = ru - rl + 1;
        if (out != null)
            out.reset(sliceRows, true);
        else
            out = new FrameBlock();

        //copy output schema, metadata references, and non-default colnames
        final int sliceCols = cu - cl + 1;
        final boolean defaultNames = isColNamesDefault();
        out._schema = new ValueType[sliceCols];
        out._colnames = defaultNames ? null : new String[sliceCols];
        out._colmeta = new ColumnMetadata[sliceCols];
        for (int src = cl, dst = 0; src <= cu; src++, dst++) {
            out._schema[dst] = _schema[src];
            out._colmeta[dst] = _colmeta[src];
            if (!defaultNames)
                out._colnames[dst] = getColumnName(src);
        }
        out._numRows = sliceRows;

        //copy output data: slice into new columns or overwrite existing ones
        if (out._coldata != null) {
            for (int src = cl, dst = 0; src <= cu; src++, dst++)
                out._coldata[dst].set(0, ru - rl, _coldata[src], rl);
        } else {
            out._coldata = new Array[sliceCols];
            for (int src = cl, dst = 0; src <= cu; src++, dst++)
                out._coldata[dst] = _coldata[src].slice(rl, ru);
        }

        return out;
    }

    /**
     * Slices the given index range out of this frame block and appends 
     * the rows to the frame blocks of the output list. Rows before the 
     * row cut are appended to the first consumed output block, and rows
     * at or after the row cut to the next one. An output block is only 
     * taken from the list if the range covers its side of the cut.
     * 
     * @param outlist list of output frame blocks
     * @param range index range of rows and columns, inclusive, 0-based
     * @param rowCut first row index that belongs to the bottom block
     */
    public void sliceOperations(ArrayList<Pair<Long, FrameBlock>> outlist, IndexRange range, int rowCut) {
        Iterator<Pair<Long, FrameBlock>> outputs = outlist.iterator();

        //consume the output blocks in list order, as far as covered by the range
        FrameBlock top = null;
        if (range.rowStart < rowCut)
            top = (FrameBlock) outputs.next().getValue();
        FrameBlock bottom = null;
        if (range.rowEnd >= rowCut)
            bottom = (FrameBlock) outputs.next().getValue();

        if (getNumRows() <= 0)
            return;

        final int colBegin = (int) range.colStart;
        final int width = (int) (range.colEnd - range.colStart + 1);
        for (int r = (int) range.rowStart; r <= range.rowEnd; r++) {
            //extract the cells of the column range of the current row
            Object[] cells = new Object[width];
            for (int c = colBegin; c <= range.colEnd; c++)
                cells[(int) (c - range.colStart)] = get(r, c);
            //rows before the cut go to the top block, all others to the bottom
            FrameBlock target = (r < rowCut) ? top : bottom;
            target.appendRow(cells);
        }
    }

    /**
     * Appends the given argument frameblock 'that' to this frameblock by 
     * creating a deep copy to prevent side effects. For cbind, the frames
     * are appended column-wise (same number of rows), while for rbind the 
     * frames are appended row-wise (same number of columns).   
     * 
     * @param that frame block to append to current frame block
     * @param ret frame block to return, can be null
     * @param cbind if true, column append
     * @return frame block
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public FrameBlock appendOperations(FrameBlock that, FrameBlock ret, boolean cbind) throws DMLRuntimeException {
        if (cbind) //COLUMN APPEND
        {
            //sanity check row dimension mismatch
            if (getNumRows() != that.getNumRows()) {
                throw new DMLRuntimeException("Incompatible number of rows for cbind: " + that.getNumRows()
                        + " (expected: " + getNumRows() + ")");
            }

            //allocate output frame
            if (ret == null)
                ret = new FrameBlock();
            ret._numRows = _numRows;

            //concatenate schemas, names, and metadata into new arrays
            //(names of both inputs are materialized if necessary)
            ret._schema = (ValueType[]) ArrayUtils.addAll(_schema, that._schema);
            ret._colnames = (String[]) ArrayUtils.addAll(getColumnNames(), that.getColumnNames());
            ret._colmeta = (ColumnMetadata[]) ArrayUtils.addAll(_colmeta, that._colmeta);

            //concatenate column data (w/ cloned columns to prevent side effects)
            ret._coldata = (Array[]) ArrayUtils.addAll(_coldata, that._coldata);
            for (int i = 0; i < ret._coldata.length; i++)
                ret._coldata[i] = ret._coldata[i].clone();
        } else //ROW APPEND
        {
            //sanity check column dimension mismatch
            if (getNumColumns() != that.getNumColumns()) {
                throw new DMLRuntimeException("Incompatible number of columns for rbind: " + that.getNumColumns()
                        + " (expected: " + getNumColumns() + ")");
            }

            //allocate output frame (incl deep copy schema)
            if (ret == null)
                ret = new FrameBlock();
            ret._numRows = _numRows;
            ret._schema = _schema.clone();
            ret._colnames = (_colnames != null) ? _colnames.clone() : null;

            //allocate default column metadata for the output; appendRow does 
            //not create it because the output columns are already allocated 
            //below, and serialization and size computation access the metadata 
            //of every column
            ret._colmeta = new ColumnMetadata[getNumColumns()];
            for (int j = 0; j < ret._colmeta.length; j++)
                ret._colmeta[j] = new ColumnMetadata(0);

            //concatenate data (deep copy first, append second)
            ret._coldata = new Array[_coldata.length];
            for (int j = 0; j < _coldata.length; j++)
                ret._coldata[j] = _coldata[j].clone();
            Iterator<Object[]> iter = that.getObjectRowIterator();
            while (iter.hasNext())
                ret.appendRow(iter.next());
        }

        return ret;
    }

    /**
     * Copies all rows and columns of the given source frame into this frame,
     * starting at row 0 and column 0 of this frame.
     * 
     * @param src source frame block
     */
    public void copy(FrameBlock src) {
        final int lastRow = src.getNumRows() - 1;
        final int lastCol = src.getNumColumns() - 1;
        copy(0, lastRow, 0, lastCol, src);
    }

    /**
     * Copies the source frame into the cell range [rl,ru] x [cl,cu] of this frame
     * (all bounds inclusive). The source is read starting at its row 0 and column 0,
     * i.e., target cell (i,j) receives source cell (i-rl, j-cl).
     * 
     * @param rl first target row
     * @param ru last target row
     * @param cl first target column
     * @param cu last target column
     * @param src source frame block
     */
    public void copy(int rl, int ru, int cl, int cu, FrameBlock src) {
        //make sure the column arrays exist before writing into them
        ensureAllocatedColumns(ru - rl + 1);

        for (int col = cl; col <= cu; col++) {
            final int srcCol = col - cl;

            if (!_schema[col].equals(src._schema[srcCol])) {
                //value types differ: convert cell by cell via the string representation
                for (int row = rl; row <= ru; row++) {
                    Object cell = src.get(row - rl, srcCol);
                    String text = (cell != null) ? cell.toString() : null;
                    set(row, col, UtilFunctions.stringToObject(_schema[col], text));
                }
                continue;
            }

            //identical value types: bulk copy of the entire column range
            _coldata[col].set(rl, ru, src._coldata[srcCol]);
        }
    }

    ///////
    // transform specific functionality

    /**
     * This function splits every recode map entry of the given column at the last occurrence of the
     * delimiter Lop.DATATYPE_PREFIX, as the recode map was generated earlier in the form of
     * Token+Lop.DATATYPE_PREFIX+Code, and stores the result in a map which contains the token and code for every unique token.
     *
     * @param col   is the column # from frame data which contains Recode map generated earlier.
     * @return map of token and code for every element in the input column of a frame containing Recode map
     */
    public HashMap<String, Long> getRecodeMap(int col) {
        //probe cache for existing map (the soft reference may have been cleared already)
        if (REUSE_RECODE_MAPS) {
            SoftReference<HashMap<String, Long>> ref = _rcdMapCache.get(col);
            HashMap<String, Long> cached = (ref != null) ? ref.get() : null;
            if (cached != null)
                return cached;
        }

        //construct recode map from entries of the form token + delimiter + code; a plain
        //split on the delimiter is used instead of RFC-4180 CSV splitting of the entry
        final String delim = Lop.DATATYPE_PREFIX;
        HashMap<String, Long> map = new HashMap<String, Long>();
        Array ldata = _coldata[col];
        for (int i = 0; i < getNumRows(); i++) {
            Object val = ldata.get(i);
            if (val == null)
                continue; //null cells carry no recode entry

            //split at the LAST delimiter, everything before it is the token
            String entry = val.toString();
            int pos = entry.lastIndexOf(delim);
            if (pos < 0) {
                //same exception type that substring(0, -1) raised before, but with a message
                //that identifies the offending entry
                throw new StringIndexOutOfBoundsException("Invalid recode map entry '" + entry + "' in column "
                        + col + ": missing delimiter '" + delim + "'");
            }
            String token = entry.substring(0, pos);
            //skip the full delimiter rather than assuming it is a single character
            String code = entry.substring(pos + delim.length());
            map.put(token, Long.parseLong(code));
        }

        //put created map into cache
        if (REUSE_RECODE_MAPS) {
            _rcdMapCache.put(col, new SoftReference<HashMap<String, Long>>(map));
        }

        return map;
    }

    /**
     * Merges the given cache block into this frame block by casting it to a
     * FrameBlock and delegating to the frame-specific merge.
     * 
     * @param that cache block to merge, must be a FrameBlock
     * @param bDummy not used by this implementation
     * @throws DMLRuntimeException if the frame-specific merge fails
     */
    public void merge(CacheBlock that, boolean bDummy) throws DMLRuntimeException {
        FrameBlock other = (FrameBlock) that;
        merge(other);
    }

    /**
     * Merges the given frame block into this frame block. A null or empty source is
     * ignored; otherwise both frames must have identical dimensions. Column meta data
     * is taken over from the source for every column where the source reports
     * non-default meta data, and only non-null (for primitive columns: non-zero /
     * true) source cells overwrite the cells of this frame.
     * 
     * @param that frame block to merge into this frame block
     * @throws DMLRuntimeException if the dimensions of both frames do not match
     */
    public void merge(FrameBlock that) throws DMLRuntimeException {
        //nothing to merge for a missing or empty source
        if (that == null || that.getNumRows() == 0)
            return;

        //dimensions are validated up front, before any cell is modified
        if (!(getNumRows() == that.getNumRows() && getNumColumns() == that.getNumColumns()))
            throw new DMLRuntimeException("Dimension mismatch on merge disjoint (target=" + getNumRows() + "x"
                    + getNumColumns() + ", source=" + that.getNumRows() + "x" + that.getNumColumns() + ")");

        //take over column meta data wherever the source deviates from the default
        for (int j = 0; j < getNumColumns(); j++) {
            if (that.isColumnMetadataDefault(j))
                continue;
            ColumnMetadata target = _colmeta[j];
            ColumnMetadata source = that._colmeta[j];
            target.setNumDistinct(source.getNumDistinct());
            target.setMvValue(source.getMvValue());
        }

        //merge the data column by column
        for (int j = 0; j < getNumColumns(); j++) {
            if (_schema[j].equals(that._schema[j])) {
                //same value type: let the column array copy its non-zero entries
                _coldata[j].setNz(0, _numRows - 1, that._coldata[j]);
                continue;
            }

            //different value types: convert every cell and keep only non-null results
            for (int i = 0; i < _numRows; i++) {
                Object converted = UtilFunctions.objectToObject(_schema[j], that.get(i, j), true);
                if (converted != null)
                    set(i, j, converted);
            }
        }
    }

    /**
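     * This function ZEROES OUT the data in the slicing window applicable for this block: cells of
     * this block are copied into the result, except for the cells inside the given index range,
     * which are not written. If complementary is true, only the cells inside the range are copied instead.
     * 
     * @param result frame block to fill (reset and reused if not null, otherwise newly created with the schema of this block)
     * @param range index range describing the slicing window
     * @param complementary if true, copy only the cells inside the range; if false, copy only the cells outside the range
     * @param iRowStartSrc row offset applied when reading from this (source) block
     * @param iRowStartDest row offset applied when writing into the result block
     * @param brlen number of rows allocated in the result block
     * @param iMaxRowsToCopy maximum number of rows to copy (only used if complementary is false)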
     * @return frame block
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public FrameBlock zeroOutOperations(FrameBlock result, IndexRange range, boolean complementary,
            int iRowStartSrc, int iRowStartDest, int brlen, int iMaxRowsToCopy) throws DMLRuntimeException {
        int clen = getNumColumns();

        //prepare the output: create a new frame or reset the given one, in both
        //cases with the schema of this block, and allocate brlen rows per column
        if (result == null)
            result = new FrameBlock(getSchema());
        else {
            result.reset(0, true);
            result.setSchema(getSchema());
        }
        result.ensureAllocatedColumns(brlen);

        if (complementary) {
            //copy ONLY the cells inside the range; r iterates over the rows of the range,
            //is shifted by iRowStartSrc for reading and by iRowStartDest for writing,
            //and the loop stops once the target row would reach brlen
            for (int r = (int) range.rowStart; r <= range.rowEnd && r + iRowStartDest < brlen; r++) {
                for (int c = (int) range.colStart; c <= range.colEnd; c++)
                    result.set(r + iRowStartDest, c, get(r + iRowStartSrc, c));
            }
        } else {
            //copy everything EXCEPT the cells inside the range; r is the target row and
            //is shared by the three loops below, the matching source row is
            //r + iRowStartSrc - iRowStartDest, and at most iMaxRowsToCopy rows are handled
            int r = iRowStartDest;

            //(1) rows before the range: copy all columns
            for (; r < (int) range.rowStart && r - iRowStartDest < iMaxRowsToCopy; r++)
                for (int c = 0; c < clen; c++/*, offset++*/)
                    result.set(r, c, get(r + iRowStartSrc - iRowStartDest, c));

            //(2) rows up to the end of the range: copy only the columns left and right of the range
            for (; r <= (int) range.rowEnd && r - iRowStartDest < iMaxRowsToCopy; r++) {
                for (int c = 0; c < (int) range.colStart; c++)
                    result.set(r, c, get(r + iRowStartSrc - iRowStartDest, c));

                for (int c = (int) range.colEnd + 1; c < clen; c++)
                    result.set(r, c, get(r + iRowStartSrc - iRowStartDest, c));
            }

            //(3) remaining rows after the range: copy all columns
            for (; r - iRowStartDest < iMaxRowsToCopy; r++)
                for (int c = 0; c < clen; c++)
                    result.set(r, c, get(r + iRowStartSrc - iRowStartDest, c));
        }

        return result;
    }

    ///////
    // row iterators (over strings and boxed objects)

    /**
     * Base class of the row iterators over this frame block. The iterator visits the
     * rows from rl (inclusive) to ru (exclusive) and reuses a single row buffer, i.e.,
     * the array returned by next() is overwritten by the following call to next().
     */
    private abstract class RowIterator<T> implements Iterator<T[]> {
        protected T[] _curRow = null;
        protected int _curPos = -1;
        protected int _maxPos = -1;

        protected RowIterator(int rl, int ru) {
            _curPos = rl;
            _maxPos = ru;
            //the buffer has one slot per column of the enclosing frame block
            _curRow = createRow(getNumColumns());
        }

        @Override
        public boolean hasNext() {
            return (_curPos < _maxPos);
        }

        @Override
        public void remove() {
            //exception type required by the Iterator contract for unsupported removal;
            //it is a RuntimeException, hence existing handlers keep working
            throw new UnsupportedOperationException("RowIterator.remove is unsupported!");
        }

        /**
         * Fails fast if the iterator is exhausted, instead of silently reading
         * rows beyond the requested row range.
         */
        protected void checkHasNext() {
            if (!hasNext())
                throw new java.util.NoSuchElementException(
                        "No more rows: position " + _curPos + " reached the end position " + _maxPos);
        }

        /**
         * Creates the reusable row buffer.
         * 
         * @param size number of entries of the buffer
         * @return newly allocated row buffer
         */
        protected abstract T[] createRow(int size);
    }

    /**
     * Row iterator that returns every row as an array of strings; each cell is
     * converted via toString(), null cells stay null.
     */
    private class StringRowIterator extends RowIterator<String> {
        public StringRowIterator(int rl, int ru) {
            super(rl, ru);
        }

        @Override
        protected String[] createRow(int size) {
            return new String[size];
        }

        @Override
        public String[] next() {
            checkHasNext();
            for (int j = 0; j < getNumColumns(); j++) {
                Object tmp = get(_curPos, j);
                _curRow[j] = (tmp != null) ? tmp.toString() : null;
            }
            _curPos++;
            return _curRow;
        }
    }

    /**
     * Row iterator that returns every row as an array of the (boxed) cell objects.
     */
    private class ObjectRowIterator extends RowIterator<Object> {
        public ObjectRowIterator(int rl, int ru) {
            super(rl, ru);
        }

        @Override
        protected Object[] createRow(int size) {
            return new Object[size];
        }

        @Override
        public Object[] next() {
            checkHasNext();
            for (int j = 0; j < getNumColumns(); j++)
                _curRow[j] = get(_curPos, j);
            _curPos++;
            return _curRow;
        }
    }

    ///////
    // generic, resizable native arrays 

    /**
     * Base class for generic, resizable array of various value types. We 
     * use this custom class hierarchy instead of Trove or other libraries 
     * in order to avoid unnecessary dependencies.
     */
    private abstract static class Array<T> implements Writable {
        //upper bound for the grown capacity, slightly below Integer.MAX_VALUE
        //to leave headroom for the array allocation itself
        private static final int MAX_CAPACITY = Integer.MAX_VALUE - 8;

        //number of valid entries; the backing array of a subclass may be larger
        protected int _size = 0;

        /**
         * Computes the capacity to grow to: twice the current size, but at least 4.
         * The doubling is done in long arithmetic because the int product wraps to a
         * negative number once the size reaches 2^30, which previously made the
         * "grown" capacity collapse to 4; the result is capped at MAX_CAPACITY.
         * 
         * @return new capacity of the backing array
         */
        protected int newSize() {
            long doubled = 2L * _size;
            return (int) Math.min(Math.max(doubled, 4L), MAX_CAPACITY);
        }

        //returns the value at the given position
        public abstract T get(int index);

        //sets the value at the given position
        public abstract void set(int index, T value);

        //copies value[0, ru-rl] into positions [rl, ru] of this array
        public abstract void set(int rl, int ru, Array value);

        //copies value[rlSrc, rlSrc+ru-rl] into positions [rl, ru] of this array
        public abstract void set(int rl, int ru, Array value, int rlSrc);

        //copies the entries of value at positions [rl, ru] that differ from the
        //type's empty value (null, false, or 0) into the same positions of this array
        public abstract void setNz(int rl, int ru, Array value);

        //appends a value given in its string representation
        public abstract void append(String value);

        //appends a value, growing the backing array if necessary
        public abstract void append(T value);

        //returns a copy that holds the first _size entries
        public abstract Array clone();

        //returns a copy of the positions [rl, ru] (both inclusive)
        public abstract Array slice(int rl, int ru);
    }

    /**
     * Resizable array of String values; entries may be null.
     * 
     * NOTE(review): write() emits null entries as the empty string and readFields()
     * turns empty strings into null, hence empty strings do not survive a round trip.
     */
    private static class StringArray extends Array<String> {
        private String[] _data = null;

        public StringArray(String[] data) {
            _data = data;
            _size = _data.length;
        }

        public String get(int index) {
            return _data[index];
        }

        public void set(int index, String value) {
            _data[index] = value;
        }

        public void set(int rl, int ru, Array value) {
            //bulk copy that reads the source from its first position
            set(rl, ru, value, 0);
        }

        public void set(int rl, int ru, Array value, int rlSrc) {
            String[] source = ((StringArray) value)._data;
            int length = ru - rl + 1;
            System.arraycopy(source, rlSrc, _data, rl, length);
        }

        public void setNz(int rl, int ru, Array value) {
            String[] source = ((StringArray) value)._data;
            //only non-null source entries overwrite the target
            for (int pos = rl; pos <= ru; pos++) {
                String candidate = source[pos];
                if (candidate != null)
                    _data[pos] = candidate;
            }
        }

        public void append(String value) {
            //grow the backing array once it is completely filled
            if (_size >= _data.length)
                _data = Arrays.copyOf(_data, newSize());
            _data[_size] = value;
            _size++;
        }

        public void write(DataOutput out) throws IOException {
            for (int pos = 0; pos < _size; pos++) {
                String entry = _data[pos];
                if (entry != null)
                    out.writeUTF(entry);
                else
                    out.writeUTF("");
            }
        }

        public void readFields(DataInput in) throws IOException {
            //reads as many entries as the pre-allocated backing array holds
            _size = _data.length;
            for (int pos = 0; pos < _size; pos++) {
                String entry = in.readUTF();
                if (entry.isEmpty())
                    _data[pos] = null;
                else
                    _data[pos] = entry;
            }
        }

        public Array clone() {
            String[] copy = Arrays.copyOf(_data, _size);
            return new StringArray(copy);
        }

        public Array slice(int rl, int ru) {
            String[] part = Arrays.copyOfRange(_data, rl, ru + 1);
            return new StringArray(part);
        }
    }

    /**
     * Resizable array of boolean values stored as primitives; null values
     * passed to set or append are stored as false.
     */
    private static class BooleanArray extends Array<Boolean> {
        private boolean[] _data = null;

        public BooleanArray(boolean[] data) {
            _data = data;
            _size = _data.length;
        }

        public Boolean get(int index) {
            return _data[index];
        }

        public void set(int index, Boolean value) {
            if (value != null)
                _data[index] = value;
            else
                _data[index] = false;
        }

        public void set(int rl, int ru, Array value) {
            //bulk copy that reads the source from its first position
            set(rl, ru, value, 0);
        }

        public void set(int rl, int ru, Array value, int rlSrc) {
            boolean[] source = ((BooleanArray) value)._data;
            int length = ru - rl + 1;
            System.arraycopy(source, rlSrc, _data, rl, length);
        }

        public void setNz(int rl, int ru, Array value) {
            boolean[] source = ((BooleanArray) value)._data;
            //only true source entries overwrite the target
            for (int pos = rl; pos <= ru; pos++) {
                boolean candidate = source[pos];
                if (candidate)
                    _data[pos] = candidate;
            }
        }

        public void append(String value) {
            Boolean parsed = Boolean.parseBoolean(value);
            append(parsed);
        }

        public void append(Boolean value) {
            //grow the backing array once it is completely filled
            if (_size >= _data.length)
                _data = Arrays.copyOf(_data, newSize());
            boolean entry = (value != null) ? value : false;
            _data[_size] = entry;
            _size++;
        }

        public void write(DataOutput out) throws IOException {
            for (int pos = 0; pos < _size; pos++) {
                out.writeBoolean(_data[pos]);
            }
        }

        public void readFields(DataInput in) throws IOException {
            //reads as many entries as the pre-allocated backing array holds
            _size = _data.length;
            for (int pos = 0; pos < _size; pos++) {
                _data[pos] = in.readBoolean();
            }
        }

        public Array clone() {
            boolean[] copy = Arrays.copyOf(_data, _size);
            return new BooleanArray(copy);
        }

        public Array slice(int rl, int ru) {
            boolean[] part = Arrays.copyOfRange(_data, rl, ru + 1);
            return new BooleanArray(part);
        }
    }

    /**
     * Resizable array of long values stored as primitives; null values
     * passed to set or append are stored as 0.
     */
    private static class LongArray extends Array<Long> {
        private long[] _data = null;

        public LongArray(long[] data) {
            _data = data;
            _size = _data.length;
        }

        public Long get(int index) {
            return _data[index];
        }

        public void set(int index, Long value) {
            if (value != null)
                _data[index] = value;
            else
                _data[index] = 0L;
        }

        public void set(int rl, int ru, Array value) {
            //bulk copy that reads the source from its first position
            set(rl, ru, value, 0);
        }

        public void set(int rl, int ru, Array value, int rlSrc) {
            long[] source = ((LongArray) value)._data;
            int length = ru - rl + 1;
            System.arraycopy(source, rlSrc, _data, rl, length);
        }

        public void setNz(int rl, int ru, Array value) {
            long[] source = ((LongArray) value)._data;
            //only non-zero source entries overwrite the target
            for (int pos = rl; pos <= ru; pos++) {
                long candidate = source[pos];
                if (candidate != 0)
                    _data[pos] = candidate;
            }
        }

        public void append(String value) {
            //a null string is appended as a null value
            Long parsed = null;
            if (value != null)
                parsed = Long.parseLong(value);
            append(parsed);
        }

        public void append(Long value) {
            //grow the backing array once it is completely filled
            if (_size >= _data.length)
                _data = Arrays.copyOf(_data, newSize());
            long entry = (value != null) ? value : 0L;
            _data[_size] = entry;
            _size++;
        }

        public void write(DataOutput out) throws IOException {
            for (int pos = 0; pos < _size; pos++) {
                out.writeLong(_data[pos]);
            }
        }

        public void readFields(DataInput in) throws IOException {
            //reads as many entries as the pre-allocated backing array holds
            _size = _data.length;
            for (int pos = 0; pos < _size; pos++) {
                _data[pos] = in.readLong();
            }
        }

        public Array clone() {
            long[] copy = Arrays.copyOf(_data, _size);
            return new LongArray(copy);
        }

        public Array slice(int rl, int ru) {
            long[] part = Arrays.copyOfRange(_data, rl, ru + 1);
            return new LongArray(part);
        }
    }

    /**
     * Resizable array of double values stored as primitives; null values
     * passed to set or append are stored as 0.
     */
    private static class DoubleArray extends Array<Double> {
        private double[] _data = null;

        public DoubleArray(double[] data) {
            _data = data;
            _size = _data.length;
        }

        public Double get(int index) {
            return _data[index];
        }

        public void set(int index, Double value) {
            if (value != null)
                _data[index] = value;
            else
                _data[index] = 0d;
        }

        public void set(int rl, int ru, Array value) {
            //bulk copy that reads the source from its first position
            set(rl, ru, value, 0);
        }

        public void set(int rl, int ru, Array value, int rlSrc) {
            double[] source = ((DoubleArray) value)._data;
            int length = ru - rl + 1;
            System.arraycopy(source, rlSrc, _data, rl, length);
        }

        public void setNz(int rl, int ru, Array value) {
            double[] source = ((DoubleArray) value)._data;
            //only non-zero source entries overwrite the target
            for (int pos = rl; pos <= ru; pos++) {
                double candidate = source[pos];
                if (candidate != 0)
                    _data[pos] = candidate;
            }
        }

        public void append(String value) {
            //a null string is appended as a null value
            Double parsed = null;
            if (value != null)
                parsed = Double.parseDouble(value);
            append(parsed);
        }

        public void append(Double value) {
            //grow the backing array once it is completely filled
            if (_size >= _data.length)
                _data = Arrays.copyOf(_data, newSize());
            double entry = (value != null) ? value : 0d;
            _data[_size] = entry;
            _size++;
        }

        public void write(DataOutput out) throws IOException {
            for (int pos = 0; pos < _size; pos++) {
                out.writeDouble(_data[pos]);
            }
        }

        public void readFields(DataInput in) throws IOException {
            //reads as many entries as the pre-allocated backing array holds
            _size = _data.length;
            for (int pos = 0; pos < _size; pos++) {
                _data[pos] = in.readDouble();
            }
        }

        public Array clone() {
            double[] copy = Arrays.copyOf(_data, _size);
            return new DoubleArray(copy);
        }

        public Array slice(int rl, int ru) {
            double[] part = Arrays.copyOfRange(_data, rl, ru + 1);
            return new DoubleArray(part);
        }
    }

    /**
     * Serializable holder of per-column meta data: the number of distinct
     * values and the missing-value replacement string of a column.
     */
    public static class ColumnMetadata implements Serializable {
        private static final long serialVersionUID = -90094082422100311L;

        private long _ndistinct = 0;
        private String _mvValue = null;

        /**
         * Creates meta data with the given number of distinct values and no missing-value string.
         * 
         * @param ndistinct number of distinct values
         */
        public ColumnMetadata(long ndistinct) {
            this(ndistinct, null);
        }

        /**
         * Creates meta data with the given number of distinct values and missing-value string.
         * 
         * @param ndistinct number of distinct values
         * @param mvval missing-value string, may be null
         */
        public ColumnMetadata(long ndistinct, String mvval) {
            _ndistinct = ndistinct;
            _mvValue = mvval;
        }

        /**
         * Copy constructor.
         * 
         * @param that meta data to copy
         */
        public ColumnMetadata(ColumnMetadata that) {
            this(that._ndistinct, that._mvValue);
        }

        public long getNumDistinct() {
            return _ndistinct;
        }

        public String getMvValue() {
            return _mvValue;
        }

        public void setNumDistinct(long ndistinct) {
            _ndistinct = ndistinct;
        }

        public void setMvValue(String mvVal) {
            _mvValue = mvVal;
        }
    }
}