Java tutorial: a walk through the com.ibm.bi.dml.hops.Hop base class
/** * (C) Copyright IBM Corp. 2010, 2015 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.ibm.bi.dml.hops; import java.util.ArrayList; import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.ibm.bi.dml.api.DMLScript; import com.ibm.bi.dml.api.DMLScript.RUNTIME_PLATFORM; import com.ibm.bi.dml.conf.ConfigurationManager; import com.ibm.bi.dml.conf.DMLConfig; import com.ibm.bi.dml.lops.CSVReBlock; import com.ibm.bi.dml.lops.Checkpoint; import com.ibm.bi.dml.lops.Data; import com.ibm.bi.dml.lops.Lop; import com.ibm.bi.dml.lops.LopsException; import com.ibm.bi.dml.lops.ReBlock; import com.ibm.bi.dml.lops.UnaryCP; import com.ibm.bi.dml.lops.LopProperties.ExecType; import com.ibm.bi.dml.parser.Expression.DataType; import com.ibm.bi.dml.parser.Expression.ValueType; import com.ibm.bi.dml.runtime.controlprogram.LocalVariableMap; import com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext; import com.ibm.bi.dml.runtime.controlprogram.parfor.ProgramConverter; import com.ibm.bi.dml.runtime.controlprogram.parfor.util.IDSequence; import com.ibm.bi.dml.runtime.matrix.MatrixCharacteristics; import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock; import com.ibm.bi.dml.runtime.util.UtilFunctions; public abstract class Hop { protected static final Log LOG = LogFactory.getLog(Hop.class.getName()); public static final long CPThreshold = 2000; protected static final boolean BREAKONSCALARS = false; protected static final boolean SPLITLARGEMATRIXMULT = true; public enum VisitStatus { DONE, VISITING, NOTVISITED, } /** * Optional hop interface, to be implemented by multi-threaded hops. 
*/ public interface MultiThreadedHop { public abstract void setMaxNumThreads(int k); public abstract int getMaxNumThreads(); } // static variable to assign a unique ID to every hop that is created private static IDSequence _seqHopID = new IDSequence(); protected long _ID; protected String _name; protected DataType _dataType; protected ValueType _valueType; protected VisitStatus _visited = VisitStatus.NOTVISITED; protected long _dim1 = -1; protected long _dim2 = -1; protected long _rows_in_block = -1; protected long _cols_in_block = -1; protected long _nnz = -1; protected ArrayList<Hop> _parent = new ArrayList<Hop>(); protected ArrayList<Hop> _input = new ArrayList<Hop>(); protected ExecType _etype = null; //currently used exec type protected ExecType _etypeForced = null; //exec type forced via platform or external optimizer // Estimated size for the output produced from this Hop protected double _outputMemEstimate = OptimizerUtils.INVALID_SIZE; // Estimated size for the entire operation represented by this Hop // It includes the memory required for all inputs as well as the output protected double _memEstimate = OptimizerUtils.INVALID_SIZE; protected double _processingMemEstimate = 0; protected double _spBroadcastMemEstimate = 0; // indicates if there are unknowns during compilation // (in that case re-compilation ensures robustness and efficiency) protected boolean _requiresRecompile = false; // indicates if the output of this hop needs to be reblocked // (usually this happens on persistent read dataops) protected boolean _requiresReblock = false; // indicates if the output of this hop needs to be checkpointed (cached) // (the default storage level for caching is not yet exposed here) protected boolean _requiresCheckpoint = false; // indicates if the output of this hop needs to contain materialized empty blocks // if those exist; otherwise only blocks w/ non-zero values are materialized protected boolean _outputEmptyBlocks = true; private Lop _lops = null; protected Hop() { //default constructor for clone } public Hop(String l, DataType dt, ValueType vt) { _ID = getNextHopID(); setName(l); setDataType(dt); setValueType(vt); } private static long getNextHopID() { return _seqHopID.getNextID(); } public long getHopID() { return _ID; } public ExecType getExecType() { return _etype; } public void resetExecType() { _etype = null; } /** * * @return */ public ExecType getForcedExecType() { return _etypeForced; } /** * * @param etype */ public void setForcedExecType(ExecType etype) { _etypeForced = etype; } /** * * @return */ public abstract boolean allowsAllExecTypes(); /** * Defines if this operation is transpose-safe, which means that * the result of op(input) is equivalent to op(t(input)). * Usually, this applies to aggregate operations with fixed output * dimension. Finally, this information is very useful in order to * safely optimize the plan for sparse vectors, which otherwise * would (currently) always be represented as dense.
* * * @return */ public boolean isTransposeSafe() { //by default: it is conservatively defined as unsafe return false; } /** * */ public void checkAndSetForcedPlatform() { if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE) _etypeForced = ExecType.CP; else if (DMLScript.rtplatform == RUNTIME_PLATFORM.HADOOP) _etypeForced = ExecType.MR; else if (DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK) _etypeForced = ExecType.SPARK; } /** * */ public void checkAndSetInvalidCPDimsAndSize() { if (_etype == ExecType.CP) { boolean invalid = false; //Step 1: check dimensions of output and all inputs (INTEGER) invalid |= !OptimizerUtils.isValidCPDimensions(_dim1, _dim2); for (Hop in : getInput()) invalid |= !OptimizerUtils.isValidCPDimensions(in._dim1, in._dim2); //Step 2: check valid output and input sizes for cp (<16GB for DENSE) //(if the memory estimate is smaller than max_numcells we are guaranteed to have it in sparse representation) invalid |= !(OptimizerUtils.isValidCPMatrixSize(_dim1, _dim2, OptimizerUtils.getSparsity(_dim1, _dim2, _nnz)) || getOutputMemEstimate() < OptimizerUtils.MAX_NUMCELLS_CP_DENSE); for (Hop in : getInput()) invalid |= !(OptimizerUtils.isValidCPMatrixSize(in._dim1, in._dim2, OptimizerUtils.getSparsity(in._dim1, in._dim2, in._nnz)) || in.getOutputMemEstimate() < OptimizerUtils.MAX_NUMCELLS_CP_DENSE); //force exec type MR/Spark if necessary if (invalid) { if (DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID) _etype = ExecType.MR; else if (DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK) _etype = ExecType.SPARK; } } } public void setRequiresReblock(boolean flag) { _requiresReblock = flag; } public boolean hasMatrixInputWithDifferentBlocksizes() { for (Hop c : getInput()) { if (c.getDataType() == DataType.MATRIX && (getRowsInBlock() != c.getRowsInBlock() || getColsInBlock() != c.getColsInBlock())) { return true; } } return false; } public void setOutputBlocksizes(long brlen, long bclen) { setRowsInBlock(brlen); setColsInBlock(bclen); } public boolean requiresReblock() { return _requiresReblock; } public void setRequiresCheckpoint(boolean flag) { _requiresCheckpoint = flag; } public boolean requiresCheckpoint() { return _requiresCheckpoint; } /** * * @throws HopsException */ public void constructAndSetLopsDataFlowProperties() throws HopsException { //Step 1: construct reblock lop if required (output of hop) constructAndSetReblockLopIfRequired(); //Step 2: construct checkpoint lop if required (output of hop or reblock) constructAndSetCheckpointLopIfRequired(); } /** * * @throws HopsException */ private void constructAndSetReblockLopIfRequired() throws HopsException { //determine execution type ExecType et = ExecType.CP; if (DMLScript.rtplatform != RUNTIME_PLATFORM.SINGLE_NODE && !(getDataType() == DataType.SCALAR)) { et = OptimizerUtils.isSparkExecutionMode() ?
ExecType.SPARK : ExecType.MR; } //add reblock lop to output if required if (_requiresReblock && et != ExecType.CP) { Lop input = getLops(); Lop reblock = null; try { if ((this instanceof DataOp // CSV && ((DataOp) this).getDataOpType() == DataOpTypes.PERSISTENTREAD && ((DataOp) this).getInputFormatType() == FileFormatTypes.CSV) || (this instanceof ParameterizedBuiltinOp && ((ParameterizedBuiltinOp) this).getOp() == ParamBuiltinOp.TRANSFORM)) { reblock = new CSVReBlock(input, getRowsInBlock(), getColsInBlock(), getDataType(), getValueType(), et); } else //TEXT / MM / BINARYBLOCK / BINARYCELL { reblock = new ReBlock(input, getRowsInBlock(), getColsInBlock(), getDataType(), getValueType(), _outputEmptyBlocks, et); } } catch (LopsException ex) { throw new HopsException(ex); } setOutputDimensions(reblock); setLineNumbers(reblock); setLops(reblock); } } /** * * @throws HopsException */ private void constructAndSetCheckpointLopIfRequired() throws HopsException { //determine execution type ExecType et = ExecType.CP; if (OptimizerUtils.isSparkExecutionMode() && getDataType() != DataType.SCALAR) { //conditional checkpoint based on memory estimate in order to //(1) avoid unnecessary persist and unpersist calls, and //(2) avoid unnecessary creation of spark context (incl executors) if (OptimizerUtils.isHybridExecutionMode() && _outputMemEstimate < OptimizerUtils.getLocalMemBudget() || _etypeForced == ExecType.CP) { et = ExecType.CP; } else //default case { et = ExecType.SPARK; } } //add checkpoint lop to output if required if (_requiresCheckpoint && et != ExecType.CP) { try { //investigate need for serialized storage of large sparse matrices //(compile- instead of runtime-level for better debugging) boolean serializedStorage = false; if (dimsKnown(true)) { double matrixPSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(_dim1, _dim2, _rows_in_block, _cols_in_block, _nnz); double dataCache = SparkExecutionContext.getConfiguredTotalDataMemory(true); serializedStorage = (MatrixBlock.evalSparseFormatInMemory(_dim1, _dim2, _nnz) && matrixPSize > dataCache); //sparse in-memory does not fit in agg mem } else { setRequiresRecompile(); } //construct checkpoint w/ right storage level Lop input = getLops(); Lop chkpoint = new Checkpoint(input, getDataType(), getValueType(), serializedStorage ? Checkpoint.getSerializeStorageLevelString() : Checkpoint.getDefaultStorageLevelString()); setOutputDimensions(chkpoint); setLineNumbers(chkpoint); setLops(chkpoint); } catch (LopsException ex) { throw new HopsException(ex); } } } /** * * @param inputPos * @return * @throws HopsException * @throws LopsException */ public static Lop createOffsetLop(Hop hop, boolean repCols) throws HopsException, LopsException { Lop offset = null; if (OptimizerUtils.ALLOW_DYN_RECOMPILATION && hop.dimsKnown()) { // If dynamic recompilation is enabled and dims are known, we can replace the ncol with // a literal in order to increase the piggybacking potential. This is safe because append // is always marked for recompilation and hence, we have propagated the exact dimensions. offset = Data.createLiteralLop(ValueType.INT, String.valueOf(repCols ? hop.getDim2() : hop.getDim1())); } else { offset = new UnaryCP(hop.constructLops(), repCols ? 
UnaryCP.OperationTypes.NCOL : UnaryCP.OperationTypes.NROW, DataType.SCALAR, ValueType.INT); } offset.getOutputParameters().setDimensions(0, 0, 0, 0, -1); offset.setAllPositions(hop.getBeginLine(), hop.getBeginColumn(), hop.getEndLine(), hop.getEndColumn()); return offset; } public void setOutputEmptyBlocks(boolean flag) { _outputEmptyBlocks = flag; } public boolean isOutputEmptyBlocks() { return _outputEmptyBlocks; } /** * Returns the memory estimate for the output produced from this Hop. * It must be invoked only within Hops. From outside Hops, one must * only use getMemEstimate(), which gives memory required to store * all inputs and the output. * * @return */ protected double getOutputSize() { return _outputMemEstimate; } /** * * @return */ protected double getInputSize() { double sum = 0; int len = _input.size(); for (int i = 0; i < len; i++) //for all inputs { Hop hi = _input.get(i); double hmout = hi.getOutputMemEstimate(); if (hmout > 1024 * 1024) {//for relevant sizes //check if already included in estimate (if an input is used //multiple times it is still only required once in memory) //(note that this check benefits from common subexpression elimination) boolean flag = false; for (int j = 0; j < i; j++) flag |= (hi == _input.get(j)); hmout = flag ? 0 : hmout; } sum += hmout; } //for(Hop h : _input ) { // sum += h._outputMemEstimate; //} return sum; } /** * * @return */ protected double getInputOutputSize() { double sum = 0; sum += _outputMemEstimate; sum += _processingMemEstimate; sum += getInputSize(); return sum; } /** * * @param pos * @return */ protected double getInputSize(int pos) { double ret = 0; if (_input.size() > pos) ret = _input.get(pos)._outputMemEstimate; return ret; } protected double getIntermediateSize() { return _processingMemEstimate; } /** * NOTES: * * Purpose: Whenever the output dimensions / sparsity of a hop are unknown, this hop * should store its worst-case output statistics (if known) in the memo table. Subsequent * hops can then use these worst-case statistics to infer their own output statistics and memory estimates. * * Invocation: Intended to be called for ALL root nodes of one Hops DAG with the same * (initially empty) memo table. * * @param memo * @return */ public double getMemEstimate() { if (OptimizerUtils.isMemoryBasedOptLevel()) { if (!isMemEstimated()) { //LOG.warn("Nonexisting memory estimate - reestimating w/o memo table."); computeMemEstimate(new MemoTable()); } return _memEstimate; } else { return OptimizerUtils.INVALID_SIZE; } } /** * Sets the memory estimate in bytes. * * @param mem */ public void setMemEstimate(double mem) { _memEstimate = mem; } public void clearMemEstimate() { _memEstimate = OptimizerUtils.INVALID_SIZE; } public boolean isMemEstimated() { return (_memEstimate != OptimizerUtils.INVALID_SIZE); } //wrappers for meaningful public names to memory estimates. public double getInputMemEstimate() { return getInputSize(); } public double getOutputMemEstimate() { return getOutputSize(); } public double getIntermediateMemEstimate() { return getIntermediateSize(); } public double getSpBroadcastSize() { return _spBroadcastMemEstimate; } /** * Computes the estimate of memory required to store the input/output of this hop in memory. * This is the default implementation (orchestration of hop-specific implementation) * that should suffice for most hops. If a hop requires more control, this method should * be overridden with awareness of (1) output estimates, and (2) propagation of worst-case * matrix characteristics (dimensions, sparsity).
* * TODO remove memo table and, on constructor refresh, inference in refresh, single compute mem, * maybe general computeMemEstimate, flags to indicate if estimate or not. * * @return computed estimate */ public void computeMemEstimate(MemoTable memo) { long[] wstats = null; //////// //Step 1) Compute hop output memory estimate (incl size inference) switch (getDataType()) { case SCALAR: { //memory estimate always known if (getValueType() == ValueType.DOUBLE) //default case _outputMemEstimate = OptimizerUtils.DOUBLE_SIZE; else //literalops, dataops _outputMemEstimate = computeOutputMemEstimate(_dim1, _dim2, _nnz); break; } case MATRIX: { //1a) mem estimate based on exactly known dimensions and sparsity if (dimsKnown(true)) { //nnz always exactly known (see dimsKnown(true)) _outputMemEstimate = computeOutputMemEstimate(_dim1, _dim2, _nnz); } //1b) infer output statistics and mem estimate based on these statistics else if (memo.hasInputStatistics(this)) { //infer the output stats wstats = inferOutputCharacteristics(memo); if (wstats != null) { //use worst case characteristics to estimate mem long lnnz = ((wstats[2] >= 0) ? wstats[2] : wstats[0] * wstats[1]); _outputMemEstimate = computeOutputMemEstimate(wstats[0], wstats[1], lnnz); //propagate worst-case estimate memo.memoizeStatistics(getHopID(), wstats[0], wstats[1], wstats[2]); } else if (dimsKnown()) { //nnz unknown, estimate mem as dense long lnnz = _dim1 * _dim2; _outputMemEstimate = computeOutputMemEstimate(_dim1, _dim2, lnnz); } else { //unknown output size _outputMemEstimate = OptimizerUtils.DEFAULT_SIZE; } } //1c) mem estimate based on exactly known dimensions and unknown sparsity //(required e.g., for datagenops w/o any input statistics) else if (dimsKnown()) { //nnz unknown, estimate mem as dense long lnnz = _dim1 * _dim2; _outputMemEstimate = computeOutputMemEstimate(_dim1, _dim2, lnnz); } //1d) fallback: unknown output size else { _outputMemEstimate = OptimizerUtils.DEFAULT_SIZE; } break; } case OBJECT: case UNKNOWN: case FRAME: { //memory estimate always unknown _outputMemEstimate = OptimizerUtils.DEFAULT_SIZE; break; } } //////// //Step 2) Compute hop intermediate memory estimate //note: ensure consistency w/ step 1 (for simplified debugging) if (dimsKnown(true)) { //incl scalar output //nnz always exactly known (see dimsKnown(true)) _processingMemEstimate = computeIntermediateMemEstimate(_dim1, _dim2, _nnz); } else if (wstats != null) { //use worst case characteristics to estimate mem long lnnz = ((wstats[2] >= 0) ? wstats[2] : wstats[0] * wstats[1]); _processingMemEstimate = computeIntermediateMemEstimate(wstats[0], wstats[1], lnnz); } else if (dimsKnown()) { //nnz unknown, estimate mem as dense long lnnz = _dim1 * _dim2; _processingMemEstimate = computeIntermediateMemEstimate(_dim1, _dim2, lnnz); } //////// //Step 3) Compute final hop memory estimate //final estimate (sum of inputs/intermediates/output) _memEstimate = getInputOutputSize(); } /** * Computes the hop-specific output memory estimate in bytes. Should be 0 if not * applicable. * * @param dim1 * @param dim2 * @param nnz * @return */ protected abstract double computeOutputMemEstimate(long dim1, long dim2, long nnz); /** * Computes the hop-specific intermediate memory estimate in bytes. Should be 0 if not * applicable. 
* * @param dim1 * @param dim2 * @param nnz * @return */ protected abstract double computeIntermediateMemEstimate(long dim1, long dim2, long nnz); /** * Computes the output matrix characteristics (rows, cols, nnz) based on worst-case output * and/or input estimates. Should return null if dimensions are unknown. * * @param memo * @return */ protected abstract long[] inferOutputCharacteristics(MemoTable memo); /** * This function is used only for sanity check. * Returns true if estimates for all the hops in the DAG rooted at the current * hop are computed. Returns false if any of the hops have INVALID estimate. * * @return */ public boolean checkEstimates() { boolean childStatus = true; for (Hop h : this.getInput()) childStatus = childStatus && h.checkEstimates(); return childStatus && (_memEstimate != OptimizerUtils.INVALID_SIZE); } /** * Recursively computes memory estimates for all the Hops in the DAG rooted at the * current hop pointed by <code>this</code>. * */ public void refreshMemEstimates(MemoTable memo) { if (getVisited() == VisitStatus.DONE) return; for (Hop h : this.getInput()) h.refreshMemEstimates(memo); this.computeMemEstimate(memo); this.setVisited(VisitStatus.DONE); } /** * This method determines the execution type (CP, MR) based ONLY on the * estimated memory footprint required for this operation, which includes * memory for all inputs and the output represented by this Hop. * * It is used when <code>OptimizationType = MEMORY_BASED</code>. * This optimization schedules an operation to CP whenever inputs+output * fit in memory -- note that this decision MAY NOT be optimal in terms of * execution time. * * @return */ protected ExecType findExecTypeByMemEstimate() { ExecType et = null; char c = ' '; if (getMemEstimate() < OptimizerUtils.getLocalMemBudget()) { et = ExecType.CP; } else { if (DMLScript.rtplatform == DMLScript.RUNTIME_PLATFORM.HYBRID) et = ExecType.MR; else if (DMLScript.rtplatform == DMLScript.RUNTIME_PLATFORM.HYBRID_SPARK) et = ExecType.SPARK; c = '*'; } if (LOG.isDebugEnabled()) { String s = String.format(" %c %-5s %-8s (%s,%s) %s", c, getHopID(), getOpString(), OptimizerUtils.toMB(_outputMemEstimate), OptimizerUtils.toMB(_memEstimate), et); //System.out.println(s); LOG.debug(s); } return et; } public ArrayList<Hop> getParent() { return _parent; } public ArrayList<Hop> getInput() { return _input; } /** * Create bidirectional links * * @param h */ public void addInput(Hop h) { _input.add(h); h._parent.add(this); } public long getRowsInBlock() { return _rows_in_block; } public void setRowsInBlock(long rowsInBlock) { _rows_in_block = rowsInBlock; } public long getColsInBlock() { return _cols_in_block; } public void setColsInBlock(long colsInBlock) { _cols_in_block = colsInBlock; } public void setNnz(long nnz) { _nnz = nnz; } public long getNnz() { return _nnz; } public abstract Lop constructLops() throws HopsException, LopsException; protected abstract ExecType optFindExecType() throws HopsException; public abstract String getOpString(); protected boolean isVector() { return (dimsKnown() && (_dim1 == 1 || _dim2 == 1)); } protected boolean areDimsBelowThreshold() { return (dimsKnown() && _dim1 <= Hop.CPThreshold && _dim2 <= Hop.CPThreshold); } public boolean dimsKnown() { return (_dataType == DataType.SCALAR || (_dataType == DataType.MATRIX && _dim1 > 0 && _dim2 > 0)); } public boolean dimsKnown(boolean includeNnz) { return (_dataType == DataType.SCALAR || (_dataType == DataType.MATRIX && _dim1 > 0 && _dim2 > 0 && ((includeNnz) ? 
_nnz >= 0 : true))); } public boolean dimsKnownAny() { return (_dataType == DataType.SCALAR || (_dataType == DataType.MATRIX && (_dim1 > 0 || _dim2 > 0))); } public static void resetVisitStatus(ArrayList<Hop> hops) { if (hops != null) for (Hop hopRoot : hops) hopRoot.resetVisitStatus(); } public void resetVisitStatus() { if (getVisited() == Hop.VisitStatus.NOTVISITED) return; for (Hop h : this.getInput()) h.resetVisitStatus(); setVisited(Hop.VisitStatus.NOTVISITED); } public static void resetRecompilationFlag(ArrayList<Hop> hops, ExecType et) { resetVisitStatus(hops); for (Hop hopRoot : hops) hopRoot.resetRecompilationFlag(et); } public static void resetRecompilationFlag(Hop hops, ExecType et) { hops.resetVisitStatus(); hops.resetRecompilationFlag(et); } private void resetRecompilationFlag(ExecType et) { if (getVisited() == VisitStatus.DONE) return; //process child hops for (Hop h : getInput()) h.resetRecompilationFlag(et); //reset recompile flag if (et == null || getExecType() == et || getExecType() == null) _requiresRecompile = false; this.setVisited(VisitStatus.DONE); } /** * Test and debugging only. * * @param h * @throws HopsException */ public void checkParentChildPointers() throws HopsException { if (getVisited() == VisitStatus.DONE) return; for (Hop in : getInput()) { if (!in.getParent().contains(this)) throw new HopsException("Parent-Child pointers incorrect."); in.checkParentChildPointers(); } setVisited(VisitStatus.DONE); } public void printMe() throws HopsException { if (LOG.isDebugEnabled()) { StringBuilder s = new StringBuilder(""); s.append(getClass().getSimpleName() + " " + getHopID() + "\n"); s.append(" Label: " + getName() + "; DataType: " + _dataType + "; ValueType: " + _valueType + "\n"); s.append(" Parent: "); for (Hop h : getParent()) { s.append(h.hashCode() + "; "); } ; s.append("\n Input: "); for (Hop h : getInput()) { s.append(h.getHopID() + "; "); } s.append("\n dims [" + _dim1 + "," + _dim2 + "] blk [" + _rows_in_block + "," + _cols_in_block + "] nnz " + _nnz); s.append(" MemEstimate = Out " + (_outputMemEstimate / 1024 / 1024) + " MB, In&Out " + (_memEstimate / 1024 / 1024) + " MB"); LOG.debug(s.toString()); } } public long getDim1() { return _dim1; } public void setDim1(long dim1) { _dim1 = dim1; } public long getDim2() { return _dim2; } public void setDim2(long dim2) { _dim2 = dim2; } protected void setOutputDimensions(Lop lop) throws HopsException { lop.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz()); } public Lop getLops() { return _lops; } public void setLops(Lop lops) { _lops = lops; } public VisitStatus getVisited() { return _visited; } public DataType getDataType() { return _dataType; } public void setDataType(DataType dt) { _dataType = dt; } public void setVisited(VisitStatus visited) { _visited = visited; } public void setName(String _name) { this._name = _name; } public String getName() { return _name; } public ValueType getValueType() { return _valueType; } public void setValueType(ValueType vt) { _valueType = vt; } public enum OpOp1 { NOT, ABS, SIN, COS, TAN, ASIN, ACOS, ATAN, SQRT, LOG, EXP, CAST_AS_SCALAR, CAST_AS_MATRIX, CAST_AS_DOUBLE, CAST_AS_INT, CAST_AS_BOOLEAN, PRINT, EIGEN, NROW, NCOL, LENGTH, ROUND, IQM, STOP, CEIL, FLOOR, MEDIAN, INVERSE, //cumulative sums, products, extreme values CUMSUM, CUMPROD, CUMMIN, CUMMAX, //fused ML-specific operators for performance SPROP, //sample proportion: P * (1 - P) SIGMOID, //sigmoid function: 1 / (1 + exp(-X)) SELP, //select positive: X * (X>0) } // 
Operations that require two operands public enum OpOp2 { PLUS, MINUS, MULT, DIV, MODULUS, INTDIV, LESS, LESSEQUAL, GREATER, GREATEREQUAL, EQUAL, NOTEQUAL, MIN, MAX, AND, OR, LOG, POW, PRINT, CONCAT, QUANTILE, INTERQUANTILE, IQM, CENTRALMOMENT, COVARIANCE, CBIND, RBIND, SEQINCR, SOLVE, MEDIAN, INVALID, //fused ML-specific operators for performance MINUS_NZ, //sparse-safe minus: X-(mean*ppred(X,0,!=)) LOG_NZ, //sparse-safe log; ppred(X,0,"!=")*log(X,0.5) MINUS1_MULT, //1-X*Y }; // Operations that require 3 operands public enum OpOp3 { QUANTILE, INTERQUANTILE, CTABLE, CENTRALMOMENT, COVARIANCE, INVALID }; // Operations that require 4 operands public enum OpOp4 { WSLOSS, //weighted sloss mm WSIGMOID, //weighted sigmoid mm WDIVMM, //weighted divide mm WCEMM, //weighted cross entropy mm INVALID }; public enum AggOp { SUM, SUM_SQ, MIN, MAX, TRACE, PROD, MEAN, MAXINDEX, MININDEX }; public enum ReOrgOp { TRANSPOSE, DIAG, RESHAPE, SORT //Note: Diag types are invalid because for unknown sizes this would //create incorrect plans (now we try to infer it for memory estimates //and rewrites but the final choice is made during runtime) //DIAG_V2M, DIAG_M2V, }; public enum DataGenMethod { RAND, SEQ, SINIT, SAMPLE, INVALID }; public enum ParamBuiltinOp { INVALID, CDF, INVCDF, GROUPEDAGG, RMEMPTY, REPLACE, REXPAND, TRANSFORM }; /** * Functions that are built in, but whose execution takes place in an * external library */ public enum ExtBuiltInOp { EIGEN, CHOLESKY }; public enum FileFormatTypes { TEXT, BINARY, MM, CSV }; public enum DataOpTypes { PERSISTENTREAD, PERSISTENTWRITE, TRANSIENTREAD, TRANSIENTWRITE, FUNCTIONOUTPUT }; public enum Direction { RowCol, Row, Col }; protected static final HashMap<DataOpTypes, com.ibm.bi.dml.lops.Data.OperationTypes> HopsData2Lops; static { HopsData2Lops = new HashMap<Hop.DataOpTypes, com.ibm.bi.dml.lops.Data.OperationTypes>(); HopsData2Lops.put(DataOpTypes.PERSISTENTREAD, com.ibm.bi.dml.lops.Data.OperationTypes.READ); HopsData2Lops.put(DataOpTypes.PERSISTENTWRITE, com.ibm.bi.dml.lops.Data.OperationTypes.WRITE); HopsData2Lops.put(DataOpTypes.TRANSIENTWRITE, com.ibm.bi.dml.lops.Data.OperationTypes.WRITE); HopsData2Lops.put(DataOpTypes.TRANSIENTREAD, com.ibm.bi.dml.lops.Data.OperationTypes.READ); } protected static final HashMap<Hop.AggOp, com.ibm.bi.dml.lops.Aggregate.OperationTypes> HopsAgg2Lops; static { HopsAgg2Lops = new HashMap<Hop.AggOp, com.ibm.bi.dml.lops.Aggregate.OperationTypes>(); HopsAgg2Lops.put(AggOp.SUM, com.ibm.bi.dml.lops.Aggregate.OperationTypes.KahanSum); HopsAgg2Lops.put(AggOp.SUM_SQ, com.ibm.bi.dml.lops.Aggregate.OperationTypes.KahanSumSq); HopsAgg2Lops.put(AggOp.TRACE, com.ibm.bi.dml.lops.Aggregate.OperationTypes.KahanTrace); HopsAgg2Lops.put(AggOp.MIN, com.ibm.bi.dml.lops.Aggregate.OperationTypes.Min); HopsAgg2Lops.put(AggOp.MAX, com.ibm.bi.dml.lops.Aggregate.OperationTypes.Max); HopsAgg2Lops.put(AggOp.MAXINDEX, com.ibm.bi.dml.lops.Aggregate.OperationTypes.MaxIndex); HopsAgg2Lops.put(AggOp.MININDEX, com.ibm.bi.dml.lops.Aggregate.OperationTypes.MinIndex); HopsAgg2Lops.put(AggOp.PROD, com.ibm.bi.dml.lops.Aggregate.OperationTypes.Product); HopsAgg2Lops.put(AggOp.MEAN, com.ibm.bi.dml.lops.Aggregate.OperationTypes.Mean); } protected static final HashMap<ReOrgOp, com.ibm.bi.dml.lops.Transform.OperationTypes> HopsTransf2Lops; static { HopsTransf2Lops = new HashMap<ReOrgOp, com.ibm.bi.dml.lops.Transform.OperationTypes>(); HopsTransf2Lops.put(ReOrgOp.TRANSPOSE, com.ibm.bi.dml.lops.Transform.OperationTypes.Transpose); HopsTransf2Lops.put(ReOrgOp.DIAG, 
com.ibm.bi.dml.lops.Transform.OperationTypes.Diag); HopsTransf2Lops.put(ReOrgOp.RESHAPE, com.ibm.bi.dml.lops.Transform.OperationTypes.Reshape); HopsTransf2Lops.put(ReOrgOp.SORT, com.ibm.bi.dml.lops.Transform.OperationTypes.Sort); } protected static final HashMap<Hop.Direction, com.ibm.bi.dml.lops.PartialAggregate.DirectionTypes> HopsDirection2Lops; static { HopsDirection2Lops = new HashMap<Hop.Direction, com.ibm.bi.dml.lops.PartialAggregate.DirectionTypes>(); HopsDirection2Lops.put(Direction.RowCol, com.ibm.bi.dml.lops.PartialAggregate.DirectionTypes.RowCol); HopsDirection2Lops.put(Direction.Col, com.ibm.bi.dml.lops.PartialAggregate.DirectionTypes.Col); HopsDirection2Lops.put(Direction.Row, com.ibm.bi.dml.lops.PartialAggregate.DirectionTypes.Row); } protected static final HashMap<Hop.OpOp2, com.ibm.bi.dml.lops.Binary.OperationTypes> HopsOpOp2LopsB; static { HopsOpOp2LopsB = new HashMap<Hop.OpOp2, com.ibm.bi.dml.lops.Binary.OperationTypes>(); HopsOpOp2LopsB.put(OpOp2.PLUS, com.ibm.bi.dml.lops.Binary.OperationTypes.ADD); HopsOpOp2LopsB.put(OpOp2.MINUS, com.ibm.bi.dml.lops.Binary.OperationTypes.SUBTRACT); HopsOpOp2LopsB.put(OpOp2.MULT, com.ibm.bi.dml.lops.Binary.OperationTypes.MULTIPLY); HopsOpOp2LopsB.put(OpOp2.DIV, com.ibm.bi.dml.lops.Binary.OperationTypes.DIVIDE); HopsOpOp2LopsB.put(OpOp2.MODULUS, com.ibm.bi.dml.lops.Binary.OperationTypes.MODULUS); HopsOpOp2LopsB.put(OpOp2.INTDIV, com.ibm.bi.dml.lops.Binary.OperationTypes.INTDIV); HopsOpOp2LopsB.put(OpOp2.MINUS1_MULT, com.ibm.bi.dml.lops.Binary.OperationTypes.MINUS1_MULTIPLY); HopsOpOp2LopsB.put(OpOp2.LESS, com.ibm.bi.dml.lops.Binary.OperationTypes.LESS_THAN); HopsOpOp2LopsB.put(OpOp2.LESSEQUAL, com.ibm.bi.dml.lops.Binary.OperationTypes.LESS_THAN_OR_EQUALS); HopsOpOp2LopsB.put(OpOp2.GREATER, com.ibm.bi.dml.lops.Binary.OperationTypes.GREATER_THAN); HopsOpOp2LopsB.put(OpOp2.GREATEREQUAL, com.ibm.bi.dml.lops.Binary.OperationTypes.GREATER_THAN_OR_EQUALS); HopsOpOp2LopsB.put(OpOp2.EQUAL, com.ibm.bi.dml.lops.Binary.OperationTypes.EQUALS); HopsOpOp2LopsB.put(OpOp2.NOTEQUAL, com.ibm.bi.dml.lops.Binary.OperationTypes.NOT_EQUALS); HopsOpOp2LopsB.put(OpOp2.MIN, com.ibm.bi.dml.lops.Binary.OperationTypes.MIN); HopsOpOp2LopsB.put(OpOp2.MAX, com.ibm.bi.dml.lops.Binary.OperationTypes.MAX); HopsOpOp2LopsB.put(OpOp2.AND, com.ibm.bi.dml.lops.Binary.OperationTypes.AND); HopsOpOp2LopsB.put(OpOp2.OR, com.ibm.bi.dml.lops.Binary.OperationTypes.OR); HopsOpOp2LopsB.put(OpOp2.SOLVE, com.ibm.bi.dml.lops.Binary.OperationTypes.SOLVE); HopsOpOp2LopsB.put(OpOp2.POW, com.ibm.bi.dml.lops.Binary.OperationTypes.POW); HopsOpOp2LopsB.put(OpOp2.LOG, com.ibm.bi.dml.lops.Binary.OperationTypes.NOTSUPPORTED); } protected static final HashMap<Hop.OpOp2, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes> HopsOpOp2LopsBS; static { HopsOpOp2LopsBS = new HashMap<Hop.OpOp2, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes>(); HopsOpOp2LopsBS.put(OpOp2.PLUS, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.ADD); HopsOpOp2LopsBS.put(OpOp2.MINUS, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.SUBTRACT); HopsOpOp2LopsBS.put(OpOp2.MULT, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.MULTIPLY); HopsOpOp2LopsBS.put(OpOp2.DIV, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.DIVIDE); HopsOpOp2LopsBS.put(OpOp2.MODULUS, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.MODULUS); HopsOpOp2LopsBS.put(OpOp2.INTDIV, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.INTDIV); HopsOpOp2LopsBS.put(OpOp2.LESS, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.LESS_THAN); HopsOpOp2LopsBS.put(OpOp2.LESSEQUAL,
com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.LESS_THAN_OR_EQUALS); HopsOpOp2LopsBS.put(OpOp2.GREATER, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.GREATER_THAN); HopsOpOp2LopsBS.put(OpOp2.GREATEREQUAL, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.GREATER_THAN_OR_EQUALS); HopsOpOp2LopsBS.put(OpOp2.EQUAL, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.EQUALS); HopsOpOp2LopsBS.put(OpOp2.NOTEQUAL, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.NOT_EQUALS); HopsOpOp2LopsBS.put(OpOp2.MIN, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.MIN); HopsOpOp2LopsBS.put(OpOp2.MAX, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.MAX); HopsOpOp2LopsBS.put(OpOp2.AND, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.AND); HopsOpOp2LopsBS.put(OpOp2.OR, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.OR); HopsOpOp2LopsBS.put(OpOp2.LOG, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.LOG); HopsOpOp2LopsBS.put(OpOp2.POW, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.POW); HopsOpOp2LopsBS.put(OpOp2.PRINT, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.PRINT); HopsOpOp2LopsBS.put(OpOp2.SEQINCR, com.ibm.bi.dml.lops.BinaryScalar.OperationTypes.SEQINCR); } protected static final HashMap<Hop.OpOp2, com.ibm.bi.dml.lops.Unary.OperationTypes> HopsOpOp2LopsU; static { HopsOpOp2LopsU = new HashMap<Hop.OpOp2, com.ibm.bi.dml.lops.Unary.OperationTypes>(); HopsOpOp2LopsU.put(OpOp2.PLUS, com.ibm.bi.dml.lops.Unary.OperationTypes.ADD); HopsOpOp2LopsU.put(OpOp2.MINUS, com.ibm.bi.dml.lops.Unary.OperationTypes.SUBTRACT); HopsOpOp2LopsU.put(OpOp2.MULT, com.ibm.bi.dml.lops.Unary.OperationTypes.MULTIPLY); HopsOpOp2LopsU.put(OpOp2.DIV, com.ibm.bi.dml.lops.Unary.OperationTypes.DIVIDE); HopsOpOp2LopsU.put(OpOp2.MODULUS, com.ibm.bi.dml.lops.Unary.OperationTypes.MODULUS); HopsOpOp2LopsU.put(OpOp2.INTDIV, com.ibm.bi.dml.lops.Unary.OperationTypes.INTDIV); HopsOpOp2LopsU.put(OpOp2.MINUS1_MULT, com.ibm.bi.dml.lops.Unary.OperationTypes.MINUS1_MULTIPLY); HopsOpOp2LopsU.put(OpOp2.LESSEQUAL, com.ibm.bi.dml.lops.Unary.OperationTypes.LESS_THAN_OR_EQUALS); HopsOpOp2LopsU.put(OpOp2.LESS, com.ibm.bi.dml.lops.Unary.OperationTypes.LESS_THAN); HopsOpOp2LopsU.put(OpOp2.GREATEREQUAL, com.ibm.bi.dml.lops.Unary.OperationTypes.GREATER_THAN_OR_EQUALS); HopsOpOp2LopsU.put(OpOp2.GREATER, com.ibm.bi.dml.lops.Unary.OperationTypes.GREATER_THAN); HopsOpOp2LopsU.put(OpOp2.EQUAL, com.ibm.bi.dml.lops.Unary.OperationTypes.EQUALS); HopsOpOp2LopsU.put(OpOp2.NOTEQUAL, com.ibm.bi.dml.lops.Unary.OperationTypes.NOT_EQUALS); HopsOpOp2LopsU.put(OpOp2.AND, com.ibm.bi.dml.lops.Unary.OperationTypes.NOTSUPPORTED); HopsOpOp2LopsU.put(OpOp2.OR, com.ibm.bi.dml.lops.Unary.OperationTypes.NOTSUPPORTED); HopsOpOp2LopsU.put(OpOp2.MAX, com.ibm.bi.dml.lops.Unary.OperationTypes.MAX); HopsOpOp2LopsU.put(OpOp2.MIN, com.ibm.bi.dml.lops.Unary.OperationTypes.MIN); HopsOpOp2LopsU.put(OpOp2.LOG, com.ibm.bi.dml.lops.Unary.OperationTypes.LOG); HopsOpOp2LopsU.put(OpOp2.POW, com.ibm.bi.dml.lops.Unary.OperationTypes.POW); HopsOpOp2LopsU.put(OpOp2.MINUS_NZ, com.ibm.bi.dml.lops.Unary.OperationTypes.SUBTRACT_NZ); HopsOpOp2LopsU.put(OpOp2.LOG_NZ, com.ibm.bi.dml.lops.Unary.OperationTypes.LOG_NZ); } protected static final HashMap<Hop.OpOp1, com.ibm.bi.dml.lops.Unary.OperationTypes> HopsOpOp1LopsU; static { HopsOpOp1LopsU = new HashMap<Hop.OpOp1, com.ibm.bi.dml.lops.Unary.OperationTypes>(); HopsOpOp1LopsU.put(OpOp1.NOT, com.ibm.bi.dml.lops.Unary.OperationTypes.NOT); HopsOpOp1LopsU.put(OpOp1.ABS, com.ibm.bi.dml.lops.Unary.OperationTypes.ABS); HopsOpOp1LopsU.put(OpOp1.SIN, 
com.ibm.bi.dml.lops.Unary.OperationTypes.SIN); HopsOpOp1LopsU.put(OpOp1.COS, com.ibm.bi.dml.lops.Unary.OperationTypes.COS); HopsOpOp1LopsU.put(OpOp1.TAN, com.ibm.bi.dml.lops.Unary.OperationTypes.TAN); HopsOpOp1LopsU.put(OpOp1.ASIN, com.ibm.bi.dml.lops.Unary.OperationTypes.ASIN); HopsOpOp1LopsU.put(OpOp1.ACOS, com.ibm.bi.dml.lops.Unary.OperationTypes.ACOS); HopsOpOp1LopsU.put(OpOp1.ATAN, com.ibm.bi.dml.lops.Unary.OperationTypes.ATAN); HopsOpOp1LopsU.put(OpOp1.SQRT, com.ibm.bi.dml.lops.Unary.OperationTypes.SQRT); HopsOpOp1LopsU.put(OpOp1.EXP, com.ibm.bi.dml.lops.Unary.OperationTypes.EXP); HopsOpOp1LopsU.put(OpOp1.LOG, com.ibm.bi.dml.lops.Unary.OperationTypes.LOG); HopsOpOp1LopsU.put(OpOp1.ROUND, com.ibm.bi.dml.lops.Unary.OperationTypes.ROUND); HopsOpOp1LopsU.put(OpOp1.CEIL, com.ibm.bi.dml.lops.Unary.OperationTypes.CEIL); HopsOpOp1LopsU.put(OpOp1.FLOOR, com.ibm.bi.dml.lops.Unary.OperationTypes.FLOOR); HopsOpOp1LopsU.put(OpOp1.CUMSUM, com.ibm.bi.dml.lops.Unary.OperationTypes.CUMSUM); HopsOpOp1LopsU.put(OpOp1.CUMPROD, com.ibm.bi.dml.lops.Unary.OperationTypes.CUMPROD); HopsOpOp1LopsU.put(OpOp1.CUMMIN, com.ibm.bi.dml.lops.Unary.OperationTypes.CUMMIN); HopsOpOp1LopsU.put(OpOp1.CUMMAX, com.ibm.bi.dml.lops.Unary.OperationTypes.CUMMAX); HopsOpOp1LopsU.put(OpOp1.INVERSE, com.ibm.bi.dml.lops.Unary.OperationTypes.INVERSE); HopsOpOp1LopsU.put(OpOp1.CAST_AS_SCALAR, com.ibm.bi.dml.lops.Unary.OperationTypes.NOTSUPPORTED); HopsOpOp1LopsU.put(OpOp1.CAST_AS_MATRIX, com.ibm.bi.dml.lops.Unary.OperationTypes.NOTSUPPORTED); HopsOpOp1LopsU.put(OpOp1.SPROP, com.ibm.bi.dml.lops.Unary.OperationTypes.SPROP); HopsOpOp1LopsU.put(OpOp1.SIGMOID, com.ibm.bi.dml.lops.Unary.OperationTypes.SIGMOID); HopsOpOp1LopsU.put(OpOp1.SELP, com.ibm.bi.dml.lops.Unary.OperationTypes.SELP); } protected static final HashMap<Hop.OpOp1, com.ibm.bi.dml.lops.UnaryCP.OperationTypes> HopsOpOp1LopsUS; static { HopsOpOp1LopsUS = new HashMap<Hop.OpOp1, com.ibm.bi.dml.lops.UnaryCP.OperationTypes>(); HopsOpOp1LopsUS.put(OpOp1.NOT, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.NOT); HopsOpOp1LopsUS.put(OpOp1.ABS, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.ABS); HopsOpOp1LopsUS.put(OpOp1.SIN, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.SIN); HopsOpOp1LopsUS.put(OpOp1.COS, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.COS); HopsOpOp1LopsUS.put(OpOp1.TAN, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.TAN); HopsOpOp1LopsUS.put(OpOp1.ASIN, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.ASIN); HopsOpOp1LopsUS.put(OpOp1.ACOS, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.ACOS); HopsOpOp1LopsUS.put(OpOp1.ATAN, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.ATAN); HopsOpOp1LopsUS.put(OpOp1.SQRT, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.SQRT); HopsOpOp1LopsUS.put(OpOp1.EXP, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.EXP); HopsOpOp1LopsUS.put(OpOp1.LOG, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.LOG); HopsOpOp1LopsUS.put(OpOp1.CAST_AS_SCALAR, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.CAST_AS_SCALAR); HopsOpOp1LopsUS.put(OpOp1.CAST_AS_MATRIX, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.CAST_AS_MATRIX); HopsOpOp1LopsUS.put(OpOp1.CAST_AS_DOUBLE, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.CAST_AS_DOUBLE); HopsOpOp1LopsUS.put(OpOp1.CAST_AS_INT, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.CAST_AS_INT); HopsOpOp1LopsUS.put(OpOp1.CAST_AS_BOOLEAN, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.CAST_AS_BOOLEAN); HopsOpOp1LopsUS.put(OpOp1.NROW, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.NROW); HopsOpOp1LopsUS.put(OpOp1.NCOL, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.NCOL); 
HopsOpOp1LopsUS.put(OpOp1.LENGTH, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.LENGTH); HopsOpOp1LopsUS.put(OpOp1.PRINT, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.PRINT); HopsOpOp1LopsUS.put(OpOp1.ROUND, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.ROUND); HopsOpOp1LopsUS.put(OpOp1.CEIL, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.CEIL); HopsOpOp1LopsUS.put(OpOp1.FLOOR, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.FLOOR); HopsOpOp1LopsUS.put(OpOp1.STOP, com.ibm.bi.dml.lops.UnaryCP.OperationTypes.STOP); } protected static final HashMap<Hop.OpOp1, String> HopsOpOp12String; static { HopsOpOp12String = new HashMap<OpOp1, String>(); HopsOpOp12String.put(OpOp1.ABS, "abs"); HopsOpOp12String.put(OpOp1.CAST_AS_SCALAR, "castAsScalar"); HopsOpOp12String.put(OpOp1.COS, "cos"); HopsOpOp12String.put(OpOp1.EIGEN, "eigen"); HopsOpOp12String.put(OpOp1.EXP, "exp"); HopsOpOp12String.put(OpOp1.IQM, "iqm"); HopsOpOp12String.put(OpOp1.MEDIAN, "median"); HopsOpOp12String.put(OpOp1.LENGTH, "length"); HopsOpOp12String.put(OpOp1.LOG, "log"); HopsOpOp12String.put(OpOp1.NCOL, "ncol"); HopsOpOp12String.put(OpOp1.NOT, "!"); HopsOpOp12String.put(OpOp1.NROW, "nrow"); HopsOpOp12String.put(OpOp1.PRINT, "print"); HopsOpOp12String.put(OpOp1.ROUND, "round"); HopsOpOp12String.put(OpOp1.SIN, "sin"); HopsOpOp12String.put(OpOp1.SQRT, "sqrt"); HopsOpOp12String.put(OpOp1.TAN, "tan"); HopsOpOp12String.put(OpOp1.ASIN, "asin"); HopsOpOp12String.put(OpOp1.ACOS, "acos"); HopsOpOp12String.put(OpOp1.ATAN, "atan"); HopsOpOp12String.put(OpOp1.STOP, "stop"); HopsOpOp12String.put(OpOp1.INVERSE, "inv"); HopsOpOp12String.put(OpOp1.SPROP, "sprop"); HopsOpOp12String.put(OpOp1.SIGMOID, "sigmoid"); } protected static final HashMap<Hop.ParamBuiltinOp, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes> HopsParameterizedBuiltinLops; static { HopsParameterizedBuiltinLops = new HashMap<Hop.ParamBuiltinOp, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes>(); HopsParameterizedBuiltinLops.put(ParamBuiltinOp.CDF, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes.CDF); HopsParameterizedBuiltinLops.put(ParamBuiltinOp.INVCDF, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes.INVCDF); HopsParameterizedBuiltinLops.put(ParamBuiltinOp.RMEMPTY, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes.RMEMPTY); HopsParameterizedBuiltinLops.put(ParamBuiltinOp.REPLACE, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes.REPLACE); HopsParameterizedBuiltinLops.put(ParamBuiltinOp.REXPAND, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes.REXPAND); HopsParameterizedBuiltinLops.put(ParamBuiltinOp.TRANSFORM, com.ibm.bi.dml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM); } protected static final HashMap<Hop.OpOp2, String> HopsOpOp2String; static { HopsOpOp2String = new HashMap<Hop.OpOp2, String>(); HopsOpOp2String.put(OpOp2.PLUS, "+"); HopsOpOp2String.put(OpOp2.MINUS, "-"); HopsOpOp2String.put(OpOp2.MINUS_NZ, "-nz"); HopsOpOp2String.put(OpOp2.MINUS1_MULT, "-1*"); HopsOpOp2String.put(OpOp2.MULT, "*"); HopsOpOp2String.put(OpOp2.DIV, "/"); HopsOpOp2String.put(OpOp2.MODULUS, "%%"); HopsOpOp2String.put(OpOp2.INTDIV, "%/%"); HopsOpOp2String.put(OpOp2.MIN, "min"); HopsOpOp2String.put(OpOp2.MAX, "max"); HopsOpOp2String.put(OpOp2.LESSEQUAL, "<="); HopsOpOp2String.put(OpOp2.LESS, "<"); HopsOpOp2String.put(OpOp2.GREATEREQUAL, ">="); HopsOpOp2String.put(OpOp2.GREATER, ">"); HopsOpOp2String.put(OpOp2.EQUAL, "="); HopsOpOp2String.put(OpOp2.NOTEQUAL, "!="); HopsOpOp2String.put(OpOp2.OR, "|"); HopsOpOp2String.put(OpOp2.AND, "&"); 
HopsOpOp2String.put(OpOp2.LOG, "log"); HopsOpOp2String.put(OpOp2.LOG_NZ, "log_nz"); HopsOpOp2String.put(OpOp2.POW, "^"); HopsOpOp2String.put(OpOp2.CONCAT, "concat"); HopsOpOp2String.put(OpOp2.INVALID, "?"); HopsOpOp2String.put(OpOp2.QUANTILE, "quantile"); HopsOpOp2String.put(OpOp2.INTERQUANTILE, "interquantile"); HopsOpOp2String.put(OpOp2.IQM, "IQM"); HopsOpOp2String.put(OpOp2.MEDIAN, "median"); HopsOpOp2String.put(OpOp2.CENTRALMOMENT, "cm"); HopsOpOp2String.put(OpOp2.COVARIANCE, "cov"); HopsOpOp2String.put(OpOp2.CBIND, "cbind"); HopsOpOp2String.put(OpOp2.RBIND, "rbind"); HopsOpOp2String.put(OpOp2.SOLVE, "solve"); HopsOpOp2String.put(OpOp2.SEQINCR, "seqincr"); } public static String getOpOp2String(OpOp2 op) { return HopsOpOp2String.get(op); } protected static final HashMap<Hop.OpOp3, String> HopsOpOp3String; static { HopsOpOp3String = new HashMap<Hop.OpOp3, String>(); HopsOpOp3String.put(OpOp3.QUANTILE, "quantile"); HopsOpOp3String.put(OpOp3.INTERQUANTILE, "interquantile"); HopsOpOp3String.put(OpOp3.CTABLE, "ctable"); HopsOpOp3String.put(OpOp3.CENTRALMOMENT, "cm"); HopsOpOp3String.put(OpOp3.COVARIANCE, "cov"); } protected static final HashMap<Hop.OpOp4, String> HopsOpOp4String; static { HopsOpOp4String = new HashMap<Hop.OpOp4, String>(); HopsOpOp4String.put(OpOp4.WSLOSS, "wsloss"); HopsOpOp4String.put(OpOp4.WSIGMOID, "wsigmoid"); } protected static final HashMap<Hop.Direction, String> HopsDirection2String; static { HopsDirection2String = new HashMap<Hop.Direction, String>(); HopsDirection2String.put(Direction.RowCol, "RC"); HopsDirection2String.put(Direction.Col, "C"); HopsDirection2String.put(Direction.Row, "R"); } protected static final HashMap<Hop.AggOp, String> HopsAgg2String; static { HopsAgg2String = new HashMap<Hop.AggOp, String>(); HopsAgg2String.put(AggOp.SUM, "+"); HopsAgg2String.put(AggOp.SUM_SQ, "sq+"); HopsAgg2String.put(AggOp.PROD, "*"); HopsAgg2String.put(AggOp.MIN, "min"); HopsAgg2String.put(AggOp.MAX, "max"); HopsAgg2String.put(AggOp.MAXINDEX, "maxindex"); HopsAgg2String.put(AggOp.MININDEX, "minindex"); HopsAgg2String.put(AggOp.TRACE, "trace"); HopsAgg2String.put(AggOp.MEAN, "mean"); } protected static final HashMap<Hop.ReOrgOp, String> HopsTransf2String; static { HopsTransf2String = new HashMap<ReOrgOp, String>(); HopsTransf2String.put(ReOrgOp.TRANSPOSE, "t"); HopsTransf2String.put(ReOrgOp.DIAG, "diag"); HopsTransf2String.put(ReOrgOp.RESHAPE, "rshape"); HopsTransf2String.put(ReOrgOp.SORT, "sort"); } protected static final HashMap<DataOpTypes, String> HopsData2String; static { HopsData2String = new HashMap<Hop.DataOpTypes, String>(); HopsData2String.put(DataOpTypes.PERSISTENTREAD, "PRead"); HopsData2String.put(DataOpTypes.PERSISTENTWRITE, "PWrite"); HopsData2String.put(DataOpTypes.TRANSIENTWRITE, "TWrite"); HopsData2String.put(DataOpTypes.TRANSIENTREAD, "TRead"); } public static boolean isFunction(OpOp2 op) { return op == OpOp2.MIN || op == OpOp2.MAX || op == OpOp2.LOG;// || op == OpOp2.CONCAT; //concat is || in Netezza } public static boolean isSupported(OpOp2 op) { return op != OpOp2.INVALID && op != OpOp2.QUANTILE && op != OpOp2.INTERQUANTILE && op != OpOp2.IQM; } public static boolean isFunction(OpOp1 op) { return op == OpOp1.SIN || op == OpOp1.TAN || op == OpOp1.COS || op == OpOp1.ABS || op == OpOp1.EXP || op == OpOp1.LOG || op == OpOp1.ROUND || op == OpOp1.SQRT; } public static boolean isBooleanOperation(OpOp2 op) { return op == OpOp2.AND || op == OpOp2.EQUAL || op == OpOp2.GREATER || op == OpOp2.GREATEREQUAL || op == OpOp2.LESS || op == OpOp2.LESSEQUAL || op == 
OpOp2.OR; } /** * * @param op * @return */ public static OpOp2 getOpOp2ForOuterVectorOperation(String op) { if ("+".equals(op)) return OpOp2.PLUS; else if ("-".equals(op)) return OpOp2.MINUS; else if ("*".equals(op)) return OpOp2.MULT; else if ("/".equals(op)) return OpOp2.DIV; else if ("%%".equals(op)) return OpOp2.MODULUS; else if ("%/%".equals(op)) return OpOp2.INTDIV; else if ("min".equals(op)) return OpOp2.MIN; else if ("max".equals(op)) return OpOp2.MAX; else if ("<=".equals(op)) return OpOp2.LESSEQUAL; else if ("<".equals(op)) return OpOp2.LESS; else if (">=".equals(op)) return OpOp2.GREATEREQUAL; else if (">".equals(op)) return OpOp2.GREATER; else if ("==".equals(op)) return OpOp2.EQUAL; else if ("!=".equals(op)) return OpOp2.NOTEQUAL; else if ("|".equals(op)) return OpOp2.OR; else if ("&".equals(op)) return OpOp2.AND; else if ("log".equals(op)) return OpOp2.LOG; else if ("^".equals(op)) return OpOp2.POW; return null; } public static ValueType getResultValueType(ValueType vt1, ValueType vt2) { if (vt1 == ValueType.STRING || vt2 == ValueType.STRING) return ValueType.STRING; else if (vt1 == ValueType.DOUBLE || vt2 == ValueType.DOUBLE) return ValueType.DOUBLE; else return ValueType.INT; } ///////////////////////////////////// // methods for dynamic re-compilation ///////////////////////////////////// /** * Indicates if dynamic recompilation is required for this hop. */ public boolean requiresRecompile() { return _requiresRecompile; } public void setRequiresRecompile() { _requiresRecompile = true; } public void unsetRequiresRecompile() { _requiresRecompile = false; } /** * Update the output size information for this hop. */ public abstract void refreshSizeInformation(); /** * Util function for refreshing scalar rows input parameter. */ protected void refreshRowsParameterInformation(Hop input) { long size = computeSizeInformation(input); //always set the computed size not just if known (positive) in order to allow //recompile with unknowns to reset sizes (otherwise potential for incorrect results) setDim1(size); } /** * Util function for refreshing scalar cols input parameter. 
*/ protected void refreshColsParameterInformation(Hop input) { long size = computeSizeInformation(input); //always set the computed size not just if known (positive) in order to allow //recompile with unknowns to reset sizes (otherwise potential for incorrect results) setDim2(size); } /** * * @param input * @return */ public long computeSizeInformation(Hop input) { long ret = -1; try { long tmp = OptimizerUtils.rEvalSimpleLongExpression(input, new HashMap<Long, Long>()); if (tmp != Long.MAX_VALUE) ret = tmp; } catch (Exception ex) { LOG.error("Failed to compute size information.", ex); ret = -1; } return ret; } /** * * @param input * @param vars */ public void refreshRowsParameterInformation(Hop input, LocalVariableMap vars) { long size = computeSizeInformation(input, vars); //always set the computed size not just if known (positive) in order to allow //recompile with unknowns to reset sizes (otherwise potential for incorrect results) setDim1(size); } /** * * @param input * @param vars */ public void refreshColsParameterInformation(Hop input, LocalVariableMap vars) { long size = computeSizeInformation(input, vars); //always set the computed size not just if known (positive) in order to allow //recompile with unknowns to reset sizes (otherwise potential for incorrect results) setDim2(size); } /** * * @param input * @param vars * @return */ public long computeSizeInformation(Hop input, LocalVariableMap vars) { long ret = -1; try { long tmp = OptimizerUtils.rEvalSimpleLongExpression(input, new HashMap<Long, Long>(), vars); if (tmp != Long.MAX_VALUE) ret = tmp; } catch (Exception ex) { LOG.error("Failed to compute size information.", ex); ret = -1; } return ret; } /** * * @param input * @return */ public double computeBoundsInformation(Hop input) { double ret = Double.MAX_VALUE; try { ret = OptimizerUtils.rEvalSimpleDoubleExpression(input, new HashMap<Long, Double>()); } catch (Exception ex) { LOG.error("Failed to compute bounds information.", ex); ret = Double.MAX_VALUE; } return ret; } /** * Computes bound information for sequence if possible, otherwise returns * Double.MAX_VALUE * * @param input * @param vars * @return */ public double computeBoundsInformation(Hop input, LocalVariableMap vars) { double ret = Double.MAX_VALUE; try { ret = OptimizerUtils.rEvalSimpleDoubleExpression(input, new HashMap<Long, Double>(), vars); } catch (Exception ex) { LOG.error("Failed to compute bounds information.", ex); ret = Double.MAX_VALUE; } return ret; } /** * Compute worst case estimate for size expression based on worst-case * statistics of inputs. Limited set of supported operations in comparison * to refresh rows/cols. 
* * @param input * @param memo */ protected long computeDimParameterInformation(Hop input, MemoTable memo) { long ret = -1; if (input instanceof UnaryOp) { if (((UnaryOp) input).getOp() == Hop.OpOp1.NROW) { MatrixCharacteristics mc = memo.getAllInputStats(input.getInput().get(0)); if (mc.getRows() > 0) ret = mc.getRows(); } else if (((UnaryOp) input).getOp() == Hop.OpOp1.NCOL) { MatrixCharacteristics mc = memo.getAllInputStats(input.getInput().get(0)); if (mc.getCols() > 0) ret = mc.getCols(); } } else if (input instanceof LiteralOp) { ret = UtilFunctions.parseToLong(input.getName()); } else if (input instanceof BinaryOp) { long dim = rEvalSimpleBinaryLongExpression(input, new HashMap<Long, Long>(), memo); if (dim != Long.MAX_VALUE) //if known ret = dim; } return ret; } /** * * @param root * @param valMemo * @return */ protected long rEvalSimpleBinaryLongExpression(Hop root, HashMap<Long, Long> valMemo, MemoTable memo) { //memoization (prevent redundant computation of common subexpr) if (valMemo.containsKey(root.getHopID())) return valMemo.get(root.getHopID()); long ret = Long.MAX_VALUE; if (root instanceof LiteralOp) { long dim = UtilFunctions.parseToLong(root.getName()); if (dim != -1) //if known ret = dim; } else if (root instanceof UnaryOp) { UnaryOp uroot = (UnaryOp) root; long dim = -1; if (uroot.getOp() == Hop.OpOp1.NROW) { MatrixCharacteristics mc = memo.getAllInputStats(uroot.getInput().get(0)); dim = mc.getRows(); } else if (uroot.getOp() == Hop.OpOp1.NCOL) { MatrixCharacteristics mc = memo.getAllInputStats(uroot.getInput().get(0)); dim = mc.getCols(); } if (dim != -1) //if known ret = dim; } else if (root instanceof BinaryOp) { if (OptimizerUtils.ALLOW_WORSTCASE_SIZE_EXPRESSION_EVALUATION) { BinaryOp broot = (BinaryOp) root; long lret = rEvalSimpleBinaryLongExpression(broot.getInput().get(0), valMemo, memo); long rret = rEvalSimpleBinaryLongExpression(broot.getInput().get(1), valMemo, memo); //note: positive and negative values might be valid subexpressions if (lret != Long.MAX_VALUE && rret != Long.MAX_VALUE) //if known { switch (broot.getOp()) { case PLUS: ret = lret + rret; break; case MULT: ret = lret * rret; break; case MIN: ret = Math.min(lret, rret); break; case MAX: ret = Math.max(lret, rret); break; default: ret = Long.MAX_VALUE; } } //exploit min constraint to propagate else if (broot.getOp() == OpOp2.MIN && (lret != Long.MAX_VALUE || rret != Long.MAX_VALUE)) { ret = Math.min(lret, rret); } } } valMemo.put(root.getHopID(), ret); return ret; } /** * * @return */ public String constructBaseDir() { StringBuilder sb = new StringBuilder(); sb.append(ConfigurationManager.getConfig().getTextValue(DMLConfig.SCRATCH_SPACE)); sb.append(Lop.FILE_SEPARATOR); sb.append(Lop.PROCESS_PREFIX); sb.append(DMLScript.getUUID()); sb.append(Lop.FILE_SEPARATOR); sb.append(Lop.FILE_SEPARATOR); sb.append(ProgramConverter.CP_ROOT_THREAD_ID); sb.append(Lop.FILE_SEPARATOR); return sb.toString(); } /** * Clones the attributes of that and copies them over to this.
* * @param that * @throws HopsException */ protected void clone(Hop that, boolean withRefs) throws CloneNotSupportedException { if (withRefs) throw new CloneNotSupportedException("Hops deep copy w/ lops/inputs/parents not supported."); _ID = that._ID; _name = that._name; _dataType = that._dataType; _valueType = that._valueType; _visited = that._visited; _dim1 = that._dim1; _dim2 = that._dim2; _rows_in_block = that._rows_in_block; _cols_in_block = that._cols_in_block; _nnz = that._nnz; //no copy of lops (regenerated) _parent = new ArrayList<Hop>(); _input = new ArrayList<Hop>(); _lops = null; _etype = that._etype; _etypeForced = that._etypeForced; _outputMemEstimate = that._outputMemEstimate; _memEstimate = that._memEstimate; _processingMemEstimate = that._processingMemEstimate; _requiresRecompile = that._requiresRecompile; _requiresReblock = that._requiresReblock; _requiresCheckpoint = that._requiresCheckpoint; _outputEmptyBlocks = that._outputEmptyBlocks; _beginLine = that._beginLine; _beginColumn = that._beginColumn; _endLine = that._endLine; _endColumn = that._endColumn; } public abstract Object clone() throws CloneNotSupportedException; public abstract boolean compare(Hop that); /////////////////////////////////////////////////////////////////////////// // store position information for Hops /////////////////////////////////////////////////////////////////////////// public int _beginLine, _beginColumn; public int _endLine, _endColumn; public void setBeginLine(int passed) { _beginLine = passed; } public void setBeginColumn(int passed) { _beginColumn = passed; } public void setEndLine(int passed) { _endLine = passed; } public void setEndColumn(int passed) { _endColumn = passed; } public void setAllPositions(int blp, int bcp, int elp, int ecp) { _beginLine = blp; _beginColumn = bcp; _endLine = elp; _endColumn = ecp; } public int getBeginLine() { return _beginLine; } public int getBeginColumn() { return _beginColumn; } public int getEndLine() { return _endLine; } public int getEndColumn() { return _endColumn; } public String printErrorLocation() { return "ERROR: line " + _beginLine + ", column " + _beginColumn + " -- "; } public String printWarningLocation() { return "WARNING: line " + _beginLine + ", column " + _beginColumn + " -- "; } /** * Sets the linenumbers of this hop to a given lop. * * @param lop */ protected void setLineNumbers(Lop lop) { lop.setAllPositions(this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn()); } } // end class
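All operator-specific behavior is deferred to subclasses via the abstract methods above (constructLops, optFindExecType, computeOutputMemEstimate, computeIntermediateMemEstimate, inferOutputCharacteristics, refreshSizeInformation, getOpString, allowsAllExecTypes, clone, compare). To make that contract concrete, here is a minimal sketch of a hypothetical subclass, PassThroughOp, that simply forwards a single matrix input. It is not part of the code base: the dense 8-bytes-per-cell output estimate and the reuse of the input's lop are deliberate simplifications, and it assumes the usual MatrixCharacteristics accessors for worst-case statistics.

package com.ibm.bi.dml.hops;

import com.ibm.bi.dml.lops.Lop;
import com.ibm.bi.dml.lops.LopsException;
import com.ibm.bi.dml.lops.LopProperties.ExecType;
import com.ibm.bi.dml.runtime.matrix.MatrixCharacteristics;

//hypothetical example hop: forwards its single matrix input unchanged
public class PassThroughOp extends Hop
{
	private PassThroughOp() {
		//default constructor for clone
	}

	public PassThroughOp(String name, Hop input) {
		super(name, input.getDataType(), input.getValueType());
		addInput(input); //creates the bidirectional parent/input links
		setDim1(input.getDim1());
		setDim2(input.getDim2());
		setNnz(input.getNnz());
		setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
	}

	@Override
	public boolean allowsAllExecTypes() {
		return true;
	}

	@Override
	public String getOpString() {
		return "passthrough";
	}

	@Override
	public Lop constructLops()
		throws HopsException, LopsException
	{
		if (getLops() != null) //reuse lop if already constructed
			return getLops();
		setLops(getInput().get(0).constructLops()); //forward the input's lop
		return getLops();
	}

	@Override
	protected ExecType optFindExecType() throws HopsException {
		if (getForcedExecType() != null) //exec type forced via platform
			return getForcedExecType();
		return findExecTypeByMemEstimate(); //CP iff inputs+output fit in the local memory budget
	}

	@Override
	protected double computeOutputMemEstimate(long dim1, long dim2, long nnz) {
		//simplification: always assume a dense double-precision output (8 bytes per cell);
		//real hops switch between dense and sparse estimates based on nnz
		return 8d * dim1 * dim2;
	}

	@Override
	protected double computeIntermediateMemEstimate(long dim1, long dim2, long nnz) {
		return 0; //no intermediate buffers for a pass-through
	}

	@Override
	protected long[] inferOutputCharacteristics(MemoTable memo) {
		//propagate the worst-case statistics of the single input: {rows, cols, nnz}
		MatrixCharacteristics mc = memo.getAllInputStats(getInput().get(0));
		if (mc != null && mc.getRows() > 0 && mc.getCols() > 0)
			return new long[]{ mc.getRows(), mc.getCols(), mc.getNonZeros() };
		return null; //unknown
	}

	@Override
	public void refreshSizeInformation() {
		Hop input = getInput().get(0);
		setDim1(input.getDim1());
		setDim2(input.getDim2());
		setNnz(input.getNnz());
	}

	@Override
	public Object clone() throws CloneNotSupportedException {
		PassThroughOp ret = new PassThroughOp();
		ret.clone(this, false); //copy the generic Hop attributes
		return ret;
	}

	@Override
	public boolean compare(Hop that) {
		//no operator-specific parameters to compare
		return (that instanceof PassThroughOp);
	}
}

In the code base itself this role is played by subclasses such as DataOp, UnaryOp, BinaryOp, LiteralOp, and ParameterizedBuiltinOp (all referenced in the listing above), each with considerably more careful dense/sparse size and sparsity reasoning.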