// OptimizerUtils.java — optimizer constants, tuning flags, and memory/sparsity estimation utilities for the hops compiler
/** * (C) Copyright IBM Corp. 2010, 2015 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.ibm.bi.dml.hops; import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.ibm.bi.dml.api.DMLScript; import com.ibm.bi.dml.api.DMLScript.RUNTIME_PLATFORM; import com.ibm.bi.dml.conf.ConfigurationManager; import com.ibm.bi.dml.conf.DMLConfig; import com.ibm.bi.dml.hops.Hop.DataOpTypes; import com.ibm.bi.dml.hops.Hop.FileFormatTypes; import com.ibm.bi.dml.hops.Hop.OpOp2; import com.ibm.bi.dml.hops.rewrite.HopRewriteUtils; import com.ibm.bi.dml.lops.LopProperties.ExecType; import com.ibm.bi.dml.runtime.DMLRuntimeException; import com.ibm.bi.dml.runtime.controlprogram.LocalVariableMap; import com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext; import com.ibm.bi.dml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import com.ibm.bi.dml.runtime.instructions.cp.Data; import com.ibm.bi.dml.runtime.instructions.cp.ScalarObject; import com.ibm.bi.dml.runtime.matrix.MatrixCharacteristics; import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock; import com.ibm.bi.dml.runtime.matrix.data.OutputInfo; import com.ibm.bi.dml.runtime.matrix.data.SparseRow; import com.ibm.bi.dml.runtime.util.UtilFunctions; import com.ibm.bi.dml.yarn.ropt.YarnClusterAnalyzer; public class OptimizerUtils { private static final Log LOG = LogFactory.getLog(OptimizerUtils.class.getName()); 
//////////////////////////////////////////////////////// // Optimizer constants and flags (incl tuning knobs) // //////////////////////////////////////////////////////// /** * Utilization factor used in deciding whether an operation to be scheduled on CP or MR. * NOTE: it is important that MEM_UTIL_FACTOR+CacheableData.CACHING_BUFFER_SIZE < 1.0 */ public static double MEM_UTIL_FACTOR = 0.7d; /** * Default memory size, which is used the actual estimate can not be computed * -- for example, when input/output dimensions are unknown. In case of ROBUST, * the default is set to a large value so that operations are scheduled on MR. */ public static double DEFAULT_SIZE; public static final long DOUBLE_SIZE = 8; public static final long INT_SIZE = 4; public static final long CHAR_SIZE = 1; public static final long BOOLEAN_SIZE = 1; public static final double BIT_SIZE = (double) 1 / 8; public static final double INVALID_SIZE = -1d; // memory estimate not computed public static final long MAX_NUMCELLS_CP_DENSE = Integer.MAX_VALUE; /** * Enables/disables dynamic re-compilation of lops/instructions. * If enabled, we recompile each program block that contains at least * one hop that requires re-compilation (e.g., unknown statistics * during compilation, or program blocks in functions). */ public static boolean ALLOW_DYN_RECOMPILATION = true; public static boolean ALLOW_PARALLEL_DYN_RECOMPILATION = ALLOW_DYN_RECOMPILATION && true; /** * Enables/disables to put operations with data-dependent output * size into individual statement blocks / program blocks. * Since recompilation is done on the granularity of program blocks * this enables recompilation of subsequent operations according * to the actual output size. This rewrite might limit the opportunity * for piggybacking and therefore should only be applied if * dyanmic recompilation is enabled as well. 
*/ public static boolean ALLOW_INDIVIDUAL_SB_SPECIFIC_OPS = ALLOW_DYN_RECOMPILATION && true; /** * Enables common subexpression elimination in dags. There is however, a potential tradeoff * between computation redundancy and data transfer between MR jobs. Since, we do not reason * about transferred data yet, this rewrite rule is enabled by default. */ public static boolean ALLOW_COMMON_SUBEXPRESSION_ELIMINATION = true; /** * Enables constant folding in dags. Constant folding computes simple expressions of binary * operations and literals and replaces the hop sub-DAG with a new literal operator. */ public static boolean ALLOW_CONSTANT_FOLDING = true; /** * */ public static boolean ALLOW_ALGEBRAIC_SIMPLIFICATION = true; /** * Enables if-else branch removal for constant predicates (original literals or * results of constant folding). * */ public static boolean ALLOW_BRANCH_REMOVAL = true; /** * */ public static boolean ALLOW_AUTO_VECTORIZATION = true; /** * Enables simple expression evaluation for datagen parameters 'rows', 'cols'. Simple * expressions are defined as binary operations on literals and nrow/ncol. This applies * only to exact size information. */ public static boolean ALLOW_SIZE_EXPRESSION_EVALUATION = true; /** * Enables simple expression evaluation for datagen parameters 'rows', 'cols'. Simple * expressions are defined as binary operations on literals and b(+) or b(*) on nrow/ncol. * This applies also to worst-case size information. */ public static boolean ALLOW_WORSTCASE_SIZE_EXPRESSION_EVALUATION = true; /** * */ public static boolean ALLOW_RAND_JOB_RECOMPILE = true; /** * Enables CP-side data transformation for small files. */ public static boolean ALLOW_TRANSFORM_RECOMPILE = true; /** * Enables parfor runtime piggybacking of MR jobs into the packed jobs for * scan sharing. */ public static boolean ALLOW_RUNTIME_PIGGYBACKING = true; /** * Enables interprocedural analysis between main script and functions as well as functions * and other functions. 
This includes, for example, to propagate statistics into functions * if save to do so (e.g., if called once). */ public static boolean ALLOW_INTER_PROCEDURAL_ANALYSIS = true; /** * Enables sum product rewrites such as mapmultchains. In the future, this will cover * all sum-product related rewrites. */ public static boolean ALLOW_SUM_PRODUCT_REWRITES = true; /** * Enables a specific hop dag rewrite that splits hop dags after csv persistent reads with * unknown size in order to allow for recompile. */ public static boolean ALLOW_SPLIT_HOP_DAGS = true; /** * Enables parallel read/write of all text formats (textcell, csv, mm) * and binary formats (binary block). * */ public static boolean PARALLEL_CP_READ_TEXTFORMATS = true; public static boolean PARALLEL_CP_WRITE_TEXTFORMATS = true; public static boolean PARALLEL_CP_READ_BINARYFORMATS = true; public static boolean PARALLEL_CP_WRITE_BINARYFORMATS = true; /** * Specifies a multiplier computing the degree of parallelism of parallel * text read/write out of the available degree of parallelism. Set it to 1.0 * to get a number of threads equal the number of virtual cores. * */ public static final double PARALLEL_CP_READ_PARALLELISM_MULTIPLIER = 1.0; public static final double PARALLEL_CP_WRITE_PARALLELISM_MULTIPLIER = 1.0; /** * Enables multi-threaded matrix multiply for mm, mmchain, and tsmm. * */ public static boolean PARALLEL_CP_MATRIX_MULTIPLY = true; /** * Enables the use of CombineSequenceFileInputFormat with splitsize = 2x hdfs blocksize, * if sort buffer size large enough and parallelism not hurt. This solves to issues: * (1) it combines small files (depending on producers), and (2) it reduces task * latency of large jobs with many tasks by factor 2. 
* */ public static final boolean ALLOW_COMBINE_FILE_INPUT_FORMAT = true; ////////////////////// // Optimizer levels // ////////////////////// private static OptimizationLevel _optLevel = OptimizationLevel.O2_LOCAL_MEMORY_DEFAULT; /** * Optimization Types for Compilation * * O0 STATIC - Decisions for scheduling operations on CP/MR are based on * predefined set of rules, which check if the dimensions are below a * fixed/static threshold (OLD Method of choosing between CP and MR). * The optimization scope is LOCAL, i.e., per statement block. * Advanced rewrites like constant folding, common subexpression elimination, * or inter procedural analysis are NOT applied. * * O1 MEMORY_BASED - Every operation is scheduled on CP or MR, solely * based on the amount of memory required to perform that operation. * It does NOT take the execution time into account. * The optimization scope is LOCAL, i.e., per statement block. * Advanced rewrites like constant folding, common subexpression elimination, * or inter procedural analysis are NOT applied. * * O2 MEMORY_BASED - Every operation is scheduled on CP or MR, solely * based on the amount of memory required to perform that operation. * It does NOT take the execution time into account. * The optimization scope is LOCAL, i.e., per statement block. * All advanced rewrites are applied. This is the default optimization * level of SystemML. * * O3 GLOBAL TIME_MEMORY_BASED - Operation scheduling on CP or MR as well as * many other rewrites of data flow properties such as block size, partitioning, * replication, vectorization, etc are done with the optimization objective of * minimizing execution time under hard memory constraints per operation and * execution context. The optimization scope if GLOBAL, i.e., program-wide. * All advanced rewrites are applied. This optimization level requires more * optimization time but has higher optimization potential. 
* * O4 DEBUG MODE - All optimizations, global and local, which interfere with * breakpoints are NOT applied. This optimization level is REQUIRED for the * compiler running in debug mode. */ public enum OptimizationLevel { O0_LOCAL_STATIC, O1_LOCAL_MEMORY_MIN, O2_LOCAL_MEMORY_DEFAULT, O3_LOCAL_RESOURCE_TIME_MEMORY, O4_GLOBAL_TIME_MEMORY, O5_DEBUG_MODE, }; public static OptimizationLevel getOptLevel() { return _optLevel; } public static boolean isMemoryBasedOptLevel() { return (_optLevel != OptimizationLevel.O0_LOCAL_STATIC); } public static boolean isOptLevel(OptimizationLevel level) { return (_optLevel == level); } /** * * @param optlevel * @throws DMLRuntimeException */ public static void setOptimizationLevel(int optlevel) throws DMLRuntimeException { if (optlevel < 0 || optlevel > 5) throw new DMLRuntimeException( "Error: invalid optimization level '" + optlevel + "' (valid values: 0-5)."); // This overrides any optimization level that is present in the configuration file. // Why ? This simplifies the calling logic: User doesnot have to maintain two config file or worse // edit config file everytime he/she is trying to call the debugger. 
if (DMLScript.ENABLE_DEBUG_MODE) { optlevel = 5; } switch (optlevel) { // opt level 0: static dimensionality case 0: _optLevel = OptimizationLevel.O0_LOCAL_STATIC; ALLOW_CONSTANT_FOLDING = false; ALLOW_COMMON_SUBEXPRESSION_ELIMINATION = false; ALLOW_ALGEBRAIC_SIMPLIFICATION = false; ALLOW_AUTO_VECTORIZATION = false; ALLOW_INTER_PROCEDURAL_ANALYSIS = false; ALLOW_BRANCH_REMOVAL = false; ALLOW_SUM_PRODUCT_REWRITES = false; break; // opt level 1: memory-based (no advanced rewrites) case 1: _optLevel = OptimizationLevel.O1_LOCAL_MEMORY_MIN; ALLOW_CONSTANT_FOLDING = false; ALLOW_COMMON_SUBEXPRESSION_ELIMINATION = false; ALLOW_ALGEBRAIC_SIMPLIFICATION = false; ALLOW_AUTO_VECTORIZATION = false; ALLOW_INTER_PROCEDURAL_ANALYSIS = false; ALLOW_BRANCH_REMOVAL = false; ALLOW_SUM_PRODUCT_REWRITES = false; break; // opt level 2: memory-based (all advanced rewrites) case 2: _optLevel = OptimizationLevel.O2_LOCAL_MEMORY_DEFAULT; break; // opt level 3: resource optimization, time- and memory-based (2 w/ resource optimizat) case 3: _optLevel = OptimizationLevel.O3_LOCAL_RESOURCE_TIME_MEMORY; break; // opt level 3: global, time- and memory-based (all advanced rewrites) case 4: _optLevel = OptimizationLevel.O4_GLOBAL_TIME_MEMORY; break; // opt level 4: debug mode (no interfering rewrites) case 5: _optLevel = OptimizationLevel.O5_DEBUG_MODE; ALLOW_CONSTANT_FOLDING = false; ALLOW_COMMON_SUBEXPRESSION_ELIMINATION = false; ALLOW_ALGEBRAIC_SIMPLIFICATION = false; ALLOW_INTER_PROCEDURAL_ANALYSIS = false; ALLOW_BRANCH_REMOVAL = false; ALLOW_DYN_RECOMPILATION = false; ALLOW_SIZE_EXPRESSION_EVALUATION = false; ALLOW_WORSTCASE_SIZE_EXPRESSION_EVALUATION = false; ALLOW_RAND_JOB_RECOMPILE = false; ALLOW_SUM_PRODUCT_REWRITES = false; ALLOW_SPLIT_HOP_DAGS = false; break; } setDefaultSize(); //handle parallel text io (incl awareness of thread contention in <jdk8) if (!ConfigurationManager.getConfig().getBooleanValue(DMLConfig.CP_PARALLEL_TEXTIO)) { PARALLEL_CP_READ_TEXTFORMATS = false; 
PARALLEL_CP_WRITE_TEXTFORMATS = false; PARALLEL_CP_READ_BINARYFORMATS = false; PARALLEL_CP_WRITE_BINARYFORMATS = false; } else if (InfrastructureAnalyzer.isJavaVersionLessThanJDK8() && InfrastructureAnalyzer.getLocalParallelism() > 1) { LOG.warn( "Auto-disable multi-threaded text read for 'text' and 'csv' due to thread contention on JRE < 1.8" + " (java.version=" + System.getProperty("java.version") + ")."); //disable parallel text read PARALLEL_CP_READ_TEXTFORMATS = false; } //handle parallel matrix mult / rand configuration if (!ConfigurationManager.getConfig().getBooleanValue(DMLConfig.CP_PARALLEL_MATRIXMULT)) { PARALLEL_CP_MATRIX_MULTIPLY = false; } } /** * */ public static void setDefaultSize() { //we need to set default_size larger than any execution context //memory budget, however, it should not produce overflows on sum DEFAULT_SIZE = Math.max(InfrastructureAnalyzer.getLocalMaxMemory(), Math.max( InfrastructureAnalyzer.getRemoteMaxMemoryMap(), InfrastructureAnalyzer.getRemoteMaxMemoryReduce())); } /** * Returns memory budget (according to util factor) in bytes * * @param localOnly specifies if only budget of current JVM or also MR JVMs * @return */ public static double getLocalMemBudget() { double ret = InfrastructureAnalyzer.getLocalMaxMemory(); return ret * OptimizerUtils.MEM_UTIL_FACTOR; } /** * * @return */ public static double getRemoteMemBudgetMap() { return getRemoteMemBudgetMap(false); } /** * * @return */ public static double getRemoteMemBudgetMap(boolean substractSortBuffer) { double ret = InfrastructureAnalyzer.getRemoteMaxMemoryMap(); if (substractSortBuffer) ret -= InfrastructureAnalyzer.getRemoteMaxMemorySortBuffer(); return ret * OptimizerUtils.MEM_UTIL_FACTOR; } /** * * @return */ public static double getRemoteMemBudgetReduce() { double ret = InfrastructureAnalyzer.getRemoteMaxMemoryReduce(); return ret * OptimizerUtils.MEM_UTIL_FACTOR; } /** * * @param size * @return */ public static boolean checkSparkBroadcastMemoryBudget(double size) { 
double memBudgetExec = SparkExecutionContext.getBroadcastMemoryBudget(); double memBudgetLocal = OptimizerUtils.getLocalMemBudget(); //basic requirement: the broadcast needs to to fit once in the remote broadcast memory //and twice into the local memory budget because we have to create a partitioned broadcast //memory and hand it over to the spark context as in-memory object return (size < memBudgetExec && 2 * size < memBudgetLocal); } /** * * @param rlen * @param clen * @param brlen * @param bclen * @param nnz * @return */ public static boolean checkSparkBroadcastMemoryBudget(long rlen, long clen, long brlen, long bclen, long nnz) { double memBudgetExec = SparkExecutionContext.getBroadcastMemoryBudget(); double memBudgetLocal = OptimizerUtils.getLocalMemBudget(); double sp = getSparsity(rlen, clen, nnz); double size = estimateSizeExactSparsity(rlen, clen, sp); double sizeP = estimatePartitionedSizeExactSparsity(rlen, clen, brlen, bclen, sp); //basic requirement: the broadcast needs to to fit once in the remote broadcast memory //and twice into the local memory budget because we have to create a partitioned broadcast //memory and hand it over to the spark context as in-memory object return (OptimizerUtils.isValidCPDimensions(rlen, clen) && sizeP < memBudgetExec && size + sizeP < memBudgetLocal); } /** * * @param rlen * @param clen * @param brlen * @param bclen * @param nnz * @return */ public static boolean checkSparkCollectMemoryBudget(long rlen, long clen, int brlen, int bclen, long nnz, long memPinned) { //compute size of output matrix and its blocked representation double sp = getSparsity(rlen, clen, nnz); double memMatrix = estimateSizeExactSparsity(rlen, clen, sp); double memPMatrix = estimatePartitionedSizeExactSparsity(rlen, clen, brlen, bclen, sp); //check if both output matrix and partitioned matrix fit into local mem budget return (memPinned + memMatrix + memPMatrix < getLocalMemBudget()); } /** * Returns the number of reducers that potentially run in 
parallel. * This is either just the configured value (SystemML config) or * the minimum of configured value and available reduce slots. * * @param configOnly * @return */ public static int getNumReducers(boolean configOnly) { int ret = ConfigurationManager.getConfig().getIntValue(DMLConfig.NUM_REDUCERS); if (!configOnly) { ret = Math.min(ret, InfrastructureAnalyzer.getRemoteParallelReduceTasks()); //correction max number of reducers on yarn clusters if (InfrastructureAnalyzer.isYarnEnabled()) ret = (int) Math.max(ret, YarnClusterAnalyzer.getNumCores() / 2); } return ret; } /** * * @return */ public static int getNumMappers() { int ret = InfrastructureAnalyzer.getRemoteParallelMapTasks(); //correction max number of reducers on yarn clusters if (InfrastructureAnalyzer.isYarnEnabled()) ret = (int) Math.max(ret, YarnClusterAnalyzer.getNumCores()); return ret; } /** * * @return */ public static boolean isSparkExecutionMode() { return (DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK || DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK); } /** * * @return */ public static boolean isHybridExecutionMode() { return (DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID || DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK); } /** * Returns the degree of parallelism used for parallel text read. * This is computed as the number of virtual cores scales by the * PARALLEL_READ_PARALLELISM_MULTIPLIER. If PARALLEL_READ_TEXTFORMATS * is disabled, this method returns 1. 
* * @return */ public static int getParallelTextReadParallelism() { if (!PARALLEL_CP_READ_TEXTFORMATS) return 1; // sequential execution //compute degree of parallelism for parallel text read double dop = InfrastructureAnalyzer.getLocalParallelism() * PARALLEL_CP_READ_PARALLELISM_MULTIPLIER; return (int) Math.round(dop); } /** * * @return */ public static int getParallelBinaryReadParallelism() { if (!PARALLEL_CP_READ_BINARYFORMATS) return 1; // sequential execution //compute degree of parallelism for parallel text read double dop = InfrastructureAnalyzer.getLocalParallelism() * PARALLEL_CP_READ_PARALLELISM_MULTIPLIER; return (int) Math.round(dop); } /** * Returns the degree of parallelism used for parallel text write. * This is computed as the number of virtual cores scales by the * PARALLEL_WRITE_PARALLELISM_MULTIPLIER. If PARALLEL_WRITE_TEXTFORMATS * is disabled, this method returns 1. * * @return */ public static int getParallelTextWriteParallelism() { if (!PARALLEL_CP_WRITE_TEXTFORMATS) return 1; // sequential execution //compute degree of parallelism for parallel text read double dop = InfrastructureAnalyzer.getLocalParallelism() * PARALLEL_CP_WRITE_PARALLELISM_MULTIPLIER; return (int) Math.round(dop); } /** * * @return */ public static int getParallelBinaryWriteParallelism() { if (!PARALLEL_CP_WRITE_BINARYFORMATS) return 1; // sequential execution //compute degree of parallelism for parallel text read double dop = InfrastructureAnalyzer.getLocalParallelism() * PARALLEL_CP_WRITE_PARALLELISM_MULTIPLIER; return (int) Math.round(dop); } //////////////////////// // Memory Estimates // //////////////////////// /** * * @param mc * @return */ public static long estimateSizeExactSparsity(MatrixCharacteristics mc) { return estimateSizeExactSparsity(mc.getRows(), mc.getCols(), mc.getNonZeros()); } /** * Estimates the footprint (in bytes) for an in-memory representation of a * matrix with dimensions=(nrows,ncols) and and number of non-zeros nnz. 
* * @param nrows * @param ncols * @param sp * @return */ public static long estimateSizeExactSparsity(long nrows, long ncols, long nnz) { double sp = getSparsity(nrows, ncols, nnz); return estimateSizeExactSparsity(nrows, ncols, sp); } /** * Estimates the footprint (in bytes) for an in-memory representation of a * matrix with dimensions=(nrows,ncols) and sparsity=sp. * * This function can be used directly in Hops, when the actual sparsity is * known i.e., <code>sp</code> is guaranteed to give worst-case estimate * (e.g., Rand with a fixed sparsity). In all other cases, estimateSize() * must be used so that worst-case estimates are computed, whenever * applicable. * * @param nrows * @param ncols * @param sp * @return */ public static long estimateSizeExactSparsity(long nrows, long ncols, double sp) { return MatrixBlock.estimateSizeInMemory(nrows, ncols, sp); } /** * Estimates the footprint (in bytes) for a partitioned in-memory representation of a * matrix with the given matrix characteristics * * @param mc * @return */ public static long estimatePartitionedSizeExactSparsity(MatrixCharacteristics mc) { return estimatePartitionedSizeExactSparsity(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), mc.getNonZeros()); } /** * Estimates the footprint (in bytes) for a partitioned in-memory representation of a * matrix with dimensions=(nrows,ncols) and number of non-zeros nnz. * * @param nrows * @param ncols * @param sp * @return */ public static long estimatePartitionedSizeExactSparsity(long rlen, long clen, long brlen, long bclen, long nnz) { double sp = getSparsity(rlen, clen, nnz); return estimatePartitionedSizeExactSparsity(rlen, clen, brlen, bclen, sp); } /** * Estimates the footprint (in bytes) for a partitioned in-memory representation of a * matrix with dimensions=(nrows,ncols) and sparsity=sp. 
* * @param nrows * @param ncols * @param sp * @return */ public static long estimatePartitionedSizeExactSparsity(long rlen, long clen, long brlen, long bclen, double sp) { long ret = 0; //check for guaranteed existence of empty blocks (less nnz than total number of blocks) long tnrblks = (long) Math.ceil((double) rlen / brlen); long tncblks = (long) Math.ceil((double) clen / bclen); long nnz = (long) Math.ceil(sp * rlen * clen); if (nnz < tnrblks * tncblks) { long lrlen = Math.min(rlen, brlen); long lclen = Math.min(clen, bclen); return nnz * estimateSizeExactSparsity(lrlen, lclen, 1) + (tnrblks * tncblks - nnz) * estimateSizeEmptyBlock(lrlen, lclen); } //estimate size of full brlen x bclen blocks long nrblks = rlen / brlen; long ncblks = clen / bclen; if (nrblks * ncblks > 0) ret += nrblks * ncblks * estimateSizeExactSparsity(brlen, bclen, sp); //estimate size of bottom boundary blocks long lrlen = rlen % brlen; if (ncblks > 0 && lrlen > 0) ret += ncblks * estimateSizeExactSparsity(lrlen, bclen, sp); //estimate size of right boundary blocks long lclen = clen % bclen; if (nrblks > 0 && lclen > 0) ret += nrblks * estimateSizeExactSparsity(brlen, lclen, sp); //estimate size of bottom right boundary block if (lrlen > 0 && lclen > 0) ret += estimateSizeExactSparsity(lrlen, lclen, sp); return ret; } /** * Similar to estimate() except that it provides worst-case estimates * when the optimization type is ROBUST. * * @param nrows * @param ncols * @param sp * @return */ public static long estimateSize(long nrows, long ncols) { return estimateSizeExactSparsity(nrows, ncols, 1.0); } /** * * @param nrows * @param ncols * @return */ public static long estimateSizeEmptyBlock(long nrows, long ncols) { return estimateSizeExactSparsity(0, 0, 0.0d); } /** * Estimates the memory footprint of a SparseRow with <code>clen</code> * columns and <code>sp</code> sparsity. This method accounts for the * overhead incurred by extra cells allocated (but not used) for SparseRow. 
* It assumes that non-zeros are uniformly distributed in the matrix -- * i.e., #estimated nnz in a given SparseRow = clen*sp. * * @param clen * @param sp * @return estimated size in bytes */ public static long estimateRowSize(long clen, double sp) { if (sp == 0) return 0; int basicSize = 28; int cellSize = 12; // every cell takes 12 (8+4) bytes if (sp == 1) { return clen * cellSize; } long numCells = SparseRow.initialCapacity; if ((long) (sp * clen) > numCells) { numCells = (long) (sp * clen); } long allocatedCells = (long) Math.pow(2, Math.ceil(Math.log(numCells) / Math.log(2))); long rowSize = basicSize + allocatedCells * cellSize; return rowSize; } public static long estimateSizeTextOutput(long rows, long cols, long nnz, OutputInfo oinfo) { long bsize = MatrixBlock.estimateSizeOnDisk(rows, cols, nnz); if (oinfo == OutputInfo.TextCellOutputInfo || oinfo == OutputInfo.MatrixMarketOutputInfo) return bsize * 3; else if (oinfo == OutputInfo.CSVOutputInfo) return bsize * 2; //unknown output info return bsize; } /** * Returns false if dimensions known to be invalid; other true * * @param rows * @param cols * @return */ public static boolean isValidCPDimensions(long rows, long cols) { //the current CP runtime implementation requires that rows and cols //are integers since we use a single matrixblock to represent the //entire matrix return (rows <= Integer.MAX_VALUE && cols <= Integer.MAX_VALUE); } /** * Determines if valid matrix size to be represented in CP data structures. Note that * sparsity needs to be specified as rows*cols if unknown. 
* * @param rows * @param cols * @param sparsity * @return */ public static boolean isValidCPMatrixSize(long rows, long cols, double sparsity) { boolean ret = true; //the current CP runtime implementation has several limitations: //1) for dense: 16GB because we use a linearized array (bounded to int in java) //2) for sparse: 2G x 2G nnz because (1) nnz maintained as long, (2) potential changes // to dense, and (3) sparse row arrays also of max int size (worst case in case of skew) long nnz = (long) (sparsity * rows * cols); boolean sparse = MatrixBlock.evalSparseFormatInMemory(rows, cols, nnz); if (sparse) //SPARSE { //check max nnz ret = (nnz <= Long.MAX_VALUE); } else //DENSE { //check number of matrix cell ret = ((rows * cols) <= MAX_NUMCELLS_CP_DENSE); } return ret; } /** * * @return * @throws HopsException */ public static boolean allowsToFilterEmptyBlockOutputs(Hop hop) throws HopsException { boolean ret = true; for (Hop p : hop.getParent()) { p.optFindExecType(); //ensure exec type evaluated ret &= (p.getExecType() == ExecType.CP || (p instanceof AggBinaryOp && allowsToFilterEmptyBlockOutputs(p)) || (p instanceof DataOp && ((DataOp) p).getDataOpType() == DataOpTypes.PERSISTENTWRITE && ((DataOp) p).getInputFormatType() == FileFormatTypes.TEXT)) && !(p instanceof FunctionOp || (p instanceof DataOp && ((DataOp) p).getInputFormatType() != FileFormatTypes.TEXT)); //no function call or transient write } return ret; } /** * * @return */ public static int getConstrainedNumThreads(int maxNumThreads) { //by default max local parallelism (vcores) int ret = InfrastructureAnalyzer.getLocalParallelism(); //apply external max constraint (e.g., set by parfor or other rewrites) if (maxNumThreads > 0) { ret = Math.min(ret, maxNumThreads); } //apply global multi-threading constraint if (!PARALLEL_CP_MATRIX_MULTIPLY) { ret = 1; } return ret; } //////////////////////// // Sparsity Estimates // //////////////////////// /** * Estimates the result sparsity for Matrix Multiplication 
A %*% B. * * @param sp1 -- sparsity of A * @param sp2 -- sparsity of B * @param m -- nrow(A) * @param k -- ncol(A), nrow(B) * @param n -- ncol(B) * @return */ public static double getMatMultSparsity(double sp1, double sp2, long m, long k, long n, boolean worstcase) { if (worstcase) { double nnz1 = sp1 * m * k; double nnz2 = sp2 * k * n; return Math.min(1, nnz1 / m) * Math.min(1, nnz2 / n); } else return (1 - Math.pow(1 - sp1 * sp2, k)); } /** * * @param rlen1 * @param clen1 * @param nnz1 * @param rlen2 * @param clen2 * @param nnz2 * @return */ public static double getLeftIndexingSparsity(long rlen1, long clen1, long nnz1, long rlen2, long clen2, long nnz2) { boolean scalarRhs = (rlen2 == 0 && clen2 == 0); //infer output worstcase output nnz long lnnz = -1; if (nnz1 >= 0 && scalarRhs) lnnz = nnz1 + 1; // nnz(left) + scalar else if (nnz1 >= 0 && nnz2 >= 0) lnnz = nnz1 + nnz2; // nnz(left) + nnz(right) else if (nnz1 >= 0 && rlen2 > 0 && clen2 > 0) lnnz = nnz1 + rlen2 * clen2; // nnz(left) + nnz(right_dense) lnnz = Math.min(lnnz, rlen1 * clen1); return getSparsity(rlen1, clen1, (lnnz >= 0) ? lnnz : rlen1 * clen1); } /** * Determines if a given binary op is potentially conditional sparse safe. * * @param op * @return */ public static boolean isBinaryOpConditionalSparseSafe(OpOp2 op) { return (op == OpOp2.GREATER || op == OpOp2.LESS || op == OpOp2.NOTEQUAL || op == OpOp2.EQUAL || op == OpOp2.MINUS); } /** * Determines if a given binary op with scalar literal guarantee an output * sparsity which is exactly the same as its matrix input sparsity. 
* * @param op * @param lit * @return */ public static boolean isBinaryOpConditionalSparseSafeExact(OpOp2 op, LiteralOp lit) { double val = HopRewriteUtils.getDoubleValueSafe(lit); return (op == OpOp2.NOTEQUAL && val == 0); } /** * * @param sp1 * @param op * @param lit * @return */ public static double getBinaryOpSparsityConditionalSparseSafe(double sp1, OpOp2 op, LiteralOp lit) { double val = HopRewriteUtils.getDoubleValueSafe(lit); return ((op == OpOp2.GREATER && val == 0) || (op == OpOp2.LESS && val == 0) || (op == OpOp2.NOTEQUAL && val == 0) || (op == OpOp2.EQUAL && val != 0) || (op == OpOp2.MINUS && val == 0)) ? sp1 : 1.0; } /** * Estimates the result sparsity for matrix-matrix binary operations (A op B) * * @param sp1 -- sparsity of A * @param sp2 -- sparsity of B * @param op -- binary operation * @return * * NOTE: append has specific computation */ public static double getBinaryOpSparsity(double sp1, double sp2, OpOp2 op, boolean worstcase) { // default is worst-case estimate for robustness double ret = 1.0; if (worstcase) { //NOTE: for matrix-scalar operations this estimate is too conservative, because //Math.min(1, sp1 + sp2) will always give a sparsity 1 if we pass sp2=1 for scalars. 
//In order to do better (with guarantees), we need to take the actual values into account switch (op) { case PLUS: case MINUS: case LESS: case GREATER: case NOTEQUAL: case MIN: case MAX: case OR: ret = Math.min(1, sp1 + sp2); break; case MULT: case AND: ret = Math.min(sp1, sp2); break; case DIV: case MODULUS: case POW: case MINUS_NZ: case LOG_NZ: ret = sp1; break; //case EQUAL: //doesnt work on worstcase estimates, but on // ret = 1-Math.abs(sp1-sp2); break; default: ret = 1.0; } } else { switch (op) { case PLUS: case MINUS: // result[i,j] != 0 iff A[i,j] !=0 || B[i,j] != 0 // worst case estimate = sp1+sp2 ret = (1 - (1 - sp1) * (1 - sp2)); break; case MULT: // result[i,j] != 0 iff A[i,j] !=0 && B[i,j] != 0 // worst case estimate = min(sp1,sp2) ret = sp1 * sp2; break; case DIV: ret = 1.0; // worst case estimate break; case LESS: case LESSEQUAL: case GREATER: case GREATEREQUAL: case EQUAL: case NOTEQUAL: ret = 1.0; // purely data-dependent operations, and hence worse-case estimate break; //MIN, MAX, AND, OR, LOG, POW default: ret = 1.0; } } return ret; } public static double getSparsity(long dim1, long dim2, long nnz) { if (dim1 <= 0 || dim2 <= 0 || nnz < 0) return 1.0; else return Math.min(((double) nnz) / dim1 / dim2, 1.0); } public static String toMB(double inB) { if (inB < 0) return "-"; return String.format("%.0f", inB / (1024 * 1024)); } /** * Function to evaluate simple size expressions over literals and now/ncol. * * It returns the exact results of this expressions if known, otherwise * Long.MAX_VALUE if unknown. * * @param root * @return * @throws HopsException */ public static long rEvalSimpleLongExpression(Hop root, HashMap<Long, Long> valMemo) throws HopsException { long ret = Long.MAX_VALUE; //for simplicity and robustness call double and cast. 
HashMap<Long, Double> dvalMemo = new HashMap<Long, Double>(); double tmp = rEvalSimpleDoubleExpression(root, dvalMemo); if (tmp != Double.MAX_VALUE) ret = UtilFunctions.toLong(tmp); return ret; } /** * * @param root * @param valMemo * @param vars * @return * @throws HopsException */ public static long rEvalSimpleLongExpression(Hop root, HashMap<Long, Long> valMemo, LocalVariableMap vars) throws HopsException { long ret = Long.MAX_VALUE; //for simplicity and robustness call double and cast. HashMap<Long, Double> dvalMemo = new HashMap<Long, Double>(); double tmp = rEvalSimpleDoubleExpression(root, dvalMemo, vars); if (tmp != Double.MAX_VALUE) ret = UtilFunctions.toLong(tmp); return ret; } /** * * @param root * @param valMemo * @return * @throws HopsException */ public static double rEvalSimpleDoubleExpression(Hop root, HashMap<Long, Double> valMemo) throws HopsException { //memoization (prevent redundant computation of common subexpr) if (valMemo.containsKey(root.getHopID())) return valMemo.get(root.getHopID()); double ret = Double.MAX_VALUE; //always use constants if (root instanceof LiteralOp) ret = HopRewriteUtils.getDoubleValue((LiteralOp) root); //advanced size expression evaluation if (OptimizerUtils.ALLOW_SIZE_EXPRESSION_EVALUATION) { if (root instanceof UnaryOp) ret = rEvalSimpleUnaryDoubleExpression(root, valMemo); else if (root instanceof BinaryOp) ret = rEvalSimpleBinaryDoubleExpression(root, valMemo); } valMemo.put(root.getHopID(), ret); return ret; } /** * * @param root * @param valMemo * @param vars * @return * @throws HopsException */ public static double rEvalSimpleDoubleExpression(Hop root, HashMap<Long, Double> valMemo, LocalVariableMap vars) throws HopsException { //memoization (prevent redundant computation of common subexpr) if (valMemo.containsKey(root.getHopID())) return valMemo.get(root.getHopID()); double ret = Double.MAX_VALUE; if (OptimizerUtils.ALLOW_SIZE_EXPRESSION_EVALUATION) { if (root instanceof LiteralOp) ret = 
HopRewriteUtils.getDoubleValue((LiteralOp) root); else if (root instanceof UnaryOp) ret = rEvalSimpleUnaryDoubleExpression(root, valMemo, vars); else if (root instanceof BinaryOp) ret = rEvalSimpleBinaryDoubleExpression(root, valMemo, vars); else if (root instanceof DataOp) { String name = root.getName(); Data dat = vars.get(name); if (dat != null && dat instanceof ScalarObject) ret = ((ScalarObject) dat).getDoubleValue(); } } valMemo.put(root.getHopID(), ret); return ret; } /** * * @param root * @param valMemo * @return * @throws HopsException */ protected static double rEvalSimpleUnaryDoubleExpression(Hop root, HashMap<Long, Double> valMemo) throws HopsException { //memoization (prevent redundant computation of common subexpr) if (valMemo.containsKey(root.getHopID())) return valMemo.get(root.getHopID()); double ret = Double.MAX_VALUE; UnaryOp uroot = (UnaryOp) root; Hop input = uroot.getInput().get(0); if (uroot.getOp() == Hop.OpOp1.NROW) ret = (input.getDim1() > 0) ? input.getDim1() : Double.MAX_VALUE; else if (uroot.getOp() == Hop.OpOp1.NCOL) ret = (input.getDim2() > 0) ? input.getDim2() : Double.MAX_VALUE; else { double lval = rEvalSimpleDoubleExpression(uroot.getInput().get(0), valMemo); if (lval != Double.MAX_VALUE) { switch (uroot.getOp()) { case SQRT: ret = Math.sqrt(lval); break; case ROUND: ret = Math.round(lval); break; case CAST_AS_BOOLEAN: ret = (lval != 0) ? 
1 : 0; break; case CAST_AS_INT: ret = UtilFunctions.toLong(lval); break; case CAST_AS_DOUBLE: ret = lval; break; default: ret = Double.MAX_VALUE; } } } valMemo.put(root.getHopID(), ret); return ret; } /** * * @param root * @param valMemo * @param vars * @return * @throws HopsException */ protected static double rEvalSimpleUnaryDoubleExpression(Hop root, HashMap<Long, Double> valMemo, LocalVariableMap vars) throws HopsException { //memoization (prevent redundant computation of common subexpr) if (valMemo.containsKey(root.getHopID())) return valMemo.get(root.getHopID()); double ret = Double.MAX_VALUE; UnaryOp uroot = (UnaryOp) root; Hop input = uroot.getInput().get(0); if (uroot.getOp() == Hop.OpOp1.NROW) ret = (input.getDim1() > 0) ? input.getDim1() : Double.MAX_VALUE; else if (uroot.getOp() == Hop.OpOp1.NCOL) ret = (input.getDim2() > 0) ? input.getDim2() : Double.MAX_VALUE; else { double lval = rEvalSimpleDoubleExpression(uroot.getInput().get(0), valMemo, vars); if (lval != Double.MAX_VALUE) { switch (uroot.getOp()) { case SQRT: ret = Math.sqrt(lval); break; case ROUND: ret = Math.round(lval); break; case CAST_AS_BOOLEAN: ret = (lval != 0) ? 
1 : 0; break; case CAST_AS_INT: ret = UtilFunctions.toLong(lval); break; case CAST_AS_DOUBLE: ret = lval; break; default: ret = Double.MAX_VALUE; } } } valMemo.put(root.getHopID(), ret); return ret; } /** * * @param root * @param valMemo * @return * @throws HopsException */ protected static double rEvalSimpleBinaryDoubleExpression(Hop root, HashMap<Long, Double> valMemo) throws HopsException { //memoization (prevent redundant computation of common subexpr) if (valMemo.containsKey(root.getHopID())) return valMemo.get(root.getHopID()); double ret = Double.MAX_VALUE; BinaryOp broot = (BinaryOp) root; double lret = rEvalSimpleDoubleExpression(broot.getInput().get(0), valMemo); double rret = rEvalSimpleDoubleExpression(broot.getInput().get(1), valMemo); //note: positive and negative values might be valid subexpressions if (lret != Double.MAX_VALUE && rret != Double.MAX_VALUE) //if known { switch (broot.getOp()) { case PLUS: ret = lret + rret; break; case MINUS: ret = lret - rret; break; case MULT: ret = lret * rret; break; case DIV: ret = lret / rret; break; case MIN: ret = Math.min(lret, rret); break; case MAX: ret = Math.max(lret, rret); break; case POW: ret = Math.pow(lret, rret); break; default: ret = Double.MAX_VALUE; } } valMemo.put(root.getHopID(), ret); return ret; } /** * * @param root * @param valMemo * @param vars * @return * @throws HopsException */ protected static double rEvalSimpleBinaryDoubleExpression(Hop root, HashMap<Long, Double> valMemo, LocalVariableMap vars) throws HopsException { //memoization (prevent redundant computation of common subexpr) if (valMemo.containsKey(root.getHopID())) return valMemo.get(root.getHopID()); double ret = Double.MAX_VALUE; BinaryOp broot = (BinaryOp) root; double lret = rEvalSimpleDoubleExpression(broot.getInput().get(0), valMemo, vars); double rret = rEvalSimpleDoubleExpression(broot.getInput().get(1), valMemo, vars); //note: positive and negative values might be valid subexpressions if (lret != Double.MAX_VALUE && 
rret != Double.MAX_VALUE) //if known { switch (broot.getOp()) { case PLUS: ret = lret + rret; break; case MINUS: ret = lret - rret; break; case MULT: ret = lret * rret; break; case DIV: ret = lret / rret; break; case MIN: ret = Math.min(lret, rret); break; case MAX: ret = Math.max(lret, rret); break; case POW: ret = Math.pow(lret, rret); break; default: ret = Double.MAX_VALUE; } } valMemo.put(root.getHopID(), ret); return ret; } }