Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.sysml.hops.cost; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Map.Entry; import java.util.StringTokenizer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.lops.Lop; import org.apache.sysml.parser.DMLProgram; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock; import org.apache.sysml.runtime.controlprogram.ForProgramBlock; import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock; import org.apache.sysml.runtime.controlprogram.IfProgramBlock; import org.apache.sysml.runtime.controlprogram.LocalVariableMap; import org.apache.sysml.runtime.controlprogram.Program; import org.apache.sysml.runtime.controlprogram.ProgramBlock; import org.apache.sysml.runtime.controlprogram.WhileProgramBlock; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; import org.apache.sysml.runtime.instructions.Instruction; import org.apache.sysml.runtime.instructions.InstructionUtils; import org.apache.sysml.runtime.instructions.MRInstructionParser; import org.apache.sysml.runtime.instructions.MRJobInstruction; import org.apache.sysml.runtime.instructions.cp.AggregateTernaryCPInstruction; import org.apache.sysml.runtime.instructions.cp.AggregateUnaryCPInstruction; import org.apache.sysml.runtime.instructions.cp.BinaryCPInstruction; import org.apache.sysml.runtime.instructions.cp.CPInstruction; import org.apache.sysml.runtime.instructions.cp.Data; import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction; import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction; import org.apache.sysml.runtime.instructions.cp.MMTSJCPInstruction; import org.apache.sysml.runtime.instructions.cp.MultiReturnBuiltinCPInstruction; import org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction; import org.apache.sysml.runtime.instructions.cp.StringInitCPInstruction; import org.apache.sysml.runtime.instructions.cp.UnaryCPInstruction; import org.apache.sysml.runtime.instructions.cp.VariableCPInstruction; import org.apache.sysml.runtime.instructions.mr.MRInstruction; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData; import org.apache.sysml.runtime.matrix.operators.CMOperator; import org.apache.sysml.runtime.matrix.operators.CMOperator.AggregateOperationTypes; import org.apache.sysml.runtime.util.UtilFunctions; public abstract class CostEstimator { protected static final Log LOG = LogFactory.getLog(CostEstimator.class.getName()); private static final int DEFAULT_NUMITER = 15; protected static final VarStats _unknownStats = new VarStats(1, 1, -1, -1, -1, false); protected static final VarStats _scalarStats = new VarStats(1, 1, 1, 1, 1, true); public double getTimeEstimate(Program rtprog, LocalVariableMap vars, HashMap<String, VarStats> stats) throws DMLRuntimeException { double costs = 0; //obtain stats from symboltable (e.g., during recompile) maintainVariableStatistics(vars, stats); //get cost estimate for (ProgramBlock pb : rtprog.getProgramBlocks()) costs += rGetTimeEstimate(pb, stats, new HashSet<String>(), true); return costs; } public double getTimeEstimate(ProgramBlock pb, LocalVariableMap vars, HashMap<String, VarStats> stats, boolean recursive) throws DMLRuntimeException { //obtain stats from symboltable (e.g., during recompile) maintainVariableStatistics(vars, stats); //get cost estimate return rGetTimeEstimate(pb, stats, new HashSet<String>(), recursive); } private double rGetTimeEstimate(ProgramBlock pb, HashMap<String, VarStats> stats, HashSet<String> memoFunc, boolean recursive) throws DMLRuntimeException { double ret = 0; if (pb instanceof WhileProgramBlock) { WhileProgramBlock tmp = (WhileProgramBlock) pb; if (recursive) for (ProgramBlock pb2 : tmp.getChildBlocks()) ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive); ret *= DEFAULT_NUMITER; } else if (pb instanceof IfProgramBlock) { IfProgramBlock tmp = (IfProgramBlock) pb; if (recursive) { for (ProgramBlock pb2 : tmp.getChildBlocksIfBody()) ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive); if (tmp.getChildBlocksElseBody() != null) for (ProgramBlock pb2 : tmp.getChildBlocksElseBody()) { ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive); ret /= 2; //weighted sum } } } else if (pb instanceof ForProgramBlock) //includes ParFORProgramBlock { ForProgramBlock tmp = (ForProgramBlock) pb; if (recursive) for (ProgramBlock pb2 : tmp.getChildBlocks()) ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive); ret *= getNumIterations(stats, tmp.getIterablePredicateVars()); } else if (pb instanceof FunctionProgramBlock && !(pb instanceof ExternalFunctionProgramBlock)) //see generic { FunctionProgramBlock tmp = (FunctionProgramBlock) pb; if (recursive) for (ProgramBlock pb2 : tmp.getChildBlocks()) ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive); } else { ArrayList<Instruction> tmp = pb.getInstructions(); for (Instruction inst : tmp) { if (inst instanceof CPInstruction) //CP { //obtain stats from createvar, cpvar, rmvar, rand maintainCPInstVariableStatistics((CPInstruction) inst, stats); //extract statistics (instruction-specific) Object[] o = extractCPInstStatistics(inst, stats); VarStats[] vs = (VarStats[]) o[0]; String[] attr = (String[]) o[1]; //if(LOG.isDebugEnabled()) // LOG.debug(inst); //call time estimation for inst ret += getCPInstTimeEstimate(inst, vs, attr); if (inst instanceof FunctionCallCPInstruction) //functions { FunctionCallCPInstruction finst = (FunctionCallCPInstruction) inst; String fkey = DMLProgram.constructFunctionKey(finst.getNamespace(), finst.getFunctionName()); //awareness of recursive functions, missing program if (!memoFunc.contains(fkey) && pb.getProgram() != null) { if (LOG.isDebugEnabled()) LOG.debug("Begin Function " + fkey); memoFunc.add(fkey); Program prog = pb.getProgram(); FunctionProgramBlock fpb = prog.getFunctionProgramBlock(finst.getNamespace(), finst.getFunctionName()); ret += rGetTimeEstimate(fpb, stats, memoFunc, recursive); memoFunc.remove(fkey); if (LOG.isDebugEnabled()) LOG.debug("End Function " + fkey); } } } else if (inst instanceof MRJobInstruction) //MR { //obtain stats for job maintainMRJobInstVariableStatistics(inst, stats); //extract input statistics Object[] o = extractMRJobInstStatistics(inst, stats); VarStats[] vs = (VarStats[]) o[0]; //if(LOG.isDebugEnabled()) // LOG.debug(inst); if (LOG.isDebugEnabled()) LOG.debug("Begin MRJob type=" + ((MRJobInstruction) inst).getJobType()); //call time estimation for complex MR inst ret += getMRJobInstTimeEstimate(inst, vs, null); if (LOG.isDebugEnabled()) LOG.debug("End MRJob"); //cleanup stats for job cleanupMRJobVariableStatistics(inst, stats); } } } return ret; } private void maintainVariableStatistics(LocalVariableMap vars, HashMap<String, VarStats> stats) throws DMLRuntimeException { for (String varname : vars.keySet()) { Data dat = vars.get(varname); VarStats vs = null; if (dat instanceof MatrixObject) //matrix { MatrixObject mo = (MatrixObject) dat; MatrixCharacteristics mc = ((MatrixDimensionsMetaData) mo.getMetaData()).getMatrixCharacteristics(); long rlen = mc.getRows(); long clen = mc.getCols(); long brlen = mc.getRowsPerBlock(); long bclen = mc.getColsPerBlock(); long nnz = mc.getNonZeros(); boolean inmem = mo.getStatusAsString().equals("CACHED"); vs = new VarStats(rlen, clen, brlen, bclen, nnz, inmem); } else //scalar { vs = _scalarStats; } stats.put(varname, vs); //System.out.println(varname+" "+vs); } } private void maintainCPInstVariableStatistics(CPInstruction inst, HashMap<String, VarStats> stats) { if (inst instanceof VariableCPInstruction) { String optype = inst.getOpcode(); String[] parts = InstructionUtils.getInstructionParts(inst.toString()); if (optype.equals("createvar")) { if (parts.length < 10) return; String varname = parts[1]; long rlen = Long.parseLong(parts[6]); long clen = Long.parseLong(parts[7]); long brlen = Long.parseLong(parts[8]); long bclen = Long.parseLong(parts[9]); long nnz = Long.parseLong(parts[10]); VarStats vs = new VarStats(rlen, clen, brlen, bclen, nnz, false); stats.put(varname, vs); //System.out.println(varname+" "+vs); } else if (optype.equals("cpvar")) { String varname = parts[1]; String varname2 = parts[2]; VarStats vs = stats.get(varname); stats.put(varname2, vs); } else if (optype.equals("mvvar")) { String varname = parts[1]; String varname2 = parts[2]; VarStats vs = stats.remove(varname); stats.put(varname2, vs); } else if (optype.equals("rmvar")) { String varname = parts[1]; stats.remove(varname); } } else if (inst instanceof DataGenCPInstruction) { DataGenCPInstruction randInst = (DataGenCPInstruction) inst; String varname = randInst.output.getName(); long rlen = randInst.getRows(); long clen = randInst.getCols(); long brlen = randInst.getRowsInBlock(); long bclen = randInst.getColsInBlock(); long nnz = (long) (randInst.getSparsity() * rlen * clen); VarStats vs = new VarStats(rlen, clen, brlen, bclen, nnz, true); stats.put(varname, vs); } else if (inst instanceof StringInitCPInstruction) { StringInitCPInstruction iinst = (StringInitCPInstruction) inst; String varname = iinst.output.getName(); long rlen = iinst.getRows(); long clen = iinst.getCols(); VarStats vs = new VarStats(rlen, clen, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), rlen * clen, true); stats.put(varname, vs); } else if (inst instanceof FunctionCallCPInstruction) { FunctionCallCPInstruction finst = (FunctionCallCPInstruction) inst; ArrayList<String> outVars = finst.getBoundOutputParamNames(); for (String varname : outVars) { stats.put(varname, _unknownStats); //System.out.println(varname+" "+vs); } } } private void maintainMRJobInstVariableStatistics(Instruction inst, HashMap<String, VarStats> stats) throws DMLRuntimeException { MRJobInstruction jobinst = (MRJobInstruction) inst; //input sizes (varname, index mapping) String[] inVars = jobinst.getInputVars(); int index = -1; for (String varname : inVars) { VarStats vs = stats.get(varname); if (vs == null) vs = _unknownStats; stats.put(String.valueOf(++index), vs); } //rand output String rdInst = jobinst.getIv_randInstructions(); if (rdInst != null && rdInst.length() > 0) { StringTokenizer st = new StringTokenizer(rdInst, Lop.INSTRUCTION_DELIMITOR); while (st.hasMoreTokens()) //foreach rand instruction { String[] parts = InstructionUtils.getInstructionParts(st.nextToken()); byte outIndex = Byte.parseByte(parts[2]); long rlen = parts[3].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[3]); long clen = parts[4].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[4]); long brlen = Long.parseLong(parts[5]); long bclen = Long.parseLong(parts[6]); long nnz = (long) (Double.parseDouble(parts[9]) * rlen * clen); VarStats vs = new VarStats(rlen, clen, brlen, bclen, nnz, false); stats.put(String.valueOf(outIndex), vs); } } //compute intermediate result indices HashMap<Byte, MatrixCharacteristics> dims = new HashMap<Byte, MatrixCharacteristics>(); //populate input indices for (Entry<String, VarStats> e : stats.entrySet()) { if (UtilFunctions.isIntegerNumber(e.getKey())) { byte ix = Byte.parseByte(e.getKey()); VarStats vs = e.getValue(); if (vs != null) { MatrixCharacteristics mc = new MatrixCharacteristics(vs._rlen, vs._clen, (int) vs._brlen, (int) vs._bclen, (long) vs._nnz); dims.put(ix, mc); } } } //compute dims for all instructions String[] instCat = new String[] { jobinst.getIv_randInstructions(), jobinst.getIv_recordReaderInstructions(), jobinst.getIv_instructionsInMapper(), jobinst.getIv_shuffleInstructions(), jobinst.getIv_aggInstructions(), jobinst.getIv_otherInstructions() }; for (String linstCat : instCat) if (linstCat != null && linstCat.length() > 0) { String[] linst = linstCat.split(Instruction.INSTRUCTION_DELIM); for (String instStr : linst) { String instStr2 = replaceInstructionPatch(instStr); MRInstruction mrinst = MRInstructionParser.parseSingleInstruction(instStr2); MatrixCharacteristics.computeDimension(dims, mrinst); } } //create varstats if necessary for (Entry<Byte, MatrixCharacteristics> e : dims.entrySet()) { byte ix = e.getKey(); if (!stats.containsKey(String.valueOf(ix))) { MatrixCharacteristics mc = e.getValue(); VarStats vs = new VarStats(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), mc.getNonZeros(), false); stats.put(String.valueOf(ix), vs); } } //map result indexes String[] outLabels = jobinst.getOutputVars(); byte[] resultIndexes = jobinst.getIv_resultIndices(); for (int i = 0; i < resultIndexes.length; i++) { String varname = outLabels[i]; VarStats varvs = stats.get(String.valueOf(resultIndexes[i])); if (varvs == null) { varvs = stats.get(outLabels[i]); } varvs._inmem = false; stats.put(varname, varvs); } } protected String replaceInstructionPatch(String inst) { String ret = inst; while (ret.contains(Lop.VARIABLE_NAME_PLACEHOLDER)) { int index1 = ret.indexOf(Lop.VARIABLE_NAME_PLACEHOLDER); int index2 = ret.indexOf(Lop.VARIABLE_NAME_PLACEHOLDER, index1 + 1); String replace = ret.substring(index1, index2 + 1); ret = ret.replaceAll(replace, "1"); } return ret; } private Object[] extractCPInstStatistics(Instruction inst, HashMap<String, VarStats> stats) { Object[] ret = new Object[2]; //stats, attrs VarStats[] vs = new VarStats[3]; String[] attr = null; if (inst instanceof UnaryCPInstruction) { if (inst instanceof DataGenCPInstruction) { DataGenCPInstruction rinst = (DataGenCPInstruction) inst; vs[0] = _unknownStats; vs[1] = _unknownStats; vs[2] = stats.get(rinst.output.getName()); //prepare attributes for cost estimation int type = 2; //full rand if (rinst.getMinValue() == 0.0 && rinst.getMaxValue() == 0.0) type = 0; else if (rinst.getSparsity() == 1.0 && rinst.getMinValue() == rinst.getMaxValue()) type = 1; attr = new String[] { String.valueOf(type) }; } else if (inst instanceof StringInitCPInstruction) { StringInitCPInstruction rinst = (StringInitCPInstruction) inst; vs[0] = _unknownStats; vs[1] = _unknownStats; vs[2] = stats.get(rinst.output.getName()); } else //general unary { UnaryCPInstruction uinst = (UnaryCPInstruction) inst; vs[0] = stats.get(uinst.input1.getName()); vs[1] = _unknownStats; vs[2] = stats.get(uinst.output.getName()); if (vs[0] == null) //scalar input, e.g., print vs[0] = _scalarStats; if (vs[2] == null) //scalar output vs[2] = _scalarStats; if (inst instanceof MMTSJCPInstruction) { String type = ((MMTSJCPInstruction) inst).getMMTSJType().toString(); attr = new String[] { type }; } else if (inst instanceof AggregateUnaryCPInstruction) { String[] parts = InstructionUtils.getInstructionParts(inst.toString()); String opcode = parts[0]; if (opcode.equals("cm")) attr = new String[] { parts[parts.length - 2] }; } } } else if (inst instanceof BinaryCPInstruction) { BinaryCPInstruction binst = (BinaryCPInstruction) inst; vs[0] = stats.get(binst.input1.getName()); vs[1] = stats.get(binst.input2.getName()); vs[2] = stats.get(binst.output.getName()); if (vs[0] == null) //scalar input, vs[0] = _scalarStats; if (vs[1] == null) //scalar input, vs[1] = _scalarStats; if (vs[2] == null) //scalar output vs[2] = _scalarStats; } else if (inst instanceof AggregateTernaryCPInstruction) { AggregateTernaryCPInstruction binst = (AggregateTernaryCPInstruction) inst; //of same dimension anyway but missing third input vs[0] = stats.get(binst.input1.getName()); vs[1] = stats.get(binst.input2.getName()); vs[2] = stats.get(binst.output.getName()); if (vs[0] == null) //scalar input, vs[0] = _scalarStats; if (vs[1] == null) //scalar input, vs[1] = _scalarStats; if (vs[2] == null) //scalar output vs[2] = _scalarStats; } else if (inst instanceof ParameterizedBuiltinCPInstruction) { //ParameterizedBuiltinCPInstruction pinst = (ParameterizedBuiltinCPInstruction) inst; String[] parts = InstructionUtils.getInstructionParts(inst.toString()); String opcode = parts[0]; if (opcode.equals("groupedagg")) { HashMap<String, String> paramsMap = ParameterizedBuiltinCPInstruction.constructParameterMap(parts); String fn = paramsMap.get("fn"); String order = paramsMap.get("order"); AggregateOperationTypes type = CMOperator.getAggOpType(fn, order); attr = new String[] { String.valueOf(type.ordinal()) }; } else if (opcode.equals("rmempty")) { HashMap<String, String> paramsMap = ParameterizedBuiltinCPInstruction.constructParameterMap(parts); attr = new String[] { String.valueOf(paramsMap.get("margin").equals("rows") ? 0 : 1) }; } vs[0] = stats.get(parts[1].substring(7).replaceAll(Lop.VARIABLE_NAME_PLACEHOLDER, "")); vs[1] = _unknownStats; //TODO vs[2] = stats.get(parts[parts.length - 1]); if (vs[0] == null) //scalar input vs[0] = _scalarStats; if (vs[2] == null) //scalar output vs[2] = _scalarStats; } else if (inst instanceof MultiReturnBuiltinCPInstruction) { //applies to qr, lu, eigen (cost computation on input1) MultiReturnBuiltinCPInstruction minst = (MultiReturnBuiltinCPInstruction) inst; vs[0] = stats.get(minst.input1.getName()); vs[1] = stats.get(minst.getOutput(0).getName()); vs[2] = stats.get(minst.getOutput(1).getName()); } else if (inst instanceof VariableCPInstruction) { setUnknownStats(vs); VariableCPInstruction varinst = (VariableCPInstruction) inst; if (varinst.getOpcode().equals("write")) { //special handling write of matrix objects (non existing if scalar) if (stats.containsKey(varinst.getInput1().getName())) vs[0] = stats.get(varinst.getInput1().getName()); attr = new String[] { varinst.getInput3().getName() }; } } else { setUnknownStats(vs); } //maintain var status (CP output always inmem) vs[2]._inmem = true; ret[0] = vs; ret[1] = attr; return ret; } private void setUnknownStats(VarStats[] vs) { vs[0] = _unknownStats; vs[1] = _unknownStats; vs[2] = _unknownStats; } private Object[] extractMRJobInstStatistics(Instruction inst, HashMap<String, VarStats> stats) { Object[] ret = new Object[2]; //stats, attrs VarStats[] vs = null; String[] attr = null; MRJobInstruction jinst = (MRJobInstruction) inst; //get number of indices byte[] indexes = jinst.getIv_resultIndices(); byte maxIx = -1; for (int i = 0; i < indexes.length; i++) if (maxIx < indexes[i]) maxIx = indexes[i]; vs = new VarStats[maxIx + 1]; //get inputs, intermediates, and outputs for (int i = 0; i < vs.length; i++) { vs[i] = stats.get(String.valueOf(i)); if (vs[i] == null) { vs[i] = _unknownStats; } } //result preparation ret[0] = vs; ret[1] = attr; return ret; } private void cleanupMRJobVariableStatistics(Instruction inst, HashMap<String, VarStats> stats) { MRJobInstruction jinst = (MRJobInstruction) inst; //get number of indices byte[] indexes = jinst.getIv_resultIndices(); byte maxIx = -1; for (int i = 0; i < indexes.length; i++) if (maxIx < indexes[i]) maxIx = indexes[i]; //remove all stats up to max index for (int i = 0; i <= maxIx; i++) { VarStats tmp = stats.remove(String.valueOf(i)); if (tmp != null) tmp._inmem = false; //all MR job outptus on HDFS } } // TODO use of vars - needed for recompilation w/o exception private int getNumIterations(HashMap<String, VarStats> stats, String[] pred) { int N = DEFAULT_NUMITER; if (pred != null && pred[1] != null && pred[2] != null && pred[3] != null) { try { int from = Integer.parseInt(pred[1]); int to = Integer.parseInt(pred[2]); int increment = Integer.parseInt(pred[3]); N = (int) Math.ceil(((double) (to - from + 1)) / increment); } catch (Exception ex) { } } return N; } protected abstract double getCPInstTimeEstimate(Instruction inst, VarStats[] vs, String[] args) throws DMLRuntimeException; protected abstract double getMRJobInstTimeEstimate(Instruction inst, VarStats[] vs, String[] args) throws DMLRuntimeException; }