// Java tutorial
/** * (C) Copyright IBM Corp. 2010, 2015 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.ibm.bi.dml.hops.globalopt; import java.util.ArrayList; import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.ibm.bi.dml.hops.DataOp; import com.ibm.bi.dml.hops.Hop; import com.ibm.bi.dml.hops.Hop.DataOpTypes; import com.ibm.bi.dml.hops.Hop.VisitStatus; import com.ibm.bi.dml.hops.HopsException; import com.ibm.bi.dml.hops.Hop.FileFormatTypes; import com.ibm.bi.dml.hops.OptimizerUtils; import com.ibm.bi.dml.hops.cost.CostEstimationWrapper; import com.ibm.bi.dml.hops.globalopt.gdfgraph.GDFGraph; import com.ibm.bi.dml.hops.globalopt.gdfgraph.GDFLoopNode; import com.ibm.bi.dml.hops.globalopt.gdfgraph.GDFNode; import com.ibm.bi.dml.hops.globalopt.gdfgraph.GDFNode.NodeType; import com.ibm.bi.dml.hops.globalopt.gdfresolve.GDFMismatchHeuristic; import com.ibm.bi.dml.hops.globalopt.gdfresolve.GDFMismatchHeuristic.MismatchHeuristicType; import com.ibm.bi.dml.hops.globalopt.gdfresolve.MismatchHeuristicFactory; import com.ibm.bi.dml.hops.rewrite.HopRewriteUtils; import com.ibm.bi.dml.hops.recompile.Recompiler; import com.ibm.bi.dml.lops.LopsException; import com.ibm.bi.dml.lops.LopProperties.ExecType; import com.ibm.bi.dml.parser.DMLTranslator; import com.ibm.bi.dml.runtime.DMLRuntimeException; import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException; import com.ibm.bi.dml.runtime.controlprogram.LocalVariableMap; 
import com.ibm.bi.dml.runtime.controlprogram.Program;
import com.ibm.bi.dml.runtime.controlprogram.ProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.context.ExecutionContext;
import com.ibm.bi.dml.runtime.controlprogram.context.ExecutionContextFactory;
import com.ibm.bi.dml.runtime.controlprogram.parfor.stat.Timing;

/**
 * Global data flow optimization via enumeration-based optimizer (dynamic programming).
 *
 * The optimizer enumerates candidate plans (exec type, blocksize, format) per GDF node,
 * combines them bottom-up via cross products with child plans, and prunes invalid and
 * suboptimal plans per set of interesting properties, optionally bounded by a
 * branch-and-bound cost threshold derived from the initial (unoptimized) program cost.
 *
 * ADDITIONAL PERFORMANCE OPT (once everything is completely working)
 * TODO cache for interesting properties
 * TODO partial runtime plan generation
 * TODO partial runtime plan costing
 *
 */
public class GDFEnumOptimizer extends GlobalOptimizer
{
    private static final Log LOG = LogFactory.getLog(GDFEnumOptimizer.class);

    //internal configuration parameters
    //note: that branch and bound pruning is invalid if we cost entire programs
    private static final boolean BRANCH_AND_BOUND_PRUNING = true;
    private static final boolean PREFERRED_PLAN_SELECTION = true;
    private static final boolean COST_FULL_PROGRAMS      = false;
    private static final boolean ENUM_CP_BLOCKSIZES      = false;
    private static final MismatchHeuristicType DEFAULT_MISMATCH_HEURISTIC = MismatchHeuristicType.FIRST;

    //internal configuration parameters
    //candidate blocksizes enumerated for cluster (MR/Spark) plans
    private static final int[] BLOCK_SIZES = new int[] {
        1024, //1 * DMLTranslator.DMLBlockSize,
        2048, //2 * DMLTranslator.DMLBlockSize,
        4096  //4 * DMLTranslator.DMLBlockSize
    };
    //relative tolerance added on top of the initial program cost for branch-and-bound pruning
    private static final double BRANCH_AND_BOUND_REL_THRES = Math.pow(10, -5);
    //private static final int[] REPLICATION_FACTORS = new int[]{1,3,5};

    private MemoStructure _memo = null; //plan memoization table
    //NOTE(review): _resolve and the counters below are static but (re-)assigned per
    //constructor/optimize call — not safe for concurrent optimizer instances; confirm
    //single-threaded use by callers.
    private static GDFMismatchHeuristic _resolve = null;
    private static long _enumeratedPlans = 0;
    private static long _prunedInvalidPlans = 0;
    private static long _prunedSuboptimalPlans = 0;
    private static long _compiledPlans = 0;
    private static long _costedPlans = 0;
    private static long _planMismatches = 0;

    public GDFEnumOptimizer() throws DMLRuntimeException
    {
        //init internal memo structure
        _memo = new MemoStructure();

        //init mismatch heuristic
        _resolve = MismatchHeuristicFactory.createMismatchHeuristic(DEFAULT_MISMATCH_HEURISTIC);
    }

    /**
     * Optimizes the given GDF graph in place: costs the baseline program, enumerates
     * optimal plans per root via dynamic programming, applies the selected plan
     * configurations to the HOP DAGs, recompiles, and records statistics in the summary.
     *
     * @param gdfgraph the global data flow graph (holds the runtime program)
     * @param summary  receives costs, plan counts, mismatch counts, and optimization time
     * @return the (modified) input graph
     */
    @Override
    public GDFGraph optimize(GDFGraph gdfgraph, Summary summary)
        throws DMLRuntimeException, DMLUnsupportedOperationException, HopsException, LopsException
    {
        Timing time = new Timing(true);

        Program prog = gdfgraph.getRuntimeProgram();
        ExecutionContext ec = ExecutionContextFactory.createContext(prog);
        ArrayList<GDFNode> roots = gdfgraph.getGraphRootNodes();

        //Step 1: baseline costing for branch and bound costs
        double initCosts = Double.MAX_VALUE;
        if (BRANCH_AND_BOUND_PRUNING) {
            initCosts = CostEstimationWrapper.getTimeEstimate(prog, ec);
            //allow a small relative slack so near-equal plans are not pruned prematurely
            initCosts = initCosts * (1 + BRANCH_AND_BOUND_REL_THRES);
        }

        //Step 2: dynamic programming plan generation
        //(finally, pick optimal root plans over all interesting property sets)
        ArrayList<Plan> rootPlans = new ArrayList<Plan>();
        for (GDFNode node : roots) {
            PlanSet ps = enumOpt(node, _memo, initCosts);
            Plan optPlan = ps.getPlanWithMinCosts();
            rootPlans.add(optPlan);
        }
        long enumPlanMismatch = getPlanMismatches();

        //check for final containment of independent roots and pick optimal
        HashMap<Long, Plan> memo = new HashMap<Long, Plan>();
        resetPlanMismatches();
        for (Plan p : rootPlans)
            rSetRuntimePlanConfig(p, memo);
        long finalPlanMismatch = getPlanMismatches();

        //generate final runtime plan (w/ optimal config)
        Recompiler.recompileProgramBlockHierarchy(prog.getProgramBlocks(), new LocalVariableMap(), 0, false);

        ec = ExecutionContextFactory.createContext(prog);
        double optCosts = CostEstimationWrapper.getTimeEstimate(prog, ec);

        //maintain optimization summary statistics
        summary.setCostsInitial(initCosts);
        summary.setCostsOptimal(optCosts);
        summary.setNumEnumPlans(_enumeratedPlans);
        summary.setNumPrunedInvalidPlans(_prunedInvalidPlans);
        summary.setNumPrunedSuboptPlans(_prunedSuboptimalPlans);
        summary.setNumCompiledPlans(_compiledPlans);
        summary.setNumCostedPlans(_costedPlans);
        summary.setNumEnumPlanMismatch(enumPlanMismatch);
        summary.setNumFinalPlanMismatch(finalPlanMismatch);
        summary.setTimeOptim(time.stop());

        return gdfgraph;
    }

    /**
     * Core dynamic programming enumeration algorithm
     * for global data flow optimization.
     *
     * Recursively enumerates plans for the subgraph rooted at {@code node}, combining
     * local node plans with optimal child plans (cross product), pruning invalid and
     * suboptimal candidates, and memoizing the resulting plan set per node.
     *
     * @param node     the GDF node whose subgraph is optimized
     * @param memo     memo table of already-enumerated plan sets per node
     * @param maxCosts branch-and-bound upper cost bound
     * @return the pruned plan set for {@code node}
     * @throws DMLRuntimeException
     * @throws DMLUnsupportedOperationException
     */
    public static PlanSet enumOpt(GDFNode node, MemoStructure memo, double maxCosts)
        throws DMLRuntimeException, DMLUnsupportedOperationException
    {
        //memoization of already enumerated subgraphs
        //NOTE(review): 'constainsEntry' is the method name as declared in MemoStructure
        //(typo for 'containsEntry') — fixing it requires a coordinated rename there.
        if (memo.constainsEntry(node))
            return memo.getEntry(node);

        //enumerate node plans
        PlanSet P = enumNodePlans(node, memo, maxCosts);
        //System.out.println("Plans after enumNodePlan:\n"+P.toString());

        //combine local node plan with optimal child plans
        for (GDFNode c : node.getInputs()) {
            //recursive optimization
            PlanSet Pc = enumOpt(c, memo, maxCosts);
            if (c instanceof GDFLoopNode) //loop outputs: restrict to plans of the referenced output var
                Pc = Pc.selectChild(node);

            //combine parent-child plans
            P = P.crossProductChild(Pc);
            _enumeratedPlans += P.size();

            //prune invalid plans
            pruneInvalidPlans(P);
        }

        //prune suboptimal plans
        pruneSuboptimalPlans(P, maxCosts);

        //memoization of created entries
        memo.putEntry(node, P);

        return P;
    }

    /**
     * Enumerates the local candidate plans for a single GDF node, dispatching on the
     * node type: generic HOPs, data ops (persistent/transient read/write), loop nodes
     * (recursing into inputs/predicate/outputs), and cross-block pass-through nodes.
     *
     * @param node     the node to enumerate plans for
     * @param memo     memo table, forwarded to recursive {@link #enumOpt} calls
     * @param maxCosts branch-and-bound bound, forwarded to recursive calls
     * @return the set of candidate plans for this node (possibly empty for cross-block nodes)
     * @throws DMLUnsupportedOperationException
     * @throws DMLRuntimeException
     */
    private static PlanSet enumNodePlans(GDFNode node, MemoStructure memo, double maxCosts)
        throws DMLRuntimeException, DMLUnsupportedOperationException
    {
        ArrayList<Plan> plans = new ArrayList<Plan>();
        //cluster exec type depends on global runtime mode (Spark vs MR)
        ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;

        //ENUMERATE HOP PLANS
        // CASE 1: core hop enumeration (other than persistent/transient read/write)
        if (node.getNodeType() == NodeType.HOP_NODE && !(node.getHop() instanceof DataOp)) {
            //core rewrite enumeration for cp and mr
            enumHopNodePlans(node, plans);
        }
        //CASE 2: dataop hop enumeration
        else if (node.getHop() instanceof DataOp) {
            DataOp dhop = (DataOp) node.getHop();

            if (dhop.getDataOpType() == DataOpTypes.PERSISTENTREAD) {
                //for persistent read the interesting properties are fixed by the input
                //but we can decide on output properties
                ExecType et = (dhop.getMemEstimate() > OptimizerUtils.getLocalMemBudget()
                    || HopRewriteUtils.alwaysRequiresReblock(dhop)) ? CLUSTER : ExecType.CP;

                //CP plans only need a single (default) blocksize
                int[] blocksizes = (et == CLUSTER) ? BLOCK_SIZES : new int[] { BLOCK_SIZES[0] };
                for (Integer bs : blocksizes) {
                    RewriteConfig rcmr = new RewriteConfig(et, bs, FileFormatTypes.BINARY);
                    InterestingProperties ipsmr = rcmr.deriveInterestingProperties();
                    Plan mrplan = new Plan(node, ipsmr, rcmr, null);
                    plans.add(mrplan);
                }
            }
            else if (dhop.getDataOpType() == DataOpTypes.PERSISTENTWRITE) {
                //for persistent write the interesting properties are fixed by the given
                //write specification
                ExecType et = (dhop.getMemEstimate() > OptimizerUtils.getLocalMemBudget()) ? CLUSTER : ExecType.CP;

                RewriteConfig rcmr = new RewriteConfig(et, (int) dhop.getRowsInBlock(), dhop.getInputFormatType());
                InterestingProperties ipsmr = rcmr.deriveInterestingProperties();
                Plan mrplan = new Plan(node, ipsmr, rcmr, null);
                plans.add(mrplan);
            }
            else if (dhop.getDataOpType() == DataOpTypes.TRANSIENTREAD
                || dhop.getDataOpType() == DataOpTypes.TRANSIENTWRITE) {
                //note: full enumeration for transient read and write; otherwise the properties
                //of these hops are never set because pass-through plans refer to different hops
                enumHopNodePlans(node, plans);
            }
        }
        //ENUMERATE LOOP PLANS
        else if (node.getNodeType() == NodeType.LOOP_NODE) {
            //TODO consistency checks inputs and outputs (updated vars)
            GDFLoopNode lnode = (GDFLoopNode) node;

            //step 0: recursive call optimize on inputs
            //no additional pruning (validity, optimality) required
            for (GDFNode in : lnode.getLoopInputs().values())
                enumOpt(in, memo, maxCosts);

            //step 1: enumerate loop plan, incl partitioning/checkpoints/reblock for inputs
            RewriteConfig rc = new RewriteConfig(ExecType.CP, -1, null);
            InterestingProperties ips = rc.deriveInterestingProperties();
            Plan lplan = new Plan(node, ips, rc, null);
            plans.add(lplan);

            //step 2: recursive call optimize on predicate
            //(predicate might be null if single variable)
            if (lnode.getLoopPredicate() != null)
                enumOpt(lnode.getLoopPredicate(), memo, maxCosts);

            //step 3: recursive call optimize on outputs
            //(return union of all output plans, later selected by output var)
            PlanSet Pout = new PlanSet();
            for (GDFNode out : lnode.getLoopOutputs().values())
                Pout = Pout.union(enumOpt(out, memo, maxCosts));
            plans.addAll(Pout.getPlans());

            //note: global pruning later done when returning to enumOpt
            //for the entire loop node
        }
        //CREATE DUMMY CROSSBLOCK PLAN
        else if (node.getNodeType() == NodeType.CROSS_BLOCK_NODE) {
            //do nothing (leads to pass-through on crossProductChild)
        }

        return new PlanSet(plans);
    }

    /**
     * Enumerates CP and cluster (MR/Spark) plans for a generic HOP node: one CP plan
     * per candidate blocksize if the memory estimate fits the local budget, plus one
     * cluster plan per blocksize if MR enumeration is required. Appends into {@code plans}.
     *
     * @param node  the HOP-backed GDF node
     * @param plans output list that created plans are appended to
     */
    private static void enumHopNodePlans(GDFNode node, ArrayList<Plan> plans)
    {
        ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;

        //create cp plan, if allowed (note: most interesting properties are irrelevant for CP)
        if (node.getHop().getMemEstimate() < OptimizerUtils.getLocalMemBudget()) {
            int[] bstmp = ENUM_CP_BLOCKSIZES ? BLOCK_SIZES : new int[] { BLOCK_SIZES[0] };
            for (Integer bs : bstmp) {
                RewriteConfig rccp = new RewriteConfig(ExecType.CP, bs, FileFormatTypes.BINARY);
                InterestingProperties ipscp = rccp.deriveInterestingProperties();
                Plan cpplan = new Plan(node, ipscp, rccp, null);
                plans.add(cpplan);
            }
        }

        //create mr plans, if required
        if (node.requiresMREnumeration()) {
            for (Integer bs : BLOCK_SIZES) {
                RewriteConfig rcmr = new RewriteConfig(CLUSTER, bs, FileFormatTypes.BINARY);
                InterestingProperties ipsmr = rcmr.deriveInterestingProperties();
                Plan mrplan = new Plan(node, ipsmr, rcmr, null);
                plans.add(mrplan);
            }
        }
    }

    /**
     * Removes invalid plans from the plan set in place (blocksize mismatches in MR and
     * across tread/twrite pairs, invalid MR formats, invalid execution types) and
     * accumulates the pruned count into the global statistics.
     *
     * @param plans plan set to filter; its plan list is replaced with the valid subset
     */
    private static void pruneInvalidPlans(PlanSet plans)
    {
        ArrayList<Plan> valid = new ArrayList<Plan>();

        //check each plan in planset for validity
        for (Plan plan : plans.getPlans()) {
            //a) check matching blocksizes if operation in MR
            if (!plan.checkValidBlocksizesInMR()) {
                //System.out.println("pruned invalid blocksize mr op");
                continue;
            }

            //b) check matching blocksizes of tread/twrite pairs
            if (!plan.checkValidBlocksizesTRead()) {
                //System.out.println("pruned invalid blocksize tread");
                continue;
            }

            //c) check valid format in MR
            if (!plan.checkValidFormatInMR()) {
                //System.out.println("pruned invalid format: "+plan.getNode().getHop().getClass());
                continue;
            }

            //d) check valid execution type per hop (e.g., function, reblock)
            if (!plan.checkValidExecutionType()) {
                //System.out.println("pruned invalid execution type: "+plan.getNode().getHop().getClass());
                continue;
            }

            valid.add(plan);
        }

        //debug output
        int sizeBefore = plans.size();
        int sizeAfter = valid.size();
        _prunedInvalidPlans += (sizeBefore - sizeAfter);
        LOG.debug("Pruned invalid plans: " + sizeBefore + " --> " + sizeAfter);

        plans.setPlans(valid);
    }

    /**
     * Costs all plans in the set and keeps, per set of interesting properties, only the
     * cheapest plan (preferring 'preferred' plans on cost ties); optionally applies the
     * branch-and-bound cost bound. Replaces the plan list in place.
     *
     * @param plans    plan set to cost and filter
     * @param maxCosts branch-and-bound upper cost bound
     * @throws DMLRuntimeException
     * @throws DMLUnsupportedOperationException
     */
    private static void pruneSuboptimalPlans(PlanSet plans, double maxCosts)
        throws DMLRuntimeException, DMLUnsupportedOperationException
    {
        //costing of all plans incl containment check
        for (Plan p : plans.getPlans()) {
            p.setCosts(costRuntimePlan(p));
        }

        //build and probe for optimal plans (hash-groupby on IPC, min costs)
        HashMap<InterestingProperties, Plan> probeMap = new HashMap<InterestingProperties, Plan>();
        for (Plan p : plans.getPlans()) {
            //max cost pruning filter (branch-and-bound)
            if (BRANCH_AND_BOUND_PRUNING && p.getCosts() > maxCosts) {
                continue;
            }

            //plan cost per IPS pruning filter (allow smaller or equal costs)
            Plan best = probeMap.get(p.getInterestingProperties());
            if (best != null && p.getCosts() > best.getCosts()) {
                continue;
            }

            //non-preferred plan pruning filter (allow smaller cost or equal cost and preferred plan)
            if (PREFERRED_PLAN_SELECTION && best != null
                && p.getCosts() == best.getCosts() && !p.isPreferredPlan()) {
                continue;
            }

            //add plan as best per IPS
            probeMap.put(p.getInterestingProperties(), p);
        }

        //copy over plans per IPC into one plan set
        ArrayList<Plan> optimal = new ArrayList<Plan>(probeMap.values());

        int sizeBefore = plans.size();
        int sizeAfter = optimal.size();
        _prunedSuboptimalPlans += (sizeBefore - sizeAfter);
        LOG.debug("Pruned suboptimal plans: " + sizeBefore + " --> " + sizeAfter);

        plans.setPlans(optimal);
    }

    /**
     * Costs a single plan by temporarily forcing its configuration into the runtime
     * program, recompiling, and estimating execution time. For partial costing, the
     * current HOP is wrapped in a temporary transient write so the DAG up to that hop
     * is compiled; the original DAG roots are restored afterwards.
     *
     * @param p plan to cost
     * @return estimated execution time of the (partial or full) program
     * @throws DMLRuntimeException if the runtime program is not available
     * @throws DMLUnsupportedOperationException
     */
    private static double costRuntimePlan(Plan p)
        throws DMLRuntimeException, DMLUnsupportedOperationException
    {
        Program prog = p.getNode().getProgram();
        if (prog == null)
            throw new DMLRuntimeException("Program not available for runtime plan costing.");

        //put data flow configuration into program
        rSetRuntimePlanConfig(p, new HashMap<Long, Plan>());

        double costs = -1;
        if (COST_FULL_PROGRAMS
            || (p.getNode().getHop() == null || p.getNode().getProgramBlock() == null)) {
            //recompile entire runtime program
            Recompiler.recompileProgramBlockHierarchy(prog.getProgramBlocks(), new LocalVariableMap(), 0, false);
            _compiledPlans++;

            //cost entire runtime program
            ExecutionContext ec = ExecutionContextFactory.createContext(prog);
            costs = CostEstimationWrapper.getTimeEstimate(prog, ec);
        }
        else {
            Hop currentHop = p.getNode().getHop();
            ProgramBlock pb = p.getNode().getProgramBlock();
            try {
                //keep the old dag roots
                ArrayList<Hop> oldRoots = pb.getStatementBlock().get_hops();
                Hop tmpHop = null;
                if (!(currentHop instanceof DataOp && ((DataOp) currentHop).isWrite())) {
                    //wrap current hop in a transient write so it becomes the sole dag root
                    ArrayList<Hop> newRoots = new ArrayList<Hop>();
                    tmpHop = new DataOp("_tmp", currentHop.getDataType(), currentHop.getValueType(),
                        currentHop, DataOpTypes.TRANSIENTWRITE, "tmp");
                    tmpHop.setVisited(VisitStatus.DONE); //ensure recursive visitstatus reset on recompile
                    newRoots.add(tmpHop);
                    pb.getStatementBlock().set_hops(newRoots);
                }

                //recompile modified runtime program
                Recompiler.recompileProgramBlockHierarchy(prog.getProgramBlocks(), new LocalVariableMap(), 0, false);
                _compiledPlans++;

                //cost partial runtime program up to current hop
                ExecutionContext ec = ExecutionContextFactory.createContext(prog);
                costs = CostEstimationWrapper.getTimeEstimate(prog, ec);

                //restore original hop dag
                if (tmpHop != null)
                    HopRewriteUtils.removeChildReference(tmpHop, currentHop);
                pb.getStatementBlock().set_hops(oldRoots);
            }
            catch (HopsException ex) {
                throw new DMLRuntimeException(ex);
            }
        }

        //release forced data flow configuration from program
        rResetRuntimePlanConfig(p, new HashMap<Long, Plan>());
        _costedPlans++;

        return costs;
    }

    /**
     * Recursively forces the plan's rewrite configuration (exec type, blocksizes,
     * reblock flags) into the underlying HOPs. Nodes shared by multiple plans are
     * memoized by node ID; configuration mismatches on shared nodes are resolved via
     * the configured mismatch heuristic and counted.
     *
     * @param p    plan (sub)tree whose configuration is applied
     * @param memo processed plans by node ID, for shared-node detection
     */
    private static void rSetRuntimePlanConfig(Plan p, HashMap<Long, Plan> memo)
    {
        ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;

        //basic memoization including containment check
        if (memo.containsKey(p.getNode().getID())) {
            Plan pmemo = memo.get(p.getNode().getID());
            if (!p.getInterestingProperties().equals(pmemo.getInterestingProperties())) {
                //replace plan in memo with new plan
                //TODO this would require additional cleanup in special cases
                if (_resolve.resolveMismatch(pmemo.getRewriteConfig(), p.getRewriteConfig()))
                    memo.put(p.getNode().getID(), p);

                //logging of encounter plan mismatch
                LOG.warn("Configuration mismatch on shared node ("
                    + p.getNode().getHop().getHopID() + "). Falling back to heuristic '"
                    + _resolve.getName() + "'.");
                LOG.warn(p.getInterestingProperties().toString());
                LOG.warn(memo.get(p.getNode().getID()).getInterestingProperties());
                _planMismatches++;
                return;
            }
        }

        //set plan configuration
        Hop hop = p.getNode().getHop();
        if (hop != null) {
            RewriteConfig rc = p.getRewriteConfig();
            //set exec type
            hop.setForcedExecType(rc.getExecType());
            //set blocksizes and reblock
            hop.setRowsInBlock(rc.getBlockSize());
            hop.setColsInBlock(rc.getBlockSize());
            if (rc.getExecType() == CLUSTER) //after blocksize update
            {
                //TODO double check dataop condition - side effect from plan validity
                boolean reblock = HopRewriteUtils.alwaysRequiresReblock(hop)
                    || (hop.hasMatrixInputWithDifferentBlocksizes() && !(hop instanceof DataOp));
                hop.setRequiresReblock(reblock);
            }
            else
                hop.setRequiresReblock(false);
        }

        //process childs
        if (p.getChilds() != null)
            for (Plan c : p.getChilds())
                rSetRuntimePlanConfig(c, memo);

        //memoization (mark as processed)
        memo.put(p.getNode().getID(), p);
    }

    /**
     * Recursively reverts the forced configuration applied by
     * {@link #rSetRuntimePlanConfig}: clears forced exec types and restores default
     * blocksizes and reblock flags. Uses node-ID memoization to visit each node once.
     *
     * @param p    plan (sub)tree whose configuration is released
     * @param memo processed plans by node ID
     */
    private static void rResetRuntimePlanConfig(Plan p, HashMap<Long, Plan> memo)
    {
        //basic memoization including containment check
        if (memo.containsKey(p.getNode().getID())) {
            return;
        }

        //release forced plan configuration
        Hop hop = p.getNode().getHop();
        if (hop != null) {
            hop.setForcedExecType(null);
            hop.setRowsInBlock(DMLTranslator.DMLBlockSize);
            hop.setColsInBlock(DMLTranslator.DMLBlockSize);
            //keep reblock flag for hops that always require reblock
            if (!HopRewriteUtils.alwaysRequiresReblock(hop)) {
                hop.setRequiresReblock(false);
            }
        }

        //process childs
        if (p.getChilds() != null)
            for (Plan c : p.getChilds())
                rResetRuntimePlanConfig(c, memo);

        //memoization (mark as processed)
        memo.put(p.getNode().getID(), p);
    }

    //returns the number of plan mismatches observed since the last reset
    private static long getPlanMismatches() {
        return _planMismatches;
    }

    //resets the plan mismatch counter (used to separate enum vs final mismatch counts)
    private static void resetPlanMismatches() {
        _planMismatches = 0;
    }
}