Java tutorial: dumping a Flink optimizer plan as JSON
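The listing below is the complete PlanJSONDumpGenerator class from the Apache Flink optimizer (package org.apache.flink.compiler.plandump). It traverses a plan DAG upwards from the data sinks, assigns each operator a numeric id, and emits one JSON object per operator with its type, name, parallelism, and predecessor connections (including ship and local strategies); for nodes of an optimized plan it additionally emits the driver strategy, global and local data properties, size estimates, costs, and compiler hints. A short usage sketch follows the listing.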
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.compiler.plandump;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.flink.api.common.operators.CompilerHints;
import org.apache.flink.compiler.CompilerException;
import org.apache.flink.compiler.dag.BinaryUnionNode;
import org.apache.flink.compiler.dag.BulkIterationNode;
import org.apache.flink.compiler.dag.DataSinkNode;
import org.apache.flink.compiler.dag.DataSourceNode;
import org.apache.flink.compiler.dag.OptimizerNode;
import org.apache.flink.compiler.dag.PactConnection;
import org.apache.flink.compiler.dag.TempMode;
import org.apache.flink.compiler.dag.WorksetIterationNode;
import org.apache.flink.compiler.dataproperties.GlobalProperties;
import org.apache.flink.compiler.dataproperties.LocalProperties;
import org.apache.flink.compiler.plan.BulkIterationPlanNode;
import org.apache.flink.compiler.plan.Channel;
import org.apache.flink.compiler.plan.OptimizedPlan;
import org.apache.flink.compiler.plan.PlanNode;
import org.apache.flink.compiler.plan.SingleInputPlanNode;
import org.apache.flink.compiler.plan.SinkPlanNode;
import org.apache.flink.compiler.plan.WorksetIterationPlanNode;
import org.apache.flink.compiler.util.Utils;
import org.apache.flink.runtime.operators.DriverStrategy;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.apache.flink.util.StringUtils;

/**
 * Dumps a plan (either a pact plan before optimization, or an optimized plan)
 * into a JSON representation suitable for visualization.
 */
public class PlanJSONDumpGenerator {

	private Map<DumpableNode<?>, Integer> nodeIds; // resolves pact nodes to ids

	private int nodeCnt;

	private boolean encodeForHTML;

	// --------------------------------------------------------------------------------------------

	public void setEncodeForHTML(boolean encodeForHTML) {
		this.encodeForHTML = encodeForHTML;
	}

	public boolean isEncodeForHTML() {
		return encodeForHTML;
	}

	public void dumpPactPlanAsJSON(List<DataSinkNode> nodes, PrintWriter writer) {
		@SuppressWarnings("unchecked")
		List<DumpableNode<?>> n = (List<DumpableNode<?>>) (List<?>) nodes;
		compilePlanToJSON(n, writer);
	}

	public String getPactPlanAsJSON(List<DataSinkNode> nodes) {
		StringWriter sw = new StringWriter();
		PrintWriter pw = new PrintWriter(sw);
		dumpPactPlanAsJSON(nodes, pw);
		return sw.toString();
	}

	public void dumpOptimizerPlanAsJSON(OptimizedPlan plan, File toFile) throws IOException {
		PrintWriter pw = null;
		try {
			pw = new PrintWriter(new FileOutputStream(toFile), false);
			dumpOptimizerPlanAsJSON(plan, pw);
			pw.flush();
		} finally {
			if (pw != null) {
				pw.close();
			}
		}
	}

	public String getOptimizerPlanAsJSON(OptimizedPlan plan) {
		StringWriter sw = new StringWriter();
		PrintWriter pw = new PrintWriter(sw);
		dumpOptimizerPlanAsJSON(plan, pw);
		pw.close();
		return sw.toString();
	}

	public void dumpOptimizerPlanAsJSON(OptimizedPlan plan, PrintWriter writer) {
		Collection<SinkPlanNode> sinks = plan.getDataSinks();
		if (sinks instanceof List) {
			dumpOptimizerPlanAsJSON((List<SinkPlanNode>) sinks, writer);
		} else {
			List<SinkPlanNode> n = new ArrayList<SinkPlanNode>();
			n.addAll(sinks);
			dumpOptimizerPlanAsJSON(n, writer);
		}
	}

	public void dumpOptimizerPlanAsJSON(List<SinkPlanNode> nodes, PrintWriter writer) {
		@SuppressWarnings("unchecked")
		List<DumpableNode<?>> n = (List<DumpableNode<?>>) (List<?>) nodes;
		compilePlanToJSON(n, writer);
	}

	// --------------------------------------------------------------------------------------------

	private void compilePlanToJSON(List<DumpableNode<?>> nodes, PrintWriter writer) {
		// initialization to assign node ids
		this.nodeIds = new HashMap<DumpableNode<?>, Integer>();
		this.nodeCnt = 0;

		// JSON header
		writer.print("{\n\t\"nodes\": [\n\n");

		// generate JSON for the plan
		for (int i = 0; i < nodes.size(); i++) {
			visit(nodes.get(i), writer, i == 0);
		}

		// JSON footer
		writer.println("\n\t]\n}");
	}

	private boolean visit(DumpableNode<?> node, PrintWriter writer, boolean first) {
		// check for duplicate traversal
		if (this.nodeIds.containsKey(node)) {
			return false;
		}

		// assign an id first
		this.nodeIds.put(node, this.nodeCnt++);

		// then recurse
		for (DumpableNode<?> child : node.getPredecessors()) {
			// important: a child that was already in the graph must not flip "first"
			// to false, so only do it when the child was actually dumped
			if (visit(child, writer, first)) {
				first = false;
			}
		}

		// get the optimizer node behind this dumpable node
		final OptimizerNode n = node.getOptimizerNode();

		// ------------------ dump after the ascend ---------------------

		// start a new node and output the node id
		if (!first) {
			writer.print(",\n");
		}

		// open the node
		writer.print("\t{\n");

		// recurse, if it is an iteration node
		if (node instanceof BulkIterationNode || node instanceof BulkIterationPlanNode) {
			DumpableNode<?> innerChild = node instanceof BulkIterationNode ?
					((BulkIterationNode) node).getNextPartialSolution() :
					((BulkIterationPlanNode) node).getRootOfStepFunction();

			DumpableNode<?> begin = node instanceof BulkIterationNode ?
					((BulkIterationNode) node).getPartialSolution() :
					((BulkIterationPlanNode) node).getPartialSolutionPlanNode();

			writer.print("\t\t\"step_function\": [\n");
			visit(innerChild, writer, true);
			writer.print("\n\t\t],\n");
			writer.print("\t\t\"partial_solution\": " + this.nodeIds.get(begin) + ",\n");
			writer.print("\t\t\"next_partial_solution\": " + this.nodeIds.get(innerChild) + ",\n");
		}
		else if (node instanceof WorksetIterationNode || node instanceof WorksetIterationPlanNode) {
			DumpableNode<?> worksetRoot = node instanceof WorksetIterationNode ?
					((WorksetIterationNode) node).getNextWorkset() :
					((WorksetIterationPlanNode) node).getNextWorkSetPlanNode();

			DumpableNode<?> solutionDelta = node instanceof WorksetIterationNode ?
					((WorksetIterationNode) node).getSolutionSetDelta() :
					((WorksetIterationPlanNode) node).getSolutionSetDeltaPlanNode();

			DumpableNode<?> workset = node instanceof WorksetIterationNode ?
					((WorksetIterationNode) node).getWorksetNode() :
					((WorksetIterationPlanNode) node).getWorksetPlanNode();

			DumpableNode<?> solutionSet = node instanceof WorksetIterationNode ?
					((WorksetIterationNode) node).getSolutionSetNode() :
					((WorksetIterationPlanNode) node).getSolutionSetPlanNode();

			writer.print("\t\t\"step_function\": [\n");
			visit(worksetRoot, writer, true);
			visit(solutionDelta, writer, false);
			writer.print("\n\t\t],\n");
			writer.print("\t\t\"workset\": " + this.nodeIds.get(workset) + ",\n");
			writer.print("\t\t\"solution_set\": " + this.nodeIds.get(solutionSet) + ",\n");
			writer.print("\t\t\"next_workset\": " + this.nodeIds.get(worksetRoot) + ",\n");
			writer.print("\t\t\"solution_delta\": " + this.nodeIds.get(solutionDelta) + ",\n");
		}

		// print the id
		writer.print("\t\t\"id\": " + this.nodeIds.get(node));

		final String type;
		String contents;
		if (n instanceof DataSinkNode) {
			type = "sink";
			contents = n.getPactContract().toString();
		} else if (n instanceof DataSourceNode) {
			type = "source";
			contents = n.getPactContract().toString();
		} else if (n instanceof BulkIterationNode) {
			type = "bulk_iteration";
			contents = n.getPactContract().getName();
		} else if (n instanceof WorksetIterationNode) {
			type = "workset_iteration";
			contents = n.getPactContract().getName();
		} else if (n instanceof BinaryUnionNode) {
			type = "pact";
			contents = "";
		} else {
			type = "pact";
			contents = n.getPactContract().getName();
		}

		contents = StringUtils.showControlCharacters(contents);
		if (encodeForHTML) {
			contents = StringEscapeUtils.escapeHtml4(contents);
			// replace backslashes with their HTML entity so they do not break
			// the JSON string when the dump is embedded in an HTML page
			contents = contents.replace("\\", "&#92;");
		}

		String name = n.getName();
		if (name.equals("Reduce") && (node instanceof SingleInputPlanNode) &&
				((SingleInputPlanNode) node).getDriverStrategy() == DriverStrategy.SORTED_GROUP_COMBINE) {
			name = "Combine";
		}

		// output the type identifier
		writer.print(",\n\t\t\"type\": \"" + type + "\"");

		// output node name
		writer.print(",\n\t\t\"pact\": \"" + name + "\"");

		// output node contents
		writer.print(",\n\t\t\"contents\": \"" + contents + "\"");

		// degree of parallelism
		writer.print(",\n\t\t\"parallelism\": \""
			+ (n.getDegreeOfParallelism() >= 1 ? n.getDegreeOfParallelism() : "default") + "\"");

		// output node predecessors
		Iterator<? extends DumpableConnection<?>> inConns = node.getDumpableInputs().iterator();
		String child1name = "", child2name = "";

		if (inConns != null && inConns.hasNext()) {
			// start predecessor list
			writer.print(",\n\t\t\"predecessors\": [");
			int inputNum = 0;
			while (inConns.hasNext()) {
				final DumpableConnection<?> inConn = inConns.next();
				final DumpableNode<?> source = inConn.getSource();
				writer.print(inputNum == 0 ? "\n" : ",\n");
				if (inputNum == 0) {
					child1name += child1name.length() > 0 ? ", " : "";
					child1name += source.getOptimizerNode().getPactContract().getName();
				} else if (inputNum == 1) {
					child2name += child2name.length() > 0 ? ", " : "";
					child2name += source.getOptimizerNode().getPactContract().getName();
				}

				// output predecessor id
				writer.print("\t\t\t{\"id\": " + this.nodeIds.get(source));

				// output connection side
				if (inConns.hasNext() || inputNum > 0) {
					writer.print(", \"side\": \"" + (inputNum == 0 ? "first" : "second") + "\"");
				}
				// output shipping strategy and channel type
				final Channel channel = (inConn instanceof Channel) ? (Channel) inConn : null;
				final ShipStrategyType shipType = channel != null ?
						channel.getShipStrategy() :
						((PactConnection) inConn).getShipStrategy();

				String shipStrategy = null;
				if (shipType != null) {
					switch (shipType) {
						case NONE:
							// nothing
							break;
						case FORWARD:
							shipStrategy = "Forward";
							break;
						case BROADCAST:
							shipStrategy = "Broadcast";
							break;
						case PARTITION_HASH:
							shipStrategy = "Hash Partition";
							break;
						case PARTITION_RANGE:
							shipStrategy = "Range Partition";
							break;
						case PARTITION_RANDOM:
							shipStrategy = "Redistribute";
							break;
						case PARTITION_FORCED_REBALANCE:
							shipStrategy = "Rebalance";
							break;
						case PARTITION_CUSTOM:
							shipStrategy = "Custom Partition";
							break;
						default:
							throw new CompilerException("Unknown ship strategy '" + inConn.getShipStrategy().name()
								+ "' in JSON generator.");
					}
				}

				if (channel != null && channel.getShipStrategyKeys() != null && channel.getShipStrategyKeys().size() > 0) {
					shipStrategy += " on " + (channel.getShipStrategySortOrder() == null ?
							channel.getShipStrategyKeys().toString() :
							Utils.createOrdering(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder()).toString());
				}

				if (shipStrategy != null) {
					writer.print(", \"ship_strategy\": \"" + shipStrategy + "\"");
				}

				if (channel != null) {
					String localStrategy = null;
					switch (channel.getLocalStrategy()) {
						case NONE:
							break;
						case SORT:
							localStrategy = "Sort";
							break;
						case COMBININGSORT:
							localStrategy = "Sort (combining)";
							break;
						default:
							throw new CompilerException("Unknown local strategy " + channel.getLocalStrategy().name());
					}

					if (channel.getLocalStrategyKeys() != null && channel.getLocalStrategyKeys().size() > 0) {
						localStrategy += " on " + (channel.getLocalStrategySortOrder() == null ?
								channel.getLocalStrategyKeys().toString() :
								Utils.createOrdering(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder()).toString());
					}

					if (localStrategy != null) {
						writer.print(", \"local_strategy\": \"" + localStrategy + "\"");
					}

					if (channel.getTempMode() != TempMode.NONE) {
						String tempMode = channel.getTempMode().toString();
						writer.print(", \"temp_mode\": \"" + tempMode + "\"");
					}
				}

				writer.print('}');
				inputNum++;
			}
			// finish predecessors
			writer.print("\n\t\t]");
		}

		//---------------------------------------------------------------------------------------
		// the part below here is relevant only to plan nodes with concrete strategies, etc
		//---------------------------------------------------------------------------------------

		final PlanNode p = node.getPlanNode();
		if (p == null) {
			// finish node
			writer.print("\n\t}");
			return true;
		}

		// driver strategy
		String locString = null;
		if (p.getDriverStrategy() != null) {
			switch (p.getDriverStrategy()) {
				case NONE:
				case BINARY_NO_OP:
					break;
				case UNARY_NO_OP:
					locString = "No-Op";
					break;
				case COLLECTOR_MAP:
				case MAP:
					locString = "Map";
					break;
				case FLAT_MAP:
					locString = "FlatMap";
					break;
				case MAP_PARTITION:
					locString = "Map Partition";
					break;
				case ALL_REDUCE:
					locString = "Reduce All";
					break;
				case ALL_GROUP_REDUCE:
				case ALL_GROUP_COMBINE:
					locString = "Group Reduce All";
					break;
				case SORTED_REDUCE:
					locString = "Sorted Reduce";
					break;
				case SORTED_PARTIAL_REDUCE:
					locString = "Sorted Combine/Reduce";
					break;
				case SORTED_GROUP_REDUCE:
					locString = "Sorted Group Reduce";
					break;
				case SORTED_GROUP_COMBINE:
					locString = "Sorted Combine";
					break;
				case HYBRIDHASH_BUILD_FIRST:
					locString = "Hybrid Hash (build: " + child1name + ")";
					break;
				case HYBRIDHASH_BUILD_SECOND:
					locString = "Hybrid Hash (build: " + child2name + ")";
					break;
				case HYBRIDHASH_BUILD_FIRST_CACHED:
					locString = "Hybrid Hash (CACHED) (build: " + child1name + ")";
					break;
				case HYBRIDHASH_BUILD_SECOND_CACHED:
					locString = "Hybrid Hash (CACHED) (build: " + child2name + ")";
					break;
				case NESTEDLOOP_BLOCKED_OUTER_FIRST:
					locString = "Nested Loops (Blocked Outer: " + child1name + ")";
					break;
				case NESTEDLOOP_BLOCKED_OUTER_SECOND:
					locString = "Nested Loops (Blocked Outer: " + child2name + ")";
					break;
				case NESTEDLOOP_STREAMED_OUTER_FIRST:
					locString = "Nested Loops (Streamed Outer: " + child1name + ")";
					break;
				case NESTEDLOOP_STREAMED_OUTER_SECOND:
					locString = "Nested Loops (Streamed Outer: " + child2name + ")";
					break;
				case MERGE:
					locString = "Merge";
					break;
				case CO_GROUP:
					locString = "Co-Group";
					break;
				default:
					locString = p.getDriverStrategy().name();
					break;
			}

			if (locString != null) {
				writer.print(",\n\t\t\"driver_strategy\": \"");
				writer.print(locString);
				writer.print("\"");
			}
		}

		{
			// output node global properties
			final GlobalProperties gp = p.getGlobalProperties();

			writer.print(",\n\t\t\"global_properties\": [\n");

			addProperty(writer, "Partitioning", gp.getPartitioning().name(), true);
			if (gp.getPartitioningFields() != null) {
				addProperty(writer, "Partitioned on", gp.getPartitioningFields().toString(), false);
			}
			if (gp.getPartitioningOrdering() != null) {
				addProperty(writer, "Partitioning Order", gp.getPartitioningOrdering().toString(), false);
			} else {
				addProperty(writer, "Partitioning Order", "(none)", false);
			}
			if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
				addProperty(writer, "Uniqueness", "not unique", false);
			} else {
				addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
			}

			writer.print("\n\t\t]");
		}

		{
			// output node local properties
			LocalProperties lp = p.getLocalProperties();

			writer.print(",\n\t\t\"local_properties\": [\n");

			if (lp.getOrdering() != null) {
				addProperty(writer, "Order", lp.getOrdering().toString(), true);
			} else {
				addProperty(writer, "Order", "(none)", true);
			}
			if (lp.getGroupedFields() != null && lp.getGroupedFields().size() > 0) {
				addProperty(writer, "Grouped on", lp.getGroupedFields().toString(), false);
			} else {
				addProperty(writer, "Grouping", "not grouped", false);
			}
			if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
				addProperty(writer, "Uniqueness", "not unique", false);
			} else {
				addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
			}

			writer.print("\n\t\t]");
		}

		// output node size estimates
		writer.print(",\n\t\t\"estimates\": [\n");

		addProperty(writer, "Est. Output Size", n.getEstimatedOutputSize() == -1 ? "(unknown)" :
			formatNumber(n.getEstimatedOutputSize(), "B"), true);
		addProperty(writer, "Est. Cardinality", n.getEstimatedNumRecords() == -1 ? "(unknown)" :
			formatNumber(n.getEstimatedNumRecords()), false);

		writer.print("\t\t]");

		// output node cost
		if (p.getNodeCosts() != null) {
			writer.print(",\n\t\t\"costs\": [\n");

			addProperty(writer, "Network", p.getNodeCosts().getNetworkCost() == -1 ? "(unknown)" :
				formatNumber(p.getNodeCosts().getNetworkCost(), "B"), true);
			addProperty(writer, "Disk I/O", p.getNodeCosts().getDiskCost() == -1 ? "(unknown)" :
				formatNumber(p.getNodeCosts().getDiskCost(), "B"), false);
			addProperty(writer, "CPU", p.getNodeCosts().getCpuCost() == -1 ? "(unknown)" :
				formatNumber(p.getNodeCosts().getCpuCost(), ""), false);

			addProperty(writer, "Cumulative Network", p.getCumulativeCosts().getNetworkCost() == -1 ? "(unknown)" :
				formatNumber(p.getCumulativeCosts().getNetworkCost(), "B"), false);
			addProperty(writer, "Cumulative Disk I/O", p.getCumulativeCosts().getDiskCost() == -1 ?
"(unknown)" : formatNumber(p.getCumulativeCosts().getDiskCost(), "B"), false); addProperty(writer, "Cumulative CPU", p.getCumulativeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getCpuCost(), ""), false); writer.print("\n\t\t]"); } // output the node compiler hints if (n.getPactContract().getCompilerHints() != null) { CompilerHints hints = n.getPactContract().getCompilerHints(); CompilerHints defaults = new CompilerHints(); String size = hints.getOutputSize() == defaults.getOutputSize() ? "(none)" : String.valueOf(hints.getOutputSize()); String card = hints.getOutputCardinality() == defaults.getOutputCardinality() ? "(none)" : String.valueOf(hints.getOutputCardinality()); String width = hints.getAvgOutputRecordSize() == defaults.getAvgOutputRecordSize() ? "(none)" : String.valueOf(hints.getAvgOutputRecordSize()); String filter = hints.getFilterFactor() == defaults.getFilterFactor() ? "(none)" : String.valueOf(hints.getFilterFactor()); writer.print(",\n\t\t\"compiler_hints\": [\n"); addProperty(writer, "Output Size (bytes)", size, true); addProperty(writer, "Output Cardinality", card, false); addProperty(writer, "Avg. Output Record Size (bytes)", width, false); addProperty(writer, "Filter Factor", filter, false); writer.print("\t\t]"); } // finish node writer.print("\n\t}"); return true; } private void addProperty(PrintWriter writer, String name, String value, boolean first) { if (!first) { writer.print(",\n"); } writer.print("\t\t\t{ \"name\": \""); writer.print(name); writer.print("\", \"value\": \""); writer.print(value); writer.print("\" }"); } public static final String formatNumber(double number) { return formatNumber(number, ""); } public static final String formatNumber(double number, String suffix) { if (number <= 0.0) { return String.valueOf(number); } int power = (int) Math.ceil(Math.log10(number)); int group = (power - 1) / 3; if (group >= SIZE_SUFFIXES.length) { group = SIZE_SUFFIXES.length - 1; } else if (group < 0) { group = 0; } // truncate fractional part int beforeDecimal = power - group * 3; if (power > beforeDecimal) { for (int i = power - beforeDecimal; i > 0; i--) { number /= 10; } } return group > 0 ? String.format(Locale.US, "%.2f %s", number, SIZE_SUFFIXES[group]) : String.format(Locale.US, "%.2f", number); } private static final char[] SIZE_SUFFIXES = { 0, 'K', 'M', 'G', 'T' }; }