// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala.planner;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.impala.analysis.AggregateInfo;
import com.cloudera.impala.analysis.AnalyticInfo;
import com.cloudera.impala.analysis.Analyzer;
import com.cloudera.impala.analysis.BaseTableRef;
import com.cloudera.impala.analysis.BinaryPredicate;
import com.cloudera.impala.analysis.Expr;
import com.cloudera.impala.analysis.ExprId;
import com.cloudera.impala.analysis.ExprSubstitutionMap;
import com.cloudera.impala.analysis.InlineViewRef;
import com.cloudera.impala.analysis.JoinOperator;
import com.cloudera.impala.analysis.QueryStmt;
import com.cloudera.impala.analysis.SelectStmt;
import com.cloudera.impala.analysis.SlotDescriptor;
import com.cloudera.impala.analysis.SlotId;
import com.cloudera.impala.analysis.SlotRef;
import com.cloudera.impala.analysis.TableRef;
import com.cloudera.impala.analysis.TupleDescriptor;
import com.cloudera.impala.analysis.TupleId;
import com.cloudera.impala.analysis.UnionStmt;
import com.cloudera.impala.analysis.UnionStmt.UnionOperand;
import com.cloudera.impala.catalog.ColumnStats;
import com.cloudera.impala.catalog.DataSourceTable;
import com.cloudera.impala.catalog.HBaseTable;
import com.cloudera.impala.catalog.HdfsTable;
import com.cloudera.impala.catalog.Type;
import com.cloudera.impala.common.ImpalaException;
import com.cloudera.impala.common.InternalException;
import com.cloudera.impala.common.NotImplementedException;
import com.cloudera.impala.common.Pair;
import com.cloudera.impala.common.PrintUtils;
import com.cloudera.impala.thrift.TExplainLevel;
import com.cloudera.impala.thrift.TQueryExecRequest;
import com.cloudera.impala.thrift.TTableName;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * Constructs a non-executable single-node plan from an analyzed parse tree.
 * The single-node plan does not contain data exchanges or data-reduction optimizations
 * such as local aggregations that are important for distributed execution.
 * The single-node plan needs to be wrapped in a plan fragment for it to be executable.
 */
public class SingleNodePlanner {
  private final static Logger LOG = LoggerFactory.getLogger(SingleNodePlanner.class);

  private final PlannerContext ctx_;

  public SingleNodePlanner(PlannerContext ctx) {
    ctx_ = ctx;
  }

  /**
   * Generates and returns the root of the single-node plan for the analyzed parse tree
   * in the planner context. The planning process recursively walks the parse tree and
   * performs the following actions.
   * In the top-down phase over query statements:
   * - materialize the slots required for evaluating expressions of that statement
   * - migrate conjuncts from parent blocks into inline views and union operands
   * In the bottom-up phase generate the plan tree for every query statement:
   * - perform join-order optimization when generating the plan of the FROM
   *   clause of a select statement; requires that all materialized slots are known
   *   for an accurate estimate of row sizes needed for cost-based join ordering
   * - assign conjuncts that can be evaluated at that node and compute the stats
   *   of that node (cardinality, etc.)
   * - apply combined expression substitution map of child plan nodes; if a plan node
   *   re-maps its input, set a substitution map to be applied by parents
   */
  public PlanNode createSingleNodePlan() throws ImpalaException {
    QueryStmt queryStmt = ctx_.getQueryStmt();
    Analyzer analyzer = ctx_.getRootAnalyzer();
    analyzer.computeEquivClasses();

    // Mark slots referenced by output exprs as materialized, prior to generating the
    // plan tree.
    // We need to mark the result exprs of the topmost select block as materialized, so
    // that PlanNode.init() can compute the final mem layout of materialized tuples
    // (the byte size of tuples is needed for cost computations).
    // TODO: instead of materializing everything produced by the plan root, derive
    // referenced slots from destination fragment and add a materialization node
    // if not all output is needed by destination fragment
    // TODO 2: should the materialization decision be cost-based?
    if (queryStmt.getBaseTblResultExprs() != null) {
      analyzer.materializeSlots(queryStmt.getBaseTblResultExprs());
    }

    LOG.trace("desctbl: " + analyzer.getDescTbl().debugString());
    PlanNode singleNodePlan = createQueryPlan(queryStmt, analyzer,
        ctx_.getQueryOptions().isDisable_outermost_topn());
    Preconditions.checkNotNull(singleNodePlan);
    return singleNodePlan;
  }

  /**
   * Return combined explain string for all plan fragments.
   * Includes the estimated resource requirements from the request if set.
   */
  public String getExplainString(ArrayList<PlanFragment> fragments,
      TQueryExecRequest request, TExplainLevel explainLevel) {
    StringBuilder str = new StringBuilder();
    boolean hasHeader = false;
    if (request.isSetPer_host_mem_req() && request.isSetPer_host_vcores()) {
      str.append(
          String.format("Estimated Per-Host Requirements: Memory=%s VCores=%s\n",
              PrintUtils.printBytes(request.getPer_host_mem_req()),
              request.per_host_vcores));
      hasHeader = true;
    }

    // Append warning about tables missing stats.
    if (request.query_ctx.isSetTables_missing_stats()
        && !request.query_ctx.getTables_missing_stats().isEmpty()) {
      List<String> tableNames = Lists.newArrayList();
      for (TTableName tableName : request.query_ctx.getTables_missing_stats()) {
        tableNames.add(tableName.db_name + "." + tableName.table_name);
      }
      str.append("WARNING: The following tables are missing relevant table "
          + "and/or column statistics.\n" + Joiner.on(", ").join(tableNames) + "\n");
      hasHeader = true;
    }

    if (request.query_ctx.isDisable_spilling()) {
      str.append("WARNING: Spilling is disabled for this query as a safety guard.\n"
          + "Reason: Query option disable_unsafe_spills is set, at least one table\n"
          + "is missing relevant stats, and no plan hints were given.\n");
      hasHeader = true;
    }
    if (hasHeader) str.append("\n");

    if (explainLevel.ordinal() < TExplainLevel.VERBOSE.ordinal()) {
      // Print the non-fragmented parallel plan.
      str.append(fragments.get(0).getExplainString(explainLevel));
    } else {
      // Print the fragmented parallel plan.
      for (int i = 0; i < fragments.size(); ++i) {
        PlanFragment fragment = fragments.get(i);
        str.append(fragment.getExplainString(explainLevel));
        if (explainLevel == TExplainLevel.VERBOSE && i + 1 != fragments.size()) {
          str.append("\n");
        }
      }
    }
    return str.toString();
  }

  /**
   * Creates an EmptyNode that 'materializes' the tuples of the given stmt.
   */
  private PlanNode createEmptyNode(QueryStmt stmt, Analyzer analyzer)
      throws InternalException {
    ArrayList<TupleId> tupleIds = Lists.newArrayList();
    stmt.getMaterializedTupleIds(tupleIds);
    EmptySetNode node = new EmptySetNode(ctx_.getNextNodeId(), tupleIds);
    node.init(analyzer);
    return node;
  }

  /**
   * Create plan tree for single-node execution. Generates PlanNodes for the
   * Select/Project/Join/Union [All]/Group by/Having/Order by clauses of the query
   * stmt.
   */
  private PlanNode createQueryPlan(QueryStmt stmt, Analyzer analyzer,
      boolean disableTopN) throws ImpalaException {
    if (analyzer.hasEmptyResultSet()) return createEmptyNode(stmt, analyzer);

    PlanNode root;
    if (stmt instanceof SelectStmt) {
      SelectStmt selectStmt = (SelectStmt) stmt;
      root = createSelectPlan(selectStmt, analyzer);

      // insert possible AnalyticEvalNode before SortNode
      if (selectStmt.getAnalyticInfo() != null) {
        AnalyticInfo analyticInfo = selectStmt.getAnalyticInfo();
        ArrayList<TupleId> stmtTupleIds = Lists.newArrayList();
        stmt.getMaterializedTupleIds(stmtTupleIds);
        AnalyticPlanner analyticPlanner =
            new AnalyticPlanner(stmtTupleIds, analyticInfo, analyzer, ctx_);
        List<Expr> inputPartitionExprs = Lists.newArrayList();
        AggregateInfo aggInfo = selectStmt.getAggInfo();
        root = analyticPlanner.createSingleNodePlan(root,
            aggInfo != null ? aggInfo.getGroupingExprs() : null, inputPartitionExprs);
        if (aggInfo != null && !inputPartitionExprs.isEmpty()) {
          // analytic computation will benefit from a partition on inputPartitionExprs
          aggInfo.setPartitionExprs(inputPartitionExprs);
        }
      }
    } else {
      Preconditions.checkState(stmt instanceof UnionStmt);
      root = createUnionPlan((UnionStmt) stmt, analyzer);
    }

    // Avoid adding a sort node if the sort tuple has no materialized slots.
    boolean sortHasMaterializedSlots = false;
    if (stmt.evaluateOrderBy()) {
      for (SlotDescriptor sortSlotDesc :
          stmt.getSortInfo().getSortTupleDescriptor().getSlots()) {
        if (sortSlotDesc.isMaterialized()) {
          sortHasMaterializedSlots = true;
          break;
        }
      }
    }

    if (stmt.evaluateOrderBy() && sortHasMaterializedSlots) {
      long limit = stmt.getLimit();
      // TODO: External sort could be used for very large limits
      // not just unlimited order-by
      boolean useTopN = stmt.hasLimit() && !disableTopN;
      root = new SortNode(ctx_.getNextNodeId(), root, stmt.getSortInfo(), useTopN,
          stmt.getOffset());
      Preconditions.checkState(root.hasValidStats());
      root.setLimit(limit);
      root.init(analyzer);
    } else {
      root.setLimit(stmt.getLimit());
      root.computeStats(analyzer);
    }

    return root;
  }

  /**
   * If there are unassigned conjuncts that are bound by tupleIds or if there are slot
   * equivalences for tupleIds that have not yet been enforced, returns a SelectNode
   * on top of root that evaluates those conjuncts; otherwise returns root unchanged.
   * TODO: change this to assign the unassigned conjuncts to root itself, if that is
   * semantically correct
   */
  private PlanNode addUnassignedConjuncts(Analyzer analyzer, List<TupleId> tupleIds,
      PlanNode root) throws InternalException {
    // No point in adding SelectNode on top of an EmptyNode.
    if (root instanceof EmptySetNode) return root;
    Preconditions.checkNotNull(root);
    // Gather unassigned conjuncts and generate predicates to enforce
    // slot equivalences for each tuple id.
    List<Expr> conjuncts = analyzer.getUnassignedConjuncts(root);
    for (TupleId tid : tupleIds) {
      analyzer.createEquivConjuncts(tid, conjuncts);
    }
    if (conjuncts.isEmpty()) return root;
    // evaluate conjuncts in SelectNode
    SelectNode selectNode = new SelectNode(ctx_.getNextNodeId(), root, conjuncts);
    // init() marks conjuncts as assigned
    selectNode.init(analyzer);
    Preconditions.checkState(selectNode.hasValidStats());
    return selectNode;
  }

  /**
   * Return the cheapest plan that materializes the joins of all TblRefs in refPlans.
   * Assumes that refPlans are in the order in which they originally appeared in the
   * query.
   * For this plan:
   * - the plan is executable, i.e., all non-cross joins have equi-join predicates
   * - the leftmost scan is over the largest of the inputs for which we can still
   *   construct an executable plan
   * - all rhs's are in decreasing order of selectiveness (percentage of rows they
   *   eliminate)
   * - outer/cross/semi joins: rhs serialized size is < lhs serialized size;
   *   enforced via join inversion, if necessary
   * Returns null if we can't create an executable plan.
   */
  private PlanNode createCheapestJoinPlan(Analyzer analyzer,
      List<Pair<TableRef, PlanNode>> refPlans) throws ImpalaException {
    LOG.trace("createCheapestJoinPlan");
    if (refPlans.size() == 1) return refPlans.get(0).second;

    // collect eligible candidates for the leftmost input; list contains
    // (plan, materialized size)
    ArrayList<Pair<TableRef, Long>> candidates = Lists.newArrayList();
    for (Pair<TableRef, PlanNode> entry : refPlans) {
      TableRef ref = entry.first;
      JoinOperator joinOp = ref.getJoinOp();

      // The rhs table of an outer/semi join can appear as the left-most input if we
      // invert the lhs/rhs and the join op. However, we may only consider this
      // inversion for the very first join in refPlans, otherwise we could reorder
      // tables/joins across outer/semi joins which is generally incorrect. The
      // null-aware left anti-join operator is never considered for inversion because
      // we can't execute the null-aware right anti-join efficiently.
      // TODO: Allow the rhs of any cross join as the leftmost table. This needs
      // careful consideration of the joinOps that result from such a re-ordering
      // (IMPALA-1281).
      if (((joinOp.isOuterJoin() || joinOp.isSemiJoin() || joinOp.isCrossJoin())
          && ref != refPlans.get(1).first) || joinOp.isNullAwareLeftAntiJoin()) {
        // ref cannot appear as the leftmost input
        continue;
      }

      PlanNode plan = entry.second;
      if (plan.getCardinality() == -1) {
        // use 0 for the size to avoid it becoming the leftmost input
        // TODO: Consider raw size of scanned partitions in the absence of stats.
        candidates.add(new Pair<TableRef, Long>(ref, 0L));
        LOG.trace("candidate " + ref.getAlias() + ": 0");
        continue;
      }
      Preconditions.checkNotNull(ref.getDesc());
      long materializedSize =
          (long) Math.ceil(plan.getAvgRowSize() * (double) plan.getCardinality());
      candidates.add(new Pair<TableRef, Long>(ref, materializedSize));
      LOG.trace("candidate " + ref.getAlias() + ": "
          + Long.toString(materializedSize));
    }
    if (candidates.isEmpty()) return null;

    // order candidates by descending materialized size; we want to minimize the
    // memory consumption of the materialized hash tables required for the join
    // sequence
    Collections.sort(candidates,
        new Comparator<Pair<TableRef, Long>>() {
          public int compare(Pair<TableRef, Long> a, Pair<TableRef, Long> b) {
            long diff = b.second - a.second;
            return (diff < 0 ? -1 : (diff > 0 ? 1 : 0));
          }
        });

    for (Pair<TableRef, Long> candidate : candidates) {
      PlanNode result = createJoinPlan(analyzer, candidate.first, refPlans);
      if (result != null) return result;
    }
    return null;
  }

  /**
   * Returns a plan with leftmostRef's plan as its leftmost input; the joins
   * are in decreasing order of selectiveness (percentage of rows they eliminate).
   * The leftmostRef's join will be inverted if it is an outer/semi/cross join.
   */
  private PlanNode createJoinPlan(Analyzer analyzer, TableRef leftmostRef,
      List<Pair<TableRef, PlanNode>> refPlans) throws ImpalaException {
    LOG.trace("createJoinPlan: " + leftmostRef.getAlias());
    // the refs that have yet to be joined
    List<Pair<TableRef, PlanNode>> remainingRefs = Lists.newArrayList();
    PlanNode root = null;  // root of accumulated join plan
    for (Pair<TableRef, PlanNode> entry : refPlans) {
      if (entry.first == leftmostRef) {
        root = entry.second;
      } else {
        remainingRefs.add(entry);
      }
    }
    Preconditions.checkNotNull(root);
    // refs that have been joined. The union of joinedRefs and the refs in
    // remainingRefs are the set of all table refs.
    Set<TableRef> joinedRefs = Sets.newHashSet();
    joinedRefs.add(leftmostRef);

    // If the leftmostTblRef is an outer/semi/cross join, we must invert it.
    boolean planHasInvertedJoin = false;
    if (leftmostRef.getJoinOp().isOuterJoin()
        || leftmostRef.getJoinOp().isSemiJoin()
        || leftmostRef.getJoinOp().isCrossJoin()) {
      // TODO: Revisit the interaction of join inversion here and the analysis state
      // that is changed in analyzer.invertOuterJoin(). Changing the analysis state
      // should not be necessary because the semantics of an inverted outer join do
      // not change.
      leftmostRef.invertJoin(refPlans, analyzer);
      planHasInvertedJoin = true;
    }

    long numOps = 0;
    int i = 0;
    while (!remainingRefs.isEmpty()) {
      // we minimize the resulting cardinality at each step in the join chain,
      // which minimizes the total number of hash table lookups
      PlanNode newRoot = null;
      Pair<TableRef, PlanNode> minEntry = null;
      for (Pair<TableRef, PlanNode> entry : remainingRefs) {
        TableRef ref = entry.first;
        LOG.trace(Integer.toString(i) + " considering ref " + ref.getAlias());

        // Determine whether we can or must consider this join at this point in the
        // plan. Place outer/semi joins at a fixed position in the plan tree
        // (IMPALA-860), s.t. all the tables appearing to the left/right of an
        // outer/semi join in the original query still remain to the left/right after
        // join ordering. This prevents join re-ordering across outer/semi joins
        // which is generally wrong. The checks below rely on remainingRefs being in
        // the order in which they originally appeared in the query.
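        // Illustrative example (added note, with assumed table names): for a query
        // like
        //   SELECT ... FROM t1 JOIN t2 ON ... LEFT OUTER JOIN t3 ON ...
        // the join with t3 may only become the new root once the current root
        // materializes exactly {t1, t2}, i.e., everything appearing to the left of
        // the outer join; t3 is never joined before or between t1 and t2.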
        JoinOperator joinOp = ref.getJoinOp();
        if (joinOp.isOuterJoin() || joinOp.isSemiJoin()) {
          List<TupleId> currentTids = Lists.newArrayList(root.getTblRefIds());
          currentTids.add(ref.getId());
          // Place outer/semi joins at a fixed position in the plan tree. We know
          // that the join resulting from 'ref' must become the new root if the
          // current root materializes exactly those tuple ids corresponding to
          // TableRefs appearing to the left of 'ref' in the original query.
          List<TupleId> tableRefTupleIds = ref.getAllTupleIds();
          if (!currentTids.containsAll(tableRefTupleIds)
              || !tableRefTupleIds.containsAll(currentTids)) {
            // Do not consider the remaining table refs to prevent incorrect
            // re-ordering of tables across outer/semi/anti joins.
            break;
          }
        } else if (ref.getJoinOp().isCrossJoin()) {
          if (!joinedRefs.contains(ref.getLeftTblRef())) continue;
        }

        PlanNode rhsPlan = entry.second;
        analyzer.setAssignedConjuncts(root.getAssignedConjuncts());

        boolean invertJoin = false;
        if (joinOp.isOuterJoin() || joinOp.isSemiJoin() || joinOp.isCrossJoin()) {
          // Invert the join if doing so reduces the size of the build-side hash
          // table (may also reduce network costs depending on the join strategy).
          // Only consider this optimization if both the lhs/rhs cardinalities are
          // known. The null-aware left anti-join operator is never considered for
          // inversion because we can't execute the null-aware right anti-join
          // efficiently.
          long lhsCard = root.getCardinality();
          long rhsCard = rhsPlan.getCardinality();
          if (lhsCard != -1 && rhsCard != -1
              && lhsCard * root.getAvgRowSize() < rhsCard * rhsPlan.getAvgRowSize()
              && !joinOp.isNullAwareLeftAntiJoin()) {
            invertJoin = true;
          }
        }
        PlanNode candidate = null;
        if (invertJoin) {
          ref.setJoinOp(ref.getJoinOp().invert());
          candidate = createJoinNode(analyzer, rhsPlan, root, ref, null);
          planHasInvertedJoin = true;
        } else {
          candidate = createJoinNode(analyzer, root, rhsPlan, null, ref);
        }
        if (candidate == null) continue;
        LOG.trace("cardinality=" + Long.toString(candidate.getCardinality()));

        // Use 'candidate' as the new root; don't consider any other table refs at
        // this position in the plan.
        if (joinOp.isOuterJoin() || joinOp.isSemiJoin()) {
          newRoot = candidate;
          minEntry = entry;
          break;
        }

        // Always prefer Hash Join over Cross Join due to limited costing
        // infrastructure.
        if (newRoot == null
            || (candidate.getClass().equals(newRoot.getClass())
                && candidate.getCardinality() < newRoot.getCardinality())
            || (candidate instanceof HashJoinNode
                && newRoot instanceof CrossJoinNode)) {
          newRoot = candidate;
          minEntry = entry;
        }
      }
      if (newRoot == null) {
        // Currently, it should not be possible to invert a join for a plan that
        // turns out to be non-executable because (1) the joins we consider for
        // inversion are barriers in the join order, and (2) the caller of this
        // function only considers other leftmost table refs if a plan turns out to
        // be non-executable.
        // TODO: This preconditions check will need to be changed to undo the
        // in-place modifications made to table refs for join inversion, if the
        // caller decides to explore more leftmost table refs.
        Preconditions.checkState(!planHasInvertedJoin);
        return null;
      }

      // we need to insert every rhs row into the hash table and then look up
      // every lhs row
      long lhsCardinality = root.getCardinality();
      long rhsCardinality = minEntry.second.getCardinality();
      numOps += lhsCardinality + rhsCardinality;
      LOG.debug(Integer.toString(i) + " chose " + minEntry.first.getAlias()
          + " #lhs=" + Long.toString(lhsCardinality)
          + " #rhs=" + Long.toString(rhsCardinality)
          + " #ops=" + Long.toString(numOps));
      remainingRefs.remove(minEntry);
      joinedRefs.add(minEntry.first);
      root = newRoot;
      // assign id_ after running through the possible choices in order to end up
      // with a dense sequence of node ids
      root.setId(ctx_.getNextNodeId());
      analyzer.setAssignedConjuncts(root.getAssignedConjuncts());
      ++i;
    }

    return root;
  }

  /**
   * Return a plan with joins in the order of refPlans (= FROM clause order).
   */
  private PlanNode createFromClauseJoinPlan(Analyzer analyzer,
      List<Pair<TableRef, PlanNode>> refPlans) throws ImpalaException {
    // create left-deep sequence of binary hash joins; assign node ids as we go along
    Preconditions.checkState(!refPlans.isEmpty());
    PlanNode root = refPlans.get(0).second;
    for (int i = 1; i < refPlans.size(); ++i) {
      TableRef innerRef = refPlans.get(i).first;
      PlanNode innerPlan = refPlans.get(i).second;
      root = createJoinNode(analyzer, root, innerPlan, null, innerRef);
      root.setId(ctx_.getNextNodeId());
    }
    return root;
  }

  /**
   * Create tree of PlanNodes that implements the Select/Project/Join/Group by/Having
   * of the selectStmt query block.
   */
  private PlanNode createSelectPlan(SelectStmt selectStmt, Analyzer analyzer)
      throws ImpalaException {
    // no from clause -> materialize the select's exprs with a UnionNode
    if (selectStmt.getTableRefs().isEmpty()) {
      return createConstantSelectPlan(selectStmt, analyzer);
    }

    // collect output tuples of subtrees
    ArrayList<TupleId> rowTuples = Lists.newArrayList();
    for (TableRef tblRef : selectStmt.getTableRefs()) {
      rowTuples.addAll(tblRef.getMaterializedTupleIds());
    }

    // Slot materialization:
    // We need to mark all slots as materialized that are needed during the execution
    // of selectStmt, and we need to do that prior to creating plans for the TableRefs
    // (because createTableRefNode() might end up calling computeMemLayout() on one or
    // more TupleDescriptors, at which point all referenced slots need to be marked).
    //
    // For non-join predicates, slots are marked as follows:
    // - for base table scan predicates, this is done directly by ScanNode.init(),
    //   which can do a better job because it doesn't need to materialize slots that
    //   are only referenced for partition pruning, for instance
    // - for inline views, non-join predicates are pushed down, at which point the
    //   process repeats itself.
    selectStmt.materializeRequiredSlots(analyzer);

    // return a plan that feeds the aggregation of selectStmt with an empty set,
    // if the selectStmt's select-project-join portion returns an empty result set
    if (analyzer.hasEmptySpjResultSet()) {
      PlanNode emptySetNode = new EmptySetNode(ctx_.getNextNodeId(), rowTuples);
      emptySetNode.init(analyzer);
      return createAggregationPlan(selectStmt, analyzer, emptySetNode);
    }

    // create plans for our table refs; use a list here instead of a map to
    // maintain a deterministic order of traversing the TableRefs during join
    // plan generation (helps with tests)
    List<Pair<TableRef, PlanNode>> refPlans = Lists.newArrayList();
    for (TableRef ref : selectStmt.getTableRefs()) {
      PlanNode plan = createTableRefNode(analyzer, ref);
      Preconditions.checkState(plan != null);
      refPlans.add(new Pair<TableRef, PlanNode>(ref, plan));
    }
    // save state of conjunct assignment; needed for join plan generation
    for (Pair<TableRef, PlanNode> entry : refPlans) {
      entry.second.setAssignedConjuncts(analyzer.getAssignedConjuncts());
    }

    PlanNode root = null;
    if (!selectStmt.getSelectList().isStraightJoin()) {
      Set<ExprId> assignedConjuncts = analyzer.getAssignedConjuncts();
      root = createCheapestJoinPlan(analyzer, refPlans);
      if (root == null) analyzer.setAssignedConjuncts(assignedConjuncts);
    }
    if (selectStmt.getSelectList().isStraightJoin() || root == null) {
      // we didn't have enough stats to do a cost-based join plan, or the
      // STRAIGHT_JOIN keyword was in the select list: use the FROM clause order
      // instead
      root = createFromClauseJoinPlan(analyzer, refPlans);
      Preconditions.checkNotNull(root);
    }

    // add aggregation, if any
    if (selectStmt.getAggInfo() != null) {
      root = createAggregationPlan(selectStmt, analyzer, root);
    }

    // All the conjuncts_ should be assigned at this point.
    // TODO: Re-enable this check here and/or elsewhere.
    //Preconditions.checkState(!analyzer.hasUnassignedConjuncts());
    return root;
  }

  /**
   * Returns a new AggregationNode that materializes the aggregation of the given
   * stmt. Assigns conjuncts from the Having clause to the returned node.
   */
  private PlanNode createAggregationPlan(SelectStmt selectStmt, Analyzer analyzer,
      PlanNode root) throws InternalException {
    Preconditions.checkState(selectStmt.getAggInfo() != null);
    // add aggregation, if required
    AggregateInfo aggInfo = selectStmt.getAggInfo();
    root = new AggregationNode(ctx_.getNextNodeId(), root, aggInfo);
    root.init(analyzer);
    Preconditions.checkState(root.hasValidStats());
    // if we're computing DISTINCT agg fns, the analyzer already created the
    // 2nd phase agg info
    if (aggInfo.isDistinctAgg()) {
      ((AggregationNode) root).unsetNeedsFinalize();
      // The output of the 1st phase agg is the 1st phase intermediate.
      ((AggregationNode) root).setIntermediateTuple();
      root = new AggregationNode(ctx_.getNextNodeId(), root,
          aggInfo.getSecondPhaseDistinctAggInfo());
      root.init(analyzer);
      Preconditions.checkState(root.hasValidStats());
    }
    // add Having clause
    root.assignConjuncts(analyzer);
    return root;
  }

  /**
   * Returns a UnionNode that materializes the exprs of the constant selectStmt.
   * Replaces the resultExprs of the selectStmt with SlotRefs into the materialized
   * tuple.
   */
  private PlanNode createConstantSelectPlan(SelectStmt selectStmt, Analyzer analyzer)
      throws InternalException {
    Preconditions.checkState(selectStmt.getTableRefs().isEmpty());
    ArrayList<Expr> resultExprs = selectStmt.getBaseTblResultExprs();
    ArrayList<String> colLabels = selectStmt.getColLabels();
    // Create tuple descriptor for materialized tuple.
    TupleDescriptor tupleDesc = analyzer.getDescTbl().createTupleDescriptor("union");
    tupleDesc.setIsMaterialized(true);
    UnionNode unionNode = new UnionNode(ctx_.getNextNodeId(), tupleDesc.getId());
    // Analysis guarantees that selects without a FROM clause only have constant
    // exprs.
    unionNode.addConstExprList(Lists.newArrayList(resultExprs));

    // Replace the select stmt's resultExprs with SlotRefs into tupleDesc.
    for (int i = 0; i < resultExprs.size(); ++i) {
      SlotDescriptor slotDesc = analyzer.addSlotDescriptor(tupleDesc);
      slotDesc.setLabel(colLabels.get(i));
      slotDesc.setType(resultExprs.get(i).getType());
      slotDesc.setStats(ColumnStats.fromExpr(resultExprs.get(i)));
      slotDesc.setIsMaterialized(true);
      SlotRef slotRef = new SlotRef(slotDesc);
      resultExprs.set(i, slotRef);
    }

    // UnionNode.init() needs tupleDesc to have been initialized
    tupleDesc.computeMemLayout();
    unionNode.init(analyzer);
    return unionNode;
  }

  /**
   * Transform '=', '<[=]' and '>[=]' comparisons for given slot into
   * ValueRange. Also removes those predicates which were used for the construction
   * of ValueRange from 'conjuncts_'. Only looks at comparisons w/ string constants
   * (i.e., the bounds of the result can be evaluated with Expr::GetValue(NULL)).
   * HBase row key filtering works only if the row key is mapped to a string column
   * and the expression is a string constant expression.
   * If there are multiple competing comparison predicates that could be used
   * to construct a ValueRange, only the first one from each category is chosen.
   */
  private ValueRange createHBaseValueRange(SlotDescriptor d, List<Expr> conjuncts) {
    ListIterator<Expr> i = conjuncts.listIterator();
    ValueRange result = null;
    while (i.hasNext()) {
      Expr e = i.next();
      if (!(e instanceof BinaryPredicate)) continue;
      BinaryPredicate comp = (BinaryPredicate) e;
      if (comp.getOp() == BinaryPredicate.Operator.NE) continue;
      Expr slotBinding = comp.getSlotBinding(d.getId());
      if (slotBinding == null || !slotBinding.isConstant()
          || !slotBinding.getType().equals(Type.STRING)) {
        continue;
      }

      if (comp.getOp() == BinaryPredicate.Operator.EQ) {
        i.remove();
        return ValueRange.createEqRange(slotBinding);
      }

      if (result == null) result = new ValueRange();

      // TODO: do we need copies here?
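      // Illustrative example (added note, assumed column name): given conjuncts
      // 'key >= "a"' and 'key < "f"' on a string row key, the branches below build
      // the range ["a", "f") and remove both predicates from 'conjuncts'.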
      if (comp.getOp() == BinaryPredicate.Operator.GT
          || comp.getOp() == BinaryPredicate.Operator.GE) {
        if (result.getLowerBound() == null) {
          result.setLowerBound(slotBinding);
          result.setLowerBoundInclusive(comp.getOp() == BinaryPredicate.Operator.GE);
          i.remove();
        }
      } else {
        if (result.getUpperBound() == null) {
          result.setUpperBound(slotBinding);
          result.setUpperBoundInclusive(comp.getOp() == BinaryPredicate.Operator.LE);
          i.remove();
        }
      }
    }
    return result;
  }

  /**
   * Returns plan tree for an inline view ref:
   * - predicates from the enclosing scope that can be evaluated directly within
   *   the inline-view plan are pushed down
   * - predicates that cannot be evaluated directly within the inline-view plan
   *   but only apply to the inline view are evaluated in a SelectNode placed
   *   on top of the inline view plan
   * - all slots that are referenced by predicates from the enclosing scope that
   *   cannot be pushed down are marked as materialized (so that when
   *   computeMemLayout() is called on the base table descriptors materialized by
   *   the inline view it has a complete picture)
   */
  private PlanNode createInlineViewPlan(Analyzer analyzer, InlineViewRef inlineViewRef)
      throws ImpalaException {
    // If possible, "push down" view predicates; this is needed in order to ensure
    // that predicates such as "x + y = 10" are evaluated in the view's plan tree
    // rather than a SelectNode grafted on top of that plan tree.
    // This doesn't prevent predicate propagation, because predicates like
    // "x = 10" that get pushed down are still connected to equivalent slots
    // via the equality predicates created for the view's select list.
    // Include outer join conjuncts here as well because predicates from the
    // On-clause of an outer join may be pushed into the inline view as well.
    //
    // Limitations on predicate propagation into inline views:
    // If the inline view computes analytic functions, we cannot push any
    // predicate into the inline view tree (see IMPALA-1243). The reason is that
    // analytic functions compute aggregates over their entire input, and applying
    // filters from the enclosing scope *before* the aggregate computation would
    // alter the results. This is unlike regular aggregate computation, which only
    // makes the *output* of the computation visible to the enclosing scope, so that
    // filters from the enclosing scope can be safely applied (to the grouping cols,
    // say).
    List<Expr> unassigned =
        analyzer.getUnassignedConjuncts(inlineViewRef.getId().asList(), true);
    boolean migrateConjuncts = !inlineViewRef.getViewStmt().hasLimit()
        && !inlineViewRef.getViewStmt().hasOffset()
        && (!(inlineViewRef.getViewStmt() instanceof SelectStmt)
            || !((SelectStmt) (inlineViewRef.getViewStmt())).hasAnalyticInfo());
    if (migrateConjuncts) {
      // check if we can evaluate them
      List<Expr> preds = Lists.newArrayList();
      for (Expr e : unassigned) {
        if (analyzer.canEvalPredicate(inlineViewRef.getId().asList(), e)) {
          preds.add(e);
        }
      }
      unassigned.removeAll(preds);

      // Generate predicates to enforce equivalences among slots of the inline view
      // tuple. These predicates are also migrated into the inline view.
      analyzer.createEquivConjuncts(inlineViewRef.getId(), preds);

      // create new predicates against the inline view's unresolved result exprs, not
      // the resolved result exprs, in order to avoid skipping scopes (and ignoring
      // limit clauses on the way)
      List<Expr> viewPredicates =
          Expr.substituteList(preds, inlineViewRef.getSmap(), analyzer, false);

      // Remove unregistered predicates that reference the same slot on
      // both sides (e.g. a = a).
      // Such predicates have been generated from slot
      // equivalences and may incorrectly reject rows with nulls (IMPALA-1412).
      Predicate<Expr> isIdentityPredicate = new Predicate<Expr>() {
        @Override
        public boolean apply(Expr expr) {
          if (!(expr instanceof BinaryPredicate)
              || ((BinaryPredicate) expr).getOp() != BinaryPredicate.Operator.EQ) {
            return false;
          }
          if (!expr.isRegisteredPredicate()
              && expr.getChild(0) instanceof SlotRef
              && expr.getChild(1) instanceof SlotRef
              && (((SlotRef) expr.getChild(0)).getSlotId()
                  == ((SlotRef) expr.getChild(1)).getSlotId())) {
            return true;
          }
          return false;
        }
      };
      Iterables.removeIf(viewPredicates, isIdentityPredicate);

      // "migrate" conjuncts_ by marking them as assigned and re-registering them
      // with new ids.
      // Mark pre-substitution conjuncts as assigned, since the ids of the new exprs
      // may have changed.
      analyzer.markConjunctsAssigned(preds);
      inlineViewRef.getAnalyzer().registerConjuncts(viewPredicates);
    }

    // mark (fully resolve) slots referenced by remaining unassigned conjuncts_ as
    // materialized
    List<Expr> substUnassigned = Expr.substituteList(unassigned,
        inlineViewRef.getBaseTblSmap(), analyzer, false);
    analyzer.materializeSlots(substUnassigned);

    // Turn a constant select into a UnionNode that materializes the exprs.
    // TODO: unify this with createConstantSelectPlan(), this is basically the
    // same thing
    QueryStmt viewStmt = inlineViewRef.getViewStmt();
    if (viewStmt instanceof SelectStmt) {
      SelectStmt selectStmt = (SelectStmt) viewStmt;
      if (selectStmt.getTableRefs().isEmpty()) {
        if (inlineViewRef.getAnalyzer().hasEmptyResultSet()) {
          return createEmptyNode(viewStmt, inlineViewRef.getAnalyzer());
        }
        // Analysis should have generated a tuple id_ into which to materialize the
        // exprs.
        Preconditions.checkState(inlineViewRef.getMaterializedTupleIds().size() == 1);
        // we need to materialize all slots of our inline view tuple
        analyzer.getTupleDesc(inlineViewRef.getId()).materializeSlots();
        UnionNode unionNode = new UnionNode(ctx_.getNextNodeId(),
            inlineViewRef.getMaterializedTupleIds().get(0));
        if (analyzer.hasEmptyResultSet()) return unionNode;
        unionNode.setTblRefIds(Lists.newArrayList(inlineViewRef.getId()));
        unionNode.addConstExprList(selectStmt.getBaseTblResultExprs());
        unionNode.init(analyzer);
        return unionNode;
      }
    }

    PlanNode rootNode = createQueryPlan(inlineViewRef.getViewStmt(),
        inlineViewRef.getAnalyzer(), false);
    // TODO: we should compute the "physical layout" of the view's descriptor, so
    // that the avg row size is available during optimization; however, that means we
    // need to select references to its resultExprs from the enclosing scope(s)
    rootNode.setTblRefIds(Lists.newArrayList(inlineViewRef.getId()));

    // Set smap *before* creating a SelectNode in order to allow proper resolution.
    // Analytics have an additional level of logical to physical slot remapping.
    // The composition creates a mapping from the logical output of the inline view
    // to the physical analytic output. In addition, it retains the logical to
    // physical analytic slot mappings which are needed to resolve exprs that already
    // reference the logical analytic tuple (and not the inline view tuple), e.g.,
    // the result exprs set in the coordinator fragment.
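    // Added explanatory note: compose(f, g) is assumed here to yield a map that
    // applies f first and then g, so the composed smap takes an expr over the
    // logical inline-view tuple, rewrites it via the baseTblSmap, and then through
    // this node's own output smap to the physical output.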
    rootNode.setOutputSmap(ExprSubstitutionMap.compose(
        inlineViewRef.getBaseTblSmap(), rootNode.getOutputSmap(), analyzer));

    // if the view has a limit we may have conjuncts_ from the enclosing scope left
    if (!migrateConjuncts) {
      rootNode = addUnassignedConjuncts(analyzer,
          inlineViewRef.getDesc().getId().asList(), rootNode);
    }
    return rootNode;
  }

  /**
   * Create node for scanning all data files of a particular table.
   */
  private PlanNode createScanNode(Analyzer analyzer, TableRef tblRef)
      throws InternalException {
    ScanNode scanNode = null;
    if (tblRef.getTable() instanceof HdfsTable) {
      scanNode = new HdfsScanNode(ctx_.getNextNodeId(), tblRef.getDesc(),
          (HdfsTable) tblRef.getTable());
      scanNode.init(analyzer);
      return scanNode;
    } else if (tblRef.getTable() instanceof DataSourceTable) {
      scanNode = new DataSourceScanNode(ctx_.getNextNodeId(), tblRef.getDesc());
      scanNode.init(analyzer);
      return scanNode;
    } else if (tblRef.getTable() instanceof HBaseTable) {
      // HBase table
      scanNode = new HBaseScanNode(ctx_.getNextNodeId(), tblRef.getDesc());
    } else {
      throw new InternalException("Invalid table ref class: " + tblRef.getClass());
    }

    // TODO: move this to HBaseScanNode.init();
    Preconditions.checkState(scanNode instanceof HBaseScanNode);
    List<Expr> conjuncts = analyzer.getUnassignedConjuncts(scanNode);
    // mark conjuncts_ assigned here; they will either end up inside a
    // ValueRange or will be evaluated directly by the node
    analyzer.markConjunctsAssigned(conjuncts);
    List<ValueRange> keyRanges = Lists.newArrayList();
    // determine scan predicates for clustering cols
    for (int i = 0; i < tblRef.getTable().getNumClusteringCols(); ++i) {
      SlotDescriptor slotDesc = analyzer.getColumnSlot(tblRef.getDesc(),
          tblRef.getTable().getColumns().get(i));
      if (slotDesc == null || !slotDesc.getType().isStringType()) {
        // the hbase row key is mapped to a non-string type
        // (since it's stored in ascii it will be lexicographically ordered,
        // and non-string comparisons won't work)
        keyRanges.add(null);
      } else {
        // create ValueRange from conjuncts_ for slot; also removes conjuncts_ that
        // were used as input for filter
        keyRanges.add(createHBaseValueRange(slotDesc, conjuncts));
      }
    }

    ((HBaseScanNode) scanNode).setKeyRanges(keyRanges);
    scanNode.addConjuncts(conjuncts);
    scanNode.init(analyzer);
    return scanNode;
  }

  /**
   * Return all applicable conjuncts for join between a plan tree and a single
   * TableRef; the conjuncts can be used for hash table lookups.
   * - for inner joins, those are equi-join predicates in which one side is fully
   *   bound by planIds and the other by joinedTblRef.id_;
   * - for outer joins: same type of conjuncts_ as inner joins, but only from the
   *   JOIN clause
   * Returns the conjuncts_ in 'joinConjuncts' (in which "<lhs> = <rhs>" is returned
   * as a BinaryPredicate) and also in their original form in 'joinPredicates'.
   * Each lhs is bound by planIds, and each rhs by the tuple id of joinedTblRef.
   * Predicates that are redundant based on equivalence classes are intentionally
   * returned by this function because the removal of redundant predicates and the
   * creation of new predicates for enforcing slot equivalences go hand-in-hand
   * (see analyzer.createEquivConjuncts()).
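   * Illustrative example (added note, with assumed table/column names): for
   *   SELECT * FROM a JOIN b ON a.id = b.id
   * with 'a' already in the plan and 'b' being joined, 'joinConjuncts' receives a
   * BinaryPredicate with lhs a.id (bound by planIds) and rhs b.id (bound by b's
   * tuple id).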
   */
  private void getHashLookupJoinConjuncts(Analyzer analyzer, List<TupleId> planIds,
      TableRef joinedTblRef, List<BinaryPredicate> joinConjuncts,
      List<Expr> joinPredicates) {
    joinConjuncts.clear();
    joinPredicates.clear();
    TupleId tblRefId = joinedTblRef.getId();
    List<TupleId> tblRefIds = tblRefId.asList();
    List<Expr> candidates = analyzer.getEqJoinConjuncts(planIds, joinedTblRef);
    if (candidates == null) return;

    List<TupleId> joinTupleIds = Lists.newArrayList();
    joinTupleIds.addAll(planIds);
    joinTupleIds.add(tblRefId);
    for (Expr e : candidates) {
      // Ignore predicate if one of its children is a constant.
      if (e.getChild(0).isConstant() || e.getChild(1).isConstant()) continue;

      Expr rhsExpr = null;
      if (e.getChild(0).isBoundByTupleIds(tblRefIds)) {
        rhsExpr = e.getChild(0);
      } else {
        Preconditions.checkState(e.getChild(1).isBoundByTupleIds(tblRefIds));
        rhsExpr = e.getChild(1);
      }

      Expr lhsExpr = null;
      if (e.getChild(1).isBoundByTupleIds(planIds)) {
        lhsExpr = e.getChild(1);
      } else if (e.getChild(0).isBoundByTupleIds(planIds)) {
        lhsExpr = e.getChild(0);
      } else {
        // not an equi-join condition between lhsIds and rhsId
        continue;
      }

      Preconditions.checkState(lhsExpr != rhsExpr);
      joinPredicates.add(e);
      BinaryPredicate joinConjunct =
          new BinaryPredicate(((BinaryPredicate) e).getOp(), lhsExpr, rhsExpr);
      joinConjunct.analyzeNoThrow(analyzer);
      joinConjuncts.add(joinConjunct);
    }
    if (!joinPredicates.isEmpty()) return;
    Preconditions.checkState(joinConjuncts.isEmpty());

    // construct joinConjunct entries derived from equivalence class membership
    List<SlotId> lhsSlotIds = Lists.newArrayList();
    for (SlotDescriptor slotDesc : joinedTblRef.getDesc().getSlots()) {
      analyzer.getEquivSlots(slotDesc.getId(), planIds, lhsSlotIds);
      if (!lhsSlotIds.isEmpty()) {
        // construct a BinaryPredicate in order to get correct casting;
        // we only do this for one of the equivalent slots, all the other implied
        // equalities are redundant
        BinaryPredicate pred =
            analyzer.createEqPredicate(lhsSlotIds.get(0), slotDesc.getId());
        joinConjuncts.add(pred);
      }
    }
  }

  /**
   * Create a node to join outer with inner. Either the outer or the inner may be a
   * plan created from a table ref (but not both), and the corresponding
   * outer/innerRef should be non-null.
   */
  private PlanNode createJoinNode(Analyzer analyzer, PlanNode outer, PlanNode inner,
      TableRef outerRef, TableRef innerRef) throws ImpalaException {
    Preconditions.checkState(innerRef != null ^ outerRef != null);
    TableRef tblRef = (innerRef != null) ? innerRef : outerRef;

    List<BinaryPredicate> eqJoinConjuncts = Lists.newArrayList();
    List<Expr> eqJoinPredicates = Lists.newArrayList();
    // get eq join predicates for the TableRefs' ids (not the PlanNodes' ids, which
    // are materialized)
    if (innerRef != null) {
      getHashLookupJoinConjuncts(
          analyzer, outer.getTblRefIds(), innerRef, eqJoinConjuncts, eqJoinPredicates);
      // Outer joins should only use On-clause predicates as eqJoinConjuncts.
      if (!innerRef.getJoinOp().isOuterJoin()) {
        analyzer.createEquivConjuncts(outer.getTblRefIds(), innerRef.getId(),
            eqJoinConjuncts);
      }
    } else {
      getHashLookupJoinConjuncts(
          analyzer, inner.getTblRefIds(), outerRef, eqJoinConjuncts, eqJoinPredicates);
      // Outer joins should only use On-clause predicates as eqJoinConjuncts.
      if (!outerRef.getJoinOp().isOuterJoin()) {
        analyzer.createEquivConjuncts(inner.getTblRefIds(), outerRef.getId(),
            eqJoinConjuncts);
      }
      // Reverse the lhs/rhs of the join conjuncts.
      for (BinaryPredicate eqJoinConjunct : eqJoinConjuncts) {
        Expr swapTmp = eqJoinConjunct.getChild(0);
        eqJoinConjunct.setChild(0, eqJoinConjunct.getChild(1));
        eqJoinConjunct.setChild(1, swapTmp);
      }
    }

    // Handle implicit cross joins
    if (eqJoinConjuncts.isEmpty()) {
      // Since our only implementation of semi and outer joins is hash-based, and we
      // do not re-order semi and outer joins, we must have eqJoinConjuncts here to
      // execute this query.
      // TODO: Revisit when we add more semi/join implementations.
      if (tblRef.getJoinOp().isOuterJoin() || tblRef.getJoinOp().isSemiJoin()) {
        throw new NotImplementedException(
            String.format("%s join with '%s' without equi-join "
                + "conjuncts is not supported.",
                tblRef.getJoinOp().isOuterJoin() ? "Outer" : "Semi",
                tblRef.getAliasAsName()));
      }
      CrossJoinNode result = new CrossJoinNode(outer, inner);
      result.init(analyzer);
      return result;
    }

    // Handle explicit cross joins with equi join conditions
    if (tblRef.getJoinOp() == JoinOperator.CROSS_JOIN) {
      tblRef.setJoinOp(JoinOperator.INNER_JOIN);
    }

    analyzer.markConjunctsAssigned(eqJoinPredicates);

    List<Expr> otherJoinConjuncts = Lists.newArrayList();
    if (tblRef.getJoinOp().isOuterJoin()) {
      // Also assign conjuncts from On clause. All remaining unassigned conjuncts
      // that can be evaluated by this join are assigned in createSelectPlan().
      otherJoinConjuncts = analyzer.getUnassignedOjConjuncts(tblRef);
    } else if (tblRef.getJoinOp().isSemiJoin()) {
      // Unassigned conjuncts bound by the invisible tuple id of a semi join must
      // have come from the join's On-clause, and therefore, must be added to the
      // other join conjuncts to produce correct results.
      otherJoinConjuncts =
          analyzer.getUnassignedConjuncts(tblRef.getAllTupleIds(), false);
      if (tblRef.getJoinOp().isNullAwareLeftAntiJoin()) {
        boolean hasNullMatchingEqOperator = false;
        // Keep only the null-matching eq conjunct in the eqJoinConjuncts and move
        // all the others in otherJoinConjuncts. The BE relies on this
        // separation for correct execution of the null-aware left anti join.
        Iterator<BinaryPredicate> it = eqJoinConjuncts.iterator();
        while (it.hasNext()) {
          BinaryPredicate conjunct = it.next();
          if (!conjunct.isNullMatchingEq()) {
            otherJoinConjuncts.add(conjunct);
            it.remove();
          } else {
            // Only one null-matching eq conjunct is allowed
            Preconditions.checkState(!hasNullMatchingEqOperator);
            hasNullMatchingEqOperator = true;
          }
        }
        Preconditions.checkState(hasNullMatchingEqOperator);
      }
    }
    analyzer.markConjunctsAssigned(otherJoinConjuncts);

    HashJoinNode result =
        new HashJoinNode(outer, inner, tblRef, eqJoinConjuncts, otherJoinConjuncts);
    result.init(analyzer);
    return result;
  }

  /**
   * Create a tree of PlanNodes for the given tblRef, which can be a BaseTableRef or
   * an InlineViewRef.
   */
  private PlanNode createTableRefNode(Analyzer analyzer, TableRef tblRef)
      throws ImpalaException {
    if (tblRef instanceof BaseTableRef) {
      return createScanNode(analyzer, tblRef);
    }
    if (tblRef instanceof InlineViewRef) {
      return createInlineViewPlan(analyzer, (InlineViewRef) tblRef);
    }
    throw new InternalException("unknown TableRef node");
  }

  /**
   * Create a plan tree corresponding to 'unionOperands' for the given unionStmt.
   * The individual operands' plan trees are attached to a single UnionNode.
   * If unionDistinctPlan is not null, it is expected to contain the plan for the
   * distinct portion of the given unionStmt. The unionDistinctPlan is then added
   * as a child of the returned UnionNode.
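   * Illustrative shape (added note): for 'SELECT ... UNION SELECT ... UNION ALL
   * SELECT ...', the distinct operands are merged and de-duplicated first, and the
   * resulting AggregationNode is then passed in here as 'unionDistinctPlan' when
   * building the UnionNode for the ALL operands.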
   */
  private UnionNode createUnionPlan(Analyzer analyzer, UnionStmt unionStmt,
      List<UnionOperand> unionOperands, PlanNode unionDistinctPlan)
      throws ImpalaException {
    UnionNode unionNode = new UnionNode(ctx_.getNextNodeId(), unionStmt.getTupleId());
    for (UnionOperand op : unionOperands) {
      QueryStmt queryStmt = op.getQueryStmt();
      if (op.isDropped()) continue;

      if (queryStmt instanceof SelectStmt) {
        SelectStmt selectStmt = (SelectStmt) queryStmt;
        if (selectStmt.getTableRefs().isEmpty()) {
          unionNode.addConstExprList(selectStmt.getBaseTblResultExprs());
          continue;
        }
      }

      PlanNode opPlan = createQueryPlan(queryStmt, analyzer, false);
      if (opPlan instanceof EmptySetNode) continue;
      unionNode.addChild(opPlan, op.getQueryStmt().getBaseTblResultExprs());
    }

    if (unionDistinctPlan != null) {
      Preconditions.checkState(unionStmt.hasDistinctOps());
      Preconditions.checkState(unionDistinctPlan instanceof AggregationNode);
      unionNode.addChild(unionDistinctPlan,
          unionStmt.getDistinctAggInfo().getGroupingExprs());
    }
    unionNode.init(analyzer);
    return unionNode;
  }

  /**
   * Returns plan tree for unionStmt:
   * - distinctOperands' plan trees are collected in a single UnionNode
   *   and duplicates removed via distinct aggregation
   * - the output of that plus the allOperands' plan trees are collected in
   *   another UnionNode which materializes the result of unionStmt
   * - if any of the union operands contains analytic exprs, we avoid pushing
   *   predicates directly into the operands and instead evaluate them
   *   *after* the final UnionNode (see createInlineViewPlan() for the reasoning)
   * TODO: optimize this by still pushing predicates into the union operands
   * that don't contain analytic exprs and evaluating the conjuncts in a SelectNode
   * directly above the AnalyticEvalNodes
   */
  private PlanNode createUnionPlan(UnionStmt unionStmt, Analyzer analyzer)
      throws ImpalaException {
    List<Expr> conjuncts =
        analyzer.getUnassignedConjuncts(unionStmt.getTupleId().asList(), false);
    if (!unionStmt.hasAnalyticExprs()) {
      // Turn unassigned predicates for unionStmt's tupleId_ into predicates for
      // the individual operands.
      // Do this prior to creating the operands' plan trees so they get a chance to
      // pick up propagated predicates.
      for (UnionOperand op : unionStmt.getOperands()) {
        List<Expr> opConjuncts =
            Expr.substituteList(conjuncts, op.getSmap(), analyzer, false);
        op.getAnalyzer().registerConjuncts(opConjuncts);
        // Some of the opConjuncts have become constant and eval'd to false, or an
        // ancestor block is already guaranteed to return empty results.
        if (op.getAnalyzer().hasEmptyResultSet()) op.drop();
      }
      analyzer.markConjunctsAssigned(conjuncts);
    } else {
      // mark slots referenced by the yet-unassigned conjuncts
      analyzer.materializeSlots(conjuncts);
    }

    // mark slots after predicate propagation but prior to plan tree generation
    unionStmt.materializeRequiredSlots(analyzer);

    PlanNode result = null;
    // create DISTINCT tree
    if (unionStmt.hasDistinctOps()) {
      result = createUnionPlan(
          analyzer, unionStmt, unionStmt.getDistinctOperands(), null);
      result = new AggregationNode(ctx_.getNextNodeId(), result,
          unionStmt.getDistinctAggInfo());
      result.init(analyzer);
    }
    // create ALL tree
    if (unionStmt.hasAllOps()) {
      result = createUnionPlan(analyzer, unionStmt, unionStmt.getAllOperands(),
          result);
    }

    if (unionStmt.hasAnalyticExprs()) {
      result = addUnassignedConjuncts(
          analyzer, unionStmt.getTupleId().asList(), result);
    }
    return result;
  }
}
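
// ---------------------------------------------------------------------------
// Usage sketch (added illustration, not part of the original file). Assuming a
// PlannerContext 'ctx' has been built from an analyzed statement elsewhere in
// the planner, the single-node plan is obtained roughly as follows; the
// surrounding planner component is then responsible for wrapping the tree in
// plan fragments:
//
//   SingleNodePlanner singleNodePlanner = new SingleNodePlanner(ctx);
//   PlanNode singleNodePlan = singleNodePlanner.createSingleNodePlan();
//   // 'singleNodePlan' contains no exchanges; a distributed planner (or an
//   // equivalent component) turns it into an executable fragmented plan.
// ---------------------------------------------------------------------------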