Java tutorial
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala.analysis;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.impala.catalog.Column;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.common.ColumnAliasGenerator;
import com.cloudera.impala.common.TableAliasGenerator;
import com.cloudera.impala.common.TreeNode;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * Representation of a single select block, including GROUP BY, ORDER BY and HAVING
 * clauses.
 */
public class SelectStmt extends QueryStmt {
  private static final Logger LOG = LoggerFactory.getLogger(SelectStmt.class);

  protected SelectList selectList_;
  protected final ArrayList<String> colLabels_; // lower case column labels
  protected final List<TableRef> tableRefs_;
  protected Expr whereClause_;
  protected ArrayList<Expr> groupingExprs_;
  protected final Expr havingClause_;  // original having clause

  // havingClause with aliases and agg output resolved
  private Expr havingPred_;

  // set if we have any kind of aggregation operation, include SELECT DISTINCT
  private AggregateInfo aggInfo_;

  // set if we have AnalyticExprs in the select list/order by clause
  private AnalyticInfo analyticInfo_;

  // SQL string of this SelectStmt before inline-view expression substitution.
  // Set in analyze().
  protected String sqlString_;

  // substitutes all exprs in this select block to reference base tables
  // directly
  private ExprSubstitutionMap baseTblSmap_ = new ExprSubstitutionMap();

  /**
   * Creates an unanalyzed select block from its parsed clauses.
   *
   * @param selectList the select list
   * @param tableRefList the FROM clause table refs; null is treated as "no FROM clause"
   *     and replaced with an empty list. A non-null list is stored as-is (not copied).
   * @param wherePredicate the WHERE clause, or null
   * @param groupingExprs the GROUP BY exprs, or null if there is no GROUP BY clause
   * @param havingPredicate the HAVING clause, or null
   * @param orderByElements passed through to the QueryStmt constructor
   * @param limitElement passed through to the QueryStmt constructor
   */
  SelectStmt(SelectList selectList,
             List<TableRef> tableRefList,
             Expr wherePredicate, ArrayList<Expr> groupingExprs,
             Expr havingPredicate, ArrayList<OrderByElement> orderByElements,
             LimitElement limitElement) {
    super(orderByElements, limitElement);
    this.selectList_ = selectList;
    if (tableRefList == null) {
      this.tableRefs_ = Lists.newArrayList();
    } else {
      this.tableRefs_ = tableRefList;
    }
    this.whereClause_ = wherePredicate;
    this.groupingExprs_ = groupingExprs;
    this.havingClause_ = havingPredicate;
    this.colLabels_ = Lists.newArrayList();
    this.havingPred_ = null;
    this.aggInfo_ = null;
    this.sortInfo_ = null;
    // Set left table refs to ensure correct toSql() before analysis.
    for (int i = 1; i < tableRefs_.size(); ++i) {
      tableRefs_.get(i).setLeftTblRef(tableRefs_.get(i - 1));
    }
  }

  /**
   * @return the original select list items from the query
   */
  public SelectList getSelectList() { return selectList_; }

  /**
   * @return the HAVING clause post-analysis and with aliases resolved
   */
  public Expr getHavingPred() { return havingPred_; }

  /**
   * @return the table refs of the FROM clause (the internal list, not a copy);
   *     empty if there is no FROM clause
   */
  public List<TableRef> getTableRefs() { return tableRefs_; }

  /** @return true if this block has a WHERE clause */
  public boolean hasWhereClause() { return whereClause_ != null; }

  /** @return true if this block has a GROUP BY clause */
  public boolean hasGroupByClause() { return groupingExprs_ != null; }

  /** @return the WHERE clause, or null if there is none */
  public Expr getWhereClause() { return whereClause_; }

  /** Replaces the WHERE clause of this block. */
  public void setWhereClause(Expr whereClause) { whereClause_ = whereClause; }

  /** @return the aggregation info, or null if none has been created */
  public AggregateInfo getAggInfo() { return aggInfo_; }

  /** @return true if aggregation info has been created for this block */
  public boolean hasAggInfo() { return aggInfo_ != null; }

  /** @return the analytic info, or null if none has been created */
  public AnalyticInfo getAnalyticInfo() { return analyticInfo_; }

  /** @return true if analytic info has been created for this block */
  public boolean hasAnalyticInfo() { return analyticInfo_ != null; }

  @Override
  public ArrayList<String> getColLabels() { return colLabels_; }

  /** @return the substitution map that maps exprs of this block to base tables */
  public ExprSubstitutionMap getBaseTblSmap() { return baseTblSmap_; }

  // Column alias generator used during query rewriting.
  private ColumnAliasGenerator columnAliasGenerator_ = null;

  /**
   * @return the column alias generator of this block, created on first use from the
   *     current column labels
   */
  public ColumnAliasGenerator getColumnAliasGenerator() {
    if (columnAliasGenerator_ == null) {
      columnAliasGenerator_ = new ColumnAliasGenerator(colLabels_, null);
    }
    return columnAliasGenerator_;
  }

  // Table alias generator used during query rewriting.
  private TableAliasGenerator tableAliasGenerator_ = null;

  /**
   * @return the table alias generator of this block, created on first use from this
   *     stmt's analyzer
   */
  public TableAliasGenerator getTableAliasGenerator() {
    if (tableAliasGenerator_ == null) {
      tableAliasGenerator_ = new TableAliasGenerator(analyzer_, null);
    }
    return tableAliasGenerator_;
  }

  /**
   * Creates resultExprs and baseTblResultExprs.
   *
   * @throws AnalysisException if any clause of this block fails analysis, or if at
   *     least one referenced table was found missing
   */
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    super.analyze(analyzer);

    // Start out with table refs to establish aliases.
    TableRef leftTblRef = null;  // the one to the left of tblRef
    for (int i = 0; i < tableRefs_.size(); ++i) {
      // Resolve and replace non-InlineViewRef table refs with a BaseTableRef or ViewRef.
      TableRef tblRef = tableRefs_.get(i);
      tblRef = analyzer.resolveTableRef(tblRef);
      Preconditions.checkNotNull(tblRef);
      tableRefs_.set(i, tblRef);
      tblRef.setLeftTblRef(leftTblRef);
      try {
        tblRef.analyze(analyzer);
      } catch (AnalysisException e) {
        // Only re-throw the exception if no tables are missing.
        if (analyzer.getMissingTbls().isEmpty()) throw e;
      }
      leftTblRef = tblRef;
    }

    // All tableRefs have been analyzed, but at least one table was found missing.
    // There is no reason to proceed with analysis past this point.
    if (!analyzer.getMissingTbls().isEmpty()) {
      throw new AnalysisException("Found missing tables. Aborting analysis.");
    }

    // analyze plan hints from select list
    selectList_.analyzePlanHints(analyzer);

    // populate resultExprs_, aliasSmap_, and colLabels_
    for (int i = 0; i < selectList_.getItems().size(); ++i) {
      SelectListItem item = selectList_.getItems().get(i);
      if (item.isStar()) {
        TableName tblName = item.getTblName();
        if (tblName == null) {
          expandStar(analyzer);
        } else {
          expandStar(analyzer, tblName);
        }
      } else {
        // Analyze the resultExpr before generating a label to ensure enforcement
        // of expr child and depth limits (toColumn() label may call toSql()).
        item.getExpr().analyze(analyzer);
        if (item.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
          throw new AnalysisException("Subqueries are not supported in the select list.");
        }
        resultExprs_.add(item.getExpr());
        String label = item.toColumnLabel(i, analyzer.useHiveColLabels());
        SlotRef aliasRef = new SlotRef(null, label);
        Expr existingAliasExpr = aliasSmap_.get(aliasRef);
        if (existingAliasExpr != null && !existingAliasExpr.equals(item.getExpr())) {
          // If we have already seen this alias, it refers to more than one column and
          // therefore is ambiguous.
          ambiguousAliasList_.add(aliasRef);
        }
        aliasSmap_.put(aliasRef, item.getExpr().clone());
        colLabels_.add(label);
      }
    }

    // The root stmt may not return a complex-typed value directly because we'd need to
    // serialize it in a meaningful way. We allow complex types in the select list for
    // non-root stmts to support views.
    for (Expr expr : resultExprs_) {
      if (expr.getType().isComplexType() && analyzer.isRootAnalyzer()) {
        throw new AnalysisException(String.format(
            "Expr '%s' in select list of root statement returns a complex type '%s'.\n"
                + "Only scalar types are allowed in the select list of the root statement.",
            expr.toSql(), expr.getType().toSql()));
      }
    }

    if (TreeNode.contains(resultExprs_, AnalyticExpr.class)) {
      if (tableRefs_.isEmpty()) {
        throw new AnalysisException("Analytic expressions require FROM clause.");
      }

      // do this here, not after analyzeAggregation(), otherwise the AnalyticExprs
      // will get substituted away
      if (selectList_.isDistinct()) {
        throw new AnalysisException(
            "cannot combine SELECT DISTINCT with analytic functions");
      }
    }

    if (whereClause_ != null) {
      whereClause_.analyze(analyzer);
      if (whereClause_.contains(Expr.isAggregatePredicate())) {
        throw new AnalysisException("aggregate function not allowed in WHERE clause");
      }
      whereClause_.checkReturnsBool("WHERE clause", false);
      Expr e = whereClause_.findFirstOf(AnalyticExpr.class);
      if (e != null) {
        throw new AnalysisException(
            "WHERE clause must not contain analytic expressions: " + e.toSql());
      }
      analyzer.registerConjuncts(whereClause_, false);
    }

    createSortInfo(analyzer);
    analyzeAggregation(analyzer);
    analyzeAnalytics(analyzer);
    if (evaluateOrderBy_) createSortTupleInfo(analyzer);

    // Remember the SQL string before inline-view expression substitution.
    sqlString_ = toSql();

    resolveInlineViewRefs(analyzer);

    // If this block's select-project-join portion returns an empty result set and the
    // block has no aggregation, then mark this block as returning an empty result set.
    if (analyzer.hasEmptySpjResultSet() && aggInfo_ == null) {
      analyzer.setHasEmptyResultSet();
    }

    // Only build the debug string when it will actually be logged.
    if (aggInfo_ != null && LOG.isDebugEnabled()) {
      LOG.debug("post-analysis " + aggInfo_.debugString());
    }
  }

  /**
   * Marks all unassigned join predicates as well as exprs in aggInfo and sortInfo.
   */
  @Override
  public void materializeRequiredSlots(Analyzer analyzer) {
    // Mark unassigned join predicates. Some predicates that must be evaluated by a join
    // can also be safely evaluated below the join (picked up by getBoundPredicates()).
    // Such predicates will be marked twice and that is ok.
    List<Expr> unassigned =
        analyzer.getUnassignedConjuncts(getTableRefIds(), true);
    List<Expr> unassignedJoinConjuncts = Lists.newArrayList();
    for (Expr e : unassigned) {
      if (analyzer.evalByJoin(e)) unassignedJoinConjuncts.add(e);
    }
    List<Expr> baseTblJoinConjuncts =
        Expr.substituteList(unassignedJoinConjuncts, baseTblSmap_, analyzer, false);
    materializeSlots(analyzer, baseTblJoinConjuncts);

    if (evaluateOrderBy_) {
      // mark ordering exprs before marking agg/analytic exprs because they could contain
      // agg/analytic exprs that are not referenced anywhere but the ORDER BY clause
      sortInfo_.materializeRequiredSlots(analyzer, baseTblSmap_);
    }

    if (hasAnalyticInfo()) {
      // Mark analytic exprs before marking agg exprs because they could contain agg
      // exprs that are not referenced anywhere but the analytic expr.
      // Gather unassigned predicates and mark their slots. It is not desirable
      // to account for propagated predicates because if an analytic expr is only
      // referenced by a propagated predicate, then it's better to not materialize the
      // analytic expr at all.
      ArrayList<TupleId> tids = Lists.newArrayList();
      getMaterializedTupleIds(tids); // includes the analytic tuple
      List<Expr> conjuncts = analyzer.getUnassignedConjuncts(tids, false);
      materializeSlots(analyzer, conjuncts);
      analyticInfo_.materializeRequiredSlots(analyzer, baseTblSmap_);
    }

    if (aggInfo_ != null) {
      // mark all agg exprs needed for HAVING pred and binding predicates as materialized
      // before calling AggregateInfo.materializeRequiredSlots(), otherwise they won't
      // show up in AggregateInfo.getMaterializedAggregateExprs()
      ArrayList<Expr> havingConjuncts = Lists.newArrayList();
      if (havingPred_ != null) havingConjuncts.add(havingPred_);
      // Ignore predicates bound to a group-by slot because those
      // are already evaluated below this agg node (e.g., in a scan).
      Set<SlotId> groupBySlots = Sets.newHashSet();
      for (int i = 0; i < aggInfo_.getGroupingExprs().size(); ++i) {
        groupBySlots.add(aggInfo_.getOutputTupleDesc().getSlots().get(i).getId());
      }
      // Binding predicates are assigned to the final output tuple of the aggregation,
      // which is the tuple of the 2nd phase agg for distinct aggs.
      ArrayList<Expr> bindingPredicates =
          analyzer.getBoundPredicates(aggInfo_.getResultTupleId(), groupBySlots, false);
      havingConjuncts.addAll(bindingPredicates);
      havingConjuncts.addAll(
          analyzer.getUnassignedConjuncts(aggInfo_.getResultTupleId().asList(), false));
      materializeSlots(analyzer, havingConjuncts);
      aggInfo_.materializeRequiredSlots(analyzer, baseTblSmap_);
    }
  }

  /**
   * Populates baseTblSmap_ with our combined inline view smap and creates
   * baseTblResultExprs.
   */
  protected void resolveInlineViewRefs(Analyzer analyzer)
      throws AnalysisException {
    // Gather the inline view substitution maps from the enclosed inline views
    for (TableRef tblRef : tableRefs_) {
      if (tblRef instanceof InlineViewRef) {
        InlineViewRef inlineViewRef = (InlineViewRef) tblRef;
        baseTblSmap_ =
            ExprSubstitutionMap.combine(baseTblSmap_, inlineViewRef.getBaseTblSmap());
      }
    }
    baseTblResultExprs_ =
        Expr.trySubstituteList(resultExprs_, baseTblSmap_, analyzer, false);
    // Only build the debug strings when they will actually be logged.
    if (LOG.isTraceEnabled()) {
      LOG.trace("baseTblSmap_: " + baseTblSmap_.debugString());
      LOG.trace("resultExprs: " + Expr.debugString(resultExprs_));
      LOG.trace("baseTblResultExprs: " + Expr.debugString(baseTblResultExprs_));
    }
  }

  /**
   * @return a new list holding the id of every table ref in the FROM clause, in
   *     FROM clause order
   */
  public List<TupleId> getTableRefIds() {
    List<TupleId> result = Lists.newArrayList();
    for (TableRef ref : tableRefs_) {
      result.add(ref.getId());
    }
    return result;
  }

  /**
   * Expand "*" select list item, ignoring semi-joined tables.
   */
  private void expandStar(Analyzer analyzer) throws AnalysisException {
    if (tableRefs_.isEmpty()) {
      throw new AnalysisException("'*' expression in select list requires FROM clause.");
    }
    // expand in From clause order
    for (TableRef tableRef : tableRefs_) {
      if (analyzer.isSemiJoined(tableRef.getId())) continue;
      expandStar(analyzer, tableRef.getAliasAsName(), tableRef.getDesc());
    }
  }

  /**
   * Expand "<tbl>.*" select list item.
   */
  private void expandStar(Analyzer analyzer, TableName tblName)
      throws AnalysisException {
    TupleDescriptor tupleDesc = analyzer.getDescriptor(tblName);
    if (tupleDesc == null) {
      throw new AnalysisException("unknown table alias '" + tblName.toString() + "'");
    }
    if (analyzer.isSemiJoined(tupleDesc.getId())) {
      throw new AnalysisException(String.format(
          "'*' expression cannot reference semi-/anti-joined table '%s'",
          tblName.toString()));
    }
    expandStar(analyzer, tblName, tupleDesc);
  }

  /**
   * Expand "*" for a particular tuple descriptor by appending analyzed slot refs for
   * each column to selectListExprs.
   */
  private void expandStar(Analyzer analyzer, TableName tblName, TupleDescriptor desc)
      throws AnalysisException {
    Preconditions.checkState(!analyzer.isSemiJoined(desc.getId()));
    for (Column col : desc.getTable().getColumnsInHiveOrder()) {
      SlotRef slotRef = new SlotRef(tblName, col.getName());
      slotRef.analyze(analyzer);
      resultExprs_.add(slotRef);
      colLabels_.add(col.getName().toLowerCase());
    }
  }

  /**
   * Analyze aggregation-relevant components of the select block (Group By clause,
   * select list, Order By clause), substitute AVG with SUM/COUNT, create the
   * AggregationInfo, including the agg output tuple, and transform all post-agg exprs
   * given AggregationInfo's smap.
   */
  private void analyzeAggregation(Analyzer analyzer) throws AnalysisException {
    if (groupingExprs_ == null && !selectList_.isDistinct()
        && !TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())
        && (sortInfo_ == null
            || !TreeNode.contains(sortInfo_.getOrderingExprs(),
                Expr.isAggregatePredicate()))) {
      // we're not computing aggregates
      return;
    }

    // If we're computing an aggregate, we must have a FROM clause.
    if (tableRefs_.isEmpty()) {
      throw new AnalysisException(
          "aggregation without a FROM clause is not allowed");
    }

    if ((groupingExprs_ != null
        || TreeNode.contains(resultExprs_, Expr.isAggregatePredicate()))
        && selectList_.isDistinct()) {
      throw new AnalysisException(
          "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY");
    }

    // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to
    // name all star-expanded cols in the group by clause you might as well do it
    // in the select list)
    if (groupingExprs_ != null) {
      for (SelectListItem item : selectList_.getItems()) {
        if (item.isStar()) {
          throw new AnalysisException(
              "cannot combine '*' in select list with GROUP BY: " + item.toSql());
        }
      }
    }

    // disallow subqueries in the GROUP BY clause
    if (groupingExprs_ != null) {
      for (Expr expr : groupingExprs_) {
        if (expr.contains(Predicates.instanceOf(Subquery.class))) {
          throw new AnalysisException(
              "Subqueries are not supported in the GROUP BY clause.");
        }
      }
    }

    // analyze grouping exprs
    ArrayList<Expr> groupingExprsCopy = Lists.newArrayList();
    if (groupingExprs_ != null) {
      // make a deep copy here, we don't want to modify the original
      // exprs during analysis (in case we need to print them later)
      groupingExprsCopy = Expr.cloneList(groupingExprs_);

      substituteOrdinals(groupingExprsCopy, "GROUP BY", analyzer);
      Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy);
      if (ambiguousAlias != null) {
        throw new AnalysisException("Column '" + ambiguousAlias.toSql()
            + "' in GROUP BY clause is ambiguous");
      }
      groupingExprsCopy =
          Expr.trySubstituteList(groupingExprsCopy, aliasSmap_, analyzer, false);

      for (int i = 0; i < groupingExprsCopy.size(); ++i) {
        groupingExprsCopy.get(i).analyze(analyzer);
        if (groupingExprsCopy.get(i).contains(Expr.isAggregatePredicate())) {
          // reference the original expr in the error msg
          throw new AnalysisException(
              "GROUP BY expression must not contain aggregate functions: "
                  + groupingExprs_.get(i).toSql());
        }
        if (groupingExprsCopy.get(i).contains(AnalyticExpr.class)) {
          // Note: unlike the aggregate check above, this message prints the
          // ordinal-/alias-substituted copy, not the original grouping expr.
          throw new AnalysisException(
              "GROUP BY expression must not contain analytic expressions: "
                  + groupingExprsCopy.get(i).toSql());
        }
      }
    }

    // analyze having clause
    if (havingClause_ != null) {
      if (havingClause_.contains(Predicates.instanceOf(Subquery.class))) {
        throw new AnalysisException(
            "Subqueries are not supported in the HAVING clause.");
      }
      // substitute aliases in place (ordinals not allowed in having clause)
      havingPred_ = havingClause_.substitute(aliasSmap_, analyzer, false);
      havingPred_.checkReturnsBool("HAVING clause", true);
      // can't contain analytic exprs
      Expr analyticExpr = havingPred_.findFirstOf(AnalyticExpr.class);
      if (analyticExpr != null) {
        throw new AnalysisException(
            "HAVING clause must not contain analytic expressions: "
                + analyticExpr.toSql());
      }
    }

    // Collect the aggregate expressions from the SELECT, HAVING and ORDER BY clauses
    // of this statement.
    ArrayList<FunctionCallExpr> aggExprs = Lists.newArrayList();
    TreeNode.collect(resultExprs_, Expr.isAggregatePredicate(), aggExprs);
    if (havingPred_ != null) {
      havingPred_.collect(Expr.isAggregatePredicate(), aggExprs);
    }
    if (sortInfo_ != null) {
      // TODO: Avoid evaluating aggs in ignored order-bys
      TreeNode.collect(sortInfo_.getOrderingExprs(), Expr.isAggregatePredicate(),
          aggExprs);
    }

    // Optionally rewrite all count(distinct <expr>) into equivalent NDV() calls.
    ExprSubstitutionMap ndvSmap = null;
    if (analyzer.getQueryCtx().getRequest().query_options.appx_count_distinct) {
      ndvSmap = new ExprSubstitutionMap();
      for (FunctionCallExpr aggExpr : aggExprs) {
        if (!aggExpr.isDistinct()
            || !aggExpr.getFnName().getFunction().equals("count")
            || aggExpr.getParams().size() != 1) {
          continue;
        }
        FunctionCallExpr ndvFnCall =
            new FunctionCallExpr("ndv", aggExpr.getParams().exprs());
        ndvFnCall.analyzeNoThrow(analyzer);
        Preconditions.checkState(ndvFnCall.getType().equals(aggExpr.getType()));
        ndvSmap.put(aggExpr, ndvFnCall);
      }
      // Replace all count(distinct <expr>) with NDV(<expr>).
      List<Expr> substAggExprs =
          Expr.substituteList(aggExprs, ndvSmap, analyzer, false);
      aggExprs.clear();
      for (Expr aggExpr : substAggExprs) {
        Preconditions.checkState(aggExpr instanceof FunctionCallExpr);
        aggExprs.add((FunctionCallExpr) aggExpr);
      }
    }

    // When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are
    // evaluated in two phases (see AggregateInfo for more details). In particular,
    // COUNT(c) in "SELECT COUNT(c), AGG(DISTINCT d) from R" is transformed to
    // "SELECT SUM(cnt) FROM (SELECT COUNT(c) as cnt from R group by d ) S".
    // Since a group-by expression is added to the inner query it returns no rows if
    // R is empty, in which case the SUM of COUNTs will return NULL.
    // However the original COUNT(c) should have returned 0 instead of NULL in this case.
    // Therefore, COUNT([ALL]) is transformed into zeroifnull(COUNT([ALL]) if
    // i) There is no GROUP-BY clause, and
    // ii) Other DISTINCT aggregates are present.
    ExprSubstitutionMap countAllMap = createCountAllMap(aggExprs, analyzer);
    countAllMap = ExprSubstitutionMap.compose(ndvSmap, countAllMap, analyzer);
    List<Expr> substitutedAggs =
        Expr.substituteList(aggExprs, countAllMap, analyzer, false);
    aggExprs.clear();
    TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), aggExprs);
    createAggInfo(groupingExprsCopy, aggExprs, analyzer);

    // combine avg smap with the one that produces the final agg output
    AggregateInfo finalAggInfo =
        aggInfo_.getSecondPhaseDistinctAggInfo() != null
            ? aggInfo_.getSecondPhaseDistinctAggInfo()
            : aggInfo_;

    ExprSubstitutionMap combinedSmap =
        ExprSubstitutionMap.compose(countAllMap, finalAggInfo.getOutputSmap(), analyzer);

    // change select list, having and ordering exprs to point to agg output. We need
    // to reanalyze the exprs at this point.
    // The debug strings (in particular the descriptor table dump) are only built when
    // the corresponding log level is enabled.
    if (LOG.isTraceEnabled()) {
      LOG.trace("combined smap: " + combinedSmap.debugString());
      LOG.trace("desctbl: " + analyzer.getDescTbl().debugString());
      LOG.trace("resultexprs: " + Expr.debugString(resultExprs_));
    }
    resultExprs_ = Expr.substituteList(resultExprs_, combinedSmap, analyzer, false);
    if (LOG.isTraceEnabled()) {
      LOG.trace("post-agg selectListExprs: " + Expr.debugString(resultExprs_));
    }
    if (havingPred_ != null) {
      // Make sure the predicate in the HAVING clause does not contain a
      // subquery.
      Preconditions.checkState(!havingPred_.contains(
          Predicates.instanceOf(Subquery.class)));
      havingPred_ = havingPred_.substitute(combinedSmap, analyzer, false);
      analyzer.registerConjuncts(havingPred_, true);
      if (LOG.isDebugEnabled()) {
        LOG.debug("post-agg havingPred: " + havingPred_.debugString());
      }
    }
    if (sortInfo_ != null) {
      sortInfo_.substituteOrderingExprs(combinedSmap, analyzer);
      if (LOG.isDebugEnabled()) {
        LOG.debug("post-agg orderingExprs: "
            + Expr.debugString(sortInfo_.getOrderingExprs()));
      }
    }

    // check that all post-agg exprs point to agg output
    for (int i = 0; i < selectList_.getItems().size(); ++i) {
      if (!resultExprs_.get(i).isBound(finalAggInfo.getOutputTupleId())) {
        SelectListItem item = selectList_.getItems().get(i);
        // '*' is only rejected above when there is a GROUP BY clause, so a star item
        // can reach this point (e.g. aggregate + '*' without GROUP BY). Everywhere
        // else in this class star items are checked with isStar() before getExpr()
        // is called, so print the item itself instead of dereferencing its expr.
        String itemSql = item.isStar() ? item.toSql() : item.getExpr().toSql();
        throw new AnalysisException(
            "select list expression not produced by aggregation output "
                + "(missing from GROUP BY clause?): "
                + itemSql);
      }
    }
    if (orderByElements_ != null) {
      for (int i = 0; i < orderByElements_.size(); ++i) {
        if (!sortInfo_.getOrderingExprs().get(i).isBound(
            finalAggInfo.getOutputTupleId())) {
          throw new AnalysisException(
              "ORDER BY expression not produced by aggregation output "
                  + "(missing from GROUP BY clause?): "
                  + orderByElements_.get(i).getExpr().toSql());
        }
      }
    }
    if (havingPred_ != null) {
      if (!havingPred_.isBound(finalAggInfo.getOutputTupleId())) {
        throw new AnalysisException(
            "HAVING clause not produced by aggregation output "
                + "(missing from GROUP BY clause?): "
                + havingClause_.toSql());
      }
    }
  }

  /**
   * Create a map from COUNT([ALL]) -> zeroifnull(COUNT([ALL])) if
   * i) There is no GROUP-BY, and
   * ii) There are other distinct aggregates to be evaluated.
   * This transformation is necessary for COUNT to correctly return 0 for empty
   * input relations.
   *
   * @return the substitution map; empty if no substitution needs to be done
   */
  private ExprSubstitutionMap createCountAllMap(
      List<FunctionCallExpr> aggExprs, Analyzer analyzer)
      throws AnalysisException {
    ExprSubstitutionMap scalarCountAllMap = new ExprSubstitutionMap();

    if (groupingExprs_ != null && !groupingExprs_.isEmpty()) {
      // There are grouping expressions, so no substitution needs to be done.
      return scalarCountAllMap;
    }

    com.google.common.base.Predicate<FunctionCallExpr> isNotDistinctPred =
        new com.google.common.base.Predicate<FunctionCallExpr>() {
          @Override
          public boolean apply(FunctionCallExpr expr) {
            return !expr.isDistinct();
          }
        };
    if (Iterables.all(aggExprs, isNotDistinctPred)) {
      // Only [ALL] aggs, so no substitution needs to be done.
      return scalarCountAllMap;
    }

    com.google.common.base.Predicate<FunctionCallExpr> isCountPred =
        new com.google.common.base.Predicate<FunctionCallExpr>() {
          @Override
          public boolean apply(FunctionCallExpr expr) {
            return expr.getFnName().getFunction().equals("count");
          }
        };

    Iterable<FunctionCallExpr> countAllAggs =
        Iterables.filter(aggExprs, Predicates.and(isCountPred, isNotDistinctPred));
    for (FunctionCallExpr countAllAgg : countAllAggs) {
      // Replace COUNT(ALL) with zeroifnull(COUNT(ALL))
      ArrayList<Expr> zeroIfNullParam = Lists.newArrayList(countAllAgg.clone());
      FunctionCallExpr zeroIfNull =
          new FunctionCallExpr("zeroifnull", zeroIfNullParam);
      zeroIfNull.analyze(analyzer);
      scalarCountAllMap.put(countAllAgg, zeroIfNull);
    }

    return scalarCountAllMap;
  }

  /**
   * Create aggInfo for the given grouping and agg exprs.
   */
  private void createAggInfo(ArrayList<Expr> groupingExprs,
      ArrayList<FunctionCallExpr> aggExprs, Analyzer analyzer)
      throws AnalysisException {
    if (selectList_.isDistinct()) {
      // Create aggInfo for SELECT DISTINCT ... stmt:
      // - all select list items turn into grouping exprs
      // - there are no aggregate exprs
      Preconditions.checkState(groupingExprs.isEmpty());
      Preconditions.checkState(aggExprs.isEmpty());
      ArrayList<Expr> distinctGroupingExprs = Expr.cloneList(resultExprs_);
      aggInfo_ =
          AggregateInfo.create(distinctGroupingExprs, null, null, analyzer);
    } else {
      aggInfo_ = AggregateInfo.create(groupingExprs, aggExprs, null, analyzer);
    }
  }

  /**
   * If the select list contains AnalyticExprs, create AnalyticInfo and substitute
   * AnalyticExprs using the AnalyticInfo's smap.
   */
  private void analyzeAnalytics(Analyzer analyzer)
      throws AnalysisException {
    // collect AnalyticExprs from the SELECT and ORDER BY clauses
    ArrayList<Expr> analyticExprs = Lists.newArrayList();
    TreeNode.collect(resultExprs_, AnalyticExpr.class, analyticExprs);
    if (sortInfo_ != null) {
      TreeNode.collect(sortInfo_.getOrderingExprs(), AnalyticExpr.class,
          analyticExprs);
    }
    if (analyticExprs.isEmpty()) return;
    analyticInfo_ = AnalyticInfo.create(analyticExprs, analyzer);

    // change select list and ordering exprs to point to analytic output. We need
    // to reanalyze the exprs at this point.
    resultExprs_ = Expr.substituteList(resultExprs_, analyticInfo_.getSmap(), analyzer,
        false);
    // Only build the debug strings when they will actually be logged.
    if (LOG.isTraceEnabled()) {
      LOG.trace("post-analytic selectListExprs: " + Expr.debugString(resultExprs_));
    }
    if (sortInfo_ != null) {
      sortInfo_.substituteOrderingExprs(analyticInfo_.getSmap(), analyzer);
      if (LOG.isTraceEnabled()) {
        LOG.trace("post-analytic orderingExprs: "
            + Expr.debugString(sortInfo_.getOrderingExprs()));
      }
    }
  }

  /**
   * Returns the SQL string corresponding to this SelectStmt.
   */
  @Override
  public String toSql() {
    // Return the SQL string before inline-view expression substitution.
    if (sqlString_ != null) return sqlString_;

    StringBuilder strBuilder = new StringBuilder();
    if (withClause_ != null) {
      strBuilder.append(withClause_.toSql());
      strBuilder.append(" ");
    }

    // Select list
    strBuilder.append("SELECT ");
    if (selectList_.isDistinct()) {
      strBuilder.append("DISTINCT ");
    }
    if (selectList_.hasPlanHints()) {
      strBuilder.append(ToSqlUtils.getPlanHintsSql(selectList_.getPlanHints()) + " ");
    }
    for (int i = 0; i < selectList_.getItems().size(); ++i) {
      strBuilder.append(selectList_.getItems().get(i).toSql());
      strBuilder.append((i + 1 != selectList_.getItems().size()) ? ", " : "");
    }
    // From clause
    if (!tableRefs_.isEmpty()) {
      strBuilder.append(" FROM ");
      // No separator is appended here; the table refs are emitted back to back.
      for (int i = 0; i < tableRefs_.size(); ++i) {
        strBuilder.append(tableRefs_.get(i).toSql());
      }
    }
    // Where clause
    if (whereClause_ != null) {
      strBuilder.append(" WHERE ");
      strBuilder.append(whereClause_.toSql());
    }
    // Group By clause
    if (groupingExprs_ != null) {
      strBuilder.append(" GROUP BY ");
      for (int i = 0; i < groupingExprs_.size(); ++i) {
        strBuilder.append(groupingExprs_.get(i).toSql());
        strBuilder.append((i + 1 != groupingExprs_.size()) ? ", " : "");
      }
    }
    // Having clause
    if (havingClause_ != null) {
      strBuilder.append(" HAVING ");
      strBuilder.append(havingClause_.toSql());
    }
    // Order By clause
    if (orderByElements_ != null) {
      strBuilder.append(" ORDER BY ");
      for (int i = 0; i < orderByElements_.size(); ++i) {
        strBuilder.append(orderByElements_.get(i).toSql());
        strBuilder.append((i + 1 != orderByElements_.size()) ? ", " : "");
      }
    }
    // Limit clause. Guarded because clone() and returnsSingleRow() both treat
    // limitElement_ as possibly null.
    if (limitElement_ != null) {
      strBuilder.append(limitElement_.toSql());
    }
    return strBuilder.toString();
  }

  /**
   * If the select statement has a sort/top that is evaluated, then the sort tuple
   * is materialized. Else, if there is aggregation then the aggregate tuple id is
   * materialized. Otherwise, all referenced tables are materialized as long as they are
   * not semi-joined. If there are analytics and no sort, then the returned tuple
   * ids also include the logical analytic output tuple.
   */
  @Override
  public void getMaterializedTupleIds(ArrayList<TupleId> tupleIdList) {
    if (evaluateOrderBy_) {
      tupleIdList.add(sortInfo_.getSortTupleDescriptor().getId());
    } else if (aggInfo_ != null) {
      // Return the tuple id produced in the final aggregation step.
      tupleIdList.add(aggInfo_.getResultTupleId());
    } else {
      for (TableRef tblRef : tableRefs_) {
        // Don't include materialized tuple ids from semi-joined table
        // refs (see IMPALA-1526)
        if (tblRef.getJoinOp().isLeftSemiJoin()) continue;
        // Remove the materialized tuple ids of all the table refs that
        // are semi-joined by the right semi/anti join.
        if (tblRef.getJoinOp().isRightSemiJoin()) tupleIdList.clear();
        tupleIdList.addAll(tblRef.getMaterializedTupleIds());
      }
    }
    // We materialize the agg tuple or the table refs together with the analytic tuple.
    if (hasAnalyticInfo() && !evaluateOrderBy_) {
      tupleIdList.add(analyticInfo_.getOutputTupleId());
    }
  }

  /**
   * @return a new list containing a clone of every table ref of this block, in order
   */
  private ArrayList<TableRef> cloneTableRefs() {
    ArrayList<TableRef> clone = Lists.newArrayList();
    for (TableRef tblRef : tableRefs_) {
      clone.add(tblRef.clone());
    }
    return clone;
  }

  /**
   * Creates a new SelectStmt from clones of this block's clauses. The WHERE, GROUP BY
   * and HAVING exprs are cloned and reset; the WITH clause is cloned as well.
   */
  @Override
  public QueryStmt clone() {
    SelectStmt selectClone = new SelectStmt(selectList_.clone(), cloneTableRefs(),
        (whereClause_ != null) ? whereClause_.clone().reset() : null,
        (groupingExprs_ != null) ? Expr.resetList(Expr.cloneList(groupingExprs_)) : null,
        (havingClause_ != null) ? havingClause_.clone().reset() : null,
        cloneOrderByElements(),
        (limitElement_ != null) ? limitElement_.clone() : null);
    selectClone.setWithClause(cloneWithClause());
    return selectClone;
  }

  /**
   * Check if the stmt returns a single row. This can happen
   * in the following cases:
   * 1. select stmt with a 'limit 1' clause
   * 2. select stmt with an aggregate function and no group by.
   * 3. select stmt with no from clause.
   *
   * This function may produce false negatives because the cardinality of the
   * result set also depends on the data a stmt is processing.
   */
  public boolean returnsSingleRow() {
    // limit 1 clause
    if (limitElement_ != null && limitElement_.getLimit() == 1) return true;
    // No from clause (base tables or inline views)
    if (tableRefs_.isEmpty()) return true;
    // Aggregation with no group by and no DISTINCT
    if (hasAggInfo() && !hasGroupByClause() && !selectList_.isDistinct()) return true;
    // In all other cases, return false.
    return false;
  }
}