org.apache.impala.analysis.StmtRewriter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.impala.analysis.StmtRewriter.java

Source

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.impala.analysis;

import java.util.ArrayList;
import java.util.List;

import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
import org.apache.impala.analysis.UnionStmt.UnionOperand;
import org.apache.impala.common.AnalysisException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

/**
 * Class representing a statement rewriter. A statement rewriter performs subquery
 * unnesting on an analyzed parse tree.
 * TODO: Now that we have a nested-loop join supporting all join modes we could
 * allow more rewrites, although it is not clear we would always want to.
 */
public class StmtRewriter {
    private final static Logger LOG = LoggerFactory.getLogger(StmtRewriter.class);

    /**
     * Rewrite the statement of an analysis result in-place. Assumes that BetweenPredicates
     * have already been rewritten.
     */
    public static void rewrite(AnalysisResult analysisResult) throws AnalysisException {
        // Analyzed stmt that contains a query statement with subqueries to be rewritten.
        StatementBase stmt = analysisResult.getStmt();
        Preconditions.checkState(stmt.isAnalyzed());
        // Analyzed query statement to be rewritten.
        QueryStmt queryStmt = null;
        if (stmt instanceof QueryStmt) {
            queryStmt = (QueryStmt) analysisResult.getStmt();
        } else if (stmt instanceof InsertStmt) {
            queryStmt = ((InsertStmt) analysisResult.getStmt()).getQueryStmt();
        } else if (stmt instanceof CreateTableAsSelectStmt) {
            queryStmt = ((CreateTableAsSelectStmt) analysisResult.getStmt()).getQueryStmt();
        } else if (analysisResult.isUpdateStmt()) {
            queryStmt = ((UpdateStmt) analysisResult.getStmt()).getQueryStmt();
        } else if (analysisResult.isDeleteStmt()) {
            queryStmt = ((DeleteStmt) analysisResult.getStmt()).getQueryStmt();
        } else {
            throw new AnalysisException("Unsupported statement containing subqueries: " + stmt.toSql());
        }
        rewriteQueryStatement(queryStmt, queryStmt.getAnalyzer());
    }

    /**
     * Calls the appropriate rewrite method based on the specific type of query stmt. See
     * rewriteSelectStatement() and rewriteUnionStatement() documentation.
     */
    public static void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer) throws AnalysisException {
        Preconditions.checkNotNull(stmt);
        Preconditions.checkNotNull(stmt.isAnalyzed());
        if (stmt instanceof SelectStmt) {
            rewriteSelectStatement((SelectStmt) stmt, analyzer);
        } else if (stmt instanceof UnionStmt) {
            rewriteUnionStatement((UnionStmt) stmt, analyzer);
        } else {
            throw new AnalysisException(
                    "Subqueries not supported for " + stmt.getClass().getSimpleName() + " statements");
        }
    }

    /**
     * Rewrite all the subqueries of a SelectStmt in place. Subqueries
     * are currently supported in FROM and WHERE clauses. The rewrite is performed in
     * place and not in a clone of SelectStmt because it requires the stmt to be analyzed.
     */
    private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer) throws AnalysisException {
        // Rewrite all the subqueries in the FROM clause.
        for (TableRef tblRef : stmt.fromClause_) {
            if (!(tblRef instanceof InlineViewRef))
                continue;
            InlineViewRef inlineViewRef = (InlineViewRef) tblRef;
            rewriteQueryStatement(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer());
        }
        // Rewrite all the subqueries in the WHERE clause.
        if (stmt.hasWhereClause()) {
            // Push negation to leaf operands.
            stmt.whereClause_ = Expr.pushNegationToOperands(stmt.whereClause_);
            // Check if we can rewrite the subqueries in the WHERE clause. OR predicates with
            // subqueries are not supported.
            if (hasSubqueryInDisjunction(stmt.whereClause_)) {
                throw new AnalysisException(
                        "Subqueries in OR predicates are not supported: " + stmt.whereClause_.toSql());
            }
            rewriteWhereClauseSubqueries(stmt, analyzer);
        }
        stmt.sqlString_ = null;
        if (LOG.isTraceEnabled())
            LOG.trace("rewritten stmt: " + stmt.toSql());
    }

    /**
     * Rewrite all operands in a UNION. The conditions that apply to SelectStmt rewriting
     * also apply here.
     */
    private static void rewriteUnionStatement(UnionStmt stmt, Analyzer analyzer) throws AnalysisException {
        for (UnionOperand operand : stmt.getOperands()) {
            Preconditions.checkState(operand.getQueryStmt() instanceof SelectStmt);
            StmtRewriter.rewriteSelectStatement((SelectStmt) operand.getQueryStmt(), operand.getAnalyzer());
        }
    }

    /**
     * Returns true if the Expr tree rooted at 'expr' has at least one subquery
     * that participates in a disjunction.
     */
    private static boolean hasSubqueryInDisjunction(Expr expr) {
        if (!(expr instanceof CompoundPredicate))
            return false;
        if (Expr.IS_OR_PREDICATE.apply(expr)) {
            return expr.contains(Subquery.class);
        }
        for (Expr child : expr.getChildren()) {
            if (hasSubqueryInDisjunction(child))
                return true;
        }
        return false;
    }

    /**
     * Rewrite all subqueries of a stmt's WHERE clause. Initially, all the
     * conjuncts containing subqueries are extracted from the WHERE clause and are
     * replaced with true BoolLiterals. Subsequently, each extracted conjunct is
     * merged into its parent select block by converting it into a join.
     * Conjuncts with subqueries that themselves contain conjuncts with subqueries are
     * recursively rewritten in a bottom up fashion.
     *
     * The following example illustrates the bottom up rewriting of nested queries.
     * Suppose we have the following three level nested query Q0:
     *
     * SELECT *
     * FROM T1                                            : Q0
     * WHERE T1.a IN (SELECT a
     *                FROM T2 WHERE T2.b IN (SELECT b
     *                                       FROM T3))
     * AND T1.c < 10;
     *
     * This query will be rewritten as follows. Initially, the IN predicate
     * T1.a IN (SELECT a FROM T2 WHERE T2.b IN (SELECT b FROM T3)) is extracted
     * from the top level block (Q0) since it contains a subquery and is
     * replaced by a true BoolLiteral, resulting in the following query Q1:
     *
     * SELECT * FROM T1 WHERE TRUE : Q1
     *
     * Since the stmt in the extracted predicate contains a conjunct with a subquery,
     * it is also rewritten. As before, rewriting stmt SELECT a FROM T2
     * WHERE T2.b IN (SELECT b FROM T3) works by first extracting the conjunct that
     * contains the subquery (T2.b IN (SELECT b FROM T3)) and substituting it with
     * a true BoolLiteral, producing the following stmt Q2:
     *
     * SELECT a FROM T2 WHERE TRUE : Q2
     *
     * The predicate T2.b IN (SELECT b FROM T3) is then merged with Q2,
     * producing the following unnested query Q3:
     *
     * SELECT a FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1 ON T2.b = $a$1.b : Q3
     *
     * The extracted IN predicate becomes:
     *
     * T1.a IN (SELECT a FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1 ON T2.b = $a$1.b)
     *
     * Finally, the rewritten IN predicate is merged with query block Q1,
     * producing the following unnested query (WHERE clauses that contain only
     * conjunctions of true BoolLiterals are eliminated):
     *
     * SELECT *
     * FROM T1 LEFT SEMI JOIN (SELECT a
     *                         FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1
     *                         ON T2.b = $a$1.b) $a$1
     * ON $a$1.a = T1.a
     * WHERE T1.c < 10;
     *
     */
    private static void rewriteWhereClauseSubqueries(SelectStmt stmt, Analyzer analyzer) throws AnalysisException {
        int numTableRefs = stmt.fromClause_.size();
        ArrayList<Expr> exprsWithSubqueries = Lists.newArrayList();
        ExprSubstitutionMap smap = new ExprSubstitutionMap();
        // Check if all the conjuncts in the WHERE clause that contain subqueries
        // can currently be rewritten as a join.
        for (Expr conjunct : stmt.whereClause_.getConjuncts()) {
            List<Subquery> subqueries = Lists.newArrayList();
            conjunct.collectAll(Predicates.instanceOf(Subquery.class), subqueries);
            if (subqueries.size() == 0)
                continue;
            if (subqueries.size() > 1) {
                throw new AnalysisException(
                        "Multiple subqueries are not supported in " + "expression: " + conjunct.toSql());
            }
            if (!(conjunct instanceof InPredicate) && !(conjunct instanceof ExistsPredicate)
                    && !(conjunct instanceof BinaryPredicate) && !conjunct.contains(Expr.IS_SCALAR_SUBQUERY)) {
                throw new AnalysisException(
                        "Non-scalar subquery is not supported in " + "expression: " + conjunct.toSql());
            }

            if (conjunct instanceof ExistsPredicate) {
                // Check if we can determine the result of an ExistsPredicate during analysis.
                // If so, replace the predicate with a BoolLiteral predicate and remove it from
                // the list of predicates to be rewritten.
                BoolLiteral boolLiteral = replaceExistsPredicate((ExistsPredicate) conjunct);
                if (boolLiteral != null) {
                    boolLiteral.analyze(analyzer);
                    smap.put(conjunct, boolLiteral);
                    continue;
                }
            }

            // Replace all the supported exprs with subqueries with true BoolLiterals
            // using an smap.
            BoolLiteral boolLiteral = new BoolLiteral(true);
            boolLiteral.analyze(analyzer);
            smap.put(conjunct, boolLiteral);
            exprsWithSubqueries.add(conjunct);
        }
        stmt.whereClause_ = stmt.whereClause_.substitute(smap, analyzer, false);

        boolean hasNewVisibleTuple = false;
        // Recursively rewrite all the exprs that contain subqueries and merge them
        // with 'stmt'.
        for (Expr expr : exprsWithSubqueries) {
            if (mergeExpr(stmt, rewriteExpr(expr, analyzer), analyzer)) {
                hasNewVisibleTuple = true;
            }
        }
        if (canEliminate(stmt.whereClause_))
            stmt.whereClause_ = null;
        if (hasNewVisibleTuple)
            replaceUnqualifiedStarItems(stmt, numTableRefs);
    }

    /**
     * Replace an ExistsPredicate that contains a subquery with a BoolLiteral if we
     * can determine its result without evaluating it. Return null if the result of the
     * ExistsPredicate can only be determined at run-time.
     */
    private static BoolLiteral replaceExistsPredicate(ExistsPredicate predicate) {
        Subquery subquery = predicate.getSubquery();
        Preconditions.checkNotNull(subquery);
        SelectStmt subqueryStmt = (SelectStmt) subquery.getStatement();
        BoolLiteral boolLiteral = null;
        if (subqueryStmt.getAnalyzer().hasEmptyResultSet()) {
            boolLiteral = new BoolLiteral(predicate.isNotExists());
        } else if (subqueryStmt.hasAggInfo() && subqueryStmt.getAggInfo().hasAggregateExprs()
                && !subqueryStmt.hasAnalyticInfo() && subqueryStmt.getHavingPred() == null) {
            boolLiteral = new BoolLiteral(!predicate.isNotExists());
        }
        return boolLiteral;
    }

    /**
     * Modifies in place an expr that contains a subquery by rewriting its
     * subquery stmt. The modified analyzed expr is returned.
     */
    private static Expr rewriteExpr(Expr expr, Analyzer analyzer) throws AnalysisException {
        // Extract the subquery and rewrite it.
        Subquery subquery = expr.getSubquery();
        Preconditions.checkNotNull(subquery);
        rewriteSelectStatement((SelectStmt) subquery.getStatement(), subquery.getAnalyzer());
        // Create a new Subquery with the rewritten stmt and use a substitution map
        // to replace the original subquery from the expr.
        QueryStmt rewrittenStmt = subquery.getStatement().clone();
        rewrittenStmt.reset();
        Subquery newSubquery = new Subquery(rewrittenStmt);
        newSubquery.analyze(analyzer);
        ExprSubstitutionMap smap = new ExprSubstitutionMap();
        smap.put(subquery, newSubquery);
        return expr.substitute(smap, analyzer, false);
    }

    /**
     * Merge an expr containing a subquery with a SelectStmt 'stmt' by
     * converting the subquery stmt of the former into an inline view and
     * creating a join between the new inline view and the right-most table
     * from 'stmt'. Return true if the rewrite introduced a new visible tuple
     * due to a CROSS JOIN or a LEFT OUTER JOIN.
     *
     * This process works as follows:
     * 1. Create a new inline view with the subquery as the view's stmt. Changes
     *    made to the subquery's stmt will affect the inline view.
     * 2. Extract all correlated predicates from the subquery's WHERE
     *    clause; the subquery's select list may be extended with new items and a
     *    GROUP BY clause may be added.
     * 3. Add the inline view to stmt's tableRefs and create a
     *    join (left semi join, anti-join, left outer join for agg functions
     *    that return a non-NULL value for an empty input, or cross-join) with
     *    stmt's right-most table.
     * 4. Initialize the ON clause of the new join from the original subquery
     *    predicate and the new inline view.
     * 5. Apply expr substitutions such that the extracted correlated predicates
     *    refer to columns of the new inline view.
     * 6. Add all extracted correlated predicates to the ON clause.
     */
    private static boolean mergeExpr(SelectStmt stmt, Expr expr, Analyzer analyzer) throws AnalysisException {
        Preconditions.checkNotNull(expr);
        Preconditions.checkNotNull(analyzer);
        boolean updateSelectList = false;

        SelectStmt subqueryStmt = (SelectStmt) expr.getSubquery().getStatement();
        // Create a new inline view from the subquery stmt. The inline view will be added
        // to the stmt's table refs later. Explicitly set the inline view's column labels
        // to eliminate any chance that column aliases from the parent query could reference
        // select items from the inline view after the rewrite.
        List<String> colLabels = Lists.newArrayList();
        for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) {
            colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias());
        }
        InlineViewRef inlineView = new InlineViewRef(stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt,
                colLabels);

        // Extract all correlated predicates from the subquery.
        List<Expr> onClauseConjuncts = extractCorrelatedPredicates(subqueryStmt);
        if (!onClauseConjuncts.isEmpty()) {
            canRewriteCorrelatedSubquery(expr, onClauseConjuncts);
            // For correlated subqueries that are eligible for rewrite by transforming
            // into a join, a LIMIT clause has no effect on the results, so we can
            // safely remove it.
            subqueryStmt.limitElement_ = new LimitElement(null, null);
        }

        // Update the subquery's select list and/or its GROUP BY clause by adding
        // exprs from the extracted correlated predicates.
        boolean updateGroupBy = expr.getSubquery().isScalarSubquery() || (expr instanceof ExistsPredicate
                && !subqueryStmt.getSelectList().isDistinct() && subqueryStmt.hasAggInfo());
        List<Expr> lhsExprs = Lists.newArrayList();
        List<Expr> rhsExprs = Lists.newArrayList();
        for (Expr conjunct : onClauseConjuncts) {
            updateInlineView(inlineView, conjunct, stmt.getTableRefIds(), lhsExprs, rhsExprs, updateGroupBy);
        }

        // Analyzing the inline view triggers reanalysis of the subquery's select statement.
        // However the statement is already analyzed and since statement analysis is not
        // idempotent, the analysis needs to be reset.
        inlineView.reset();
        inlineView.analyze(analyzer);
        inlineView.setLeftTblRef(stmt.fromClause_.get(stmt.fromClause_.size() - 1));
        stmt.fromClause_.add(inlineView);
        JoinOperator joinOp = JoinOperator.LEFT_SEMI_JOIN;

        // Create a join conjunct from the expr that contains a subquery.
        Expr joinConjunct = createJoinConjunct(expr, inlineView, analyzer, !onClauseConjuncts.isEmpty());
        if (joinConjunct != null) {
            SelectListItem firstItem = ((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
            if (!onClauseConjuncts.isEmpty() && firstItem.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
                // Correlated subqueries with an aggregate function that returns non-null on
                // an empty input are rewritten using a LEFT OUTER JOIN because we
                // need to ensure that there is one agg value for every tuple of 'stmt'
                // (parent select block), even for those tuples of 'stmt' that get rejected
                // by the subquery due to some predicate. The new join conjunct is added to
                // stmt's WHERE clause because it needs to be applied to the result of the
                // LEFT OUTER JOIN (both matched and unmatched tuples).
                //
                // TODO Handle other aggregate functions and UDAs that return a non-NULL value
                // on an empty set.
                // TODO Handle count aggregate functions in an expression in subqueries
                // select list.
                stmt.whereClause_ = CompoundPredicate.createConjunction(joinConjunct, stmt.whereClause_);
                joinConjunct = null;
                joinOp = JoinOperator.LEFT_OUTER_JOIN;
                updateSelectList = true;
            }

            if (joinConjunct != null)
                onClauseConjuncts.add(joinConjunct);
        }

        // Create the ON clause from the extracted correlated predicates.
        Expr onClausePredicate = CompoundPredicate.createConjunctivePredicate(onClauseConjuncts);

        if (onClausePredicate == null) {
            Preconditions.checkState(expr instanceof ExistsPredicate);
            ExistsPredicate existsPred = (ExistsPredicate) expr;
            // TODO This is very expensive if uncorrelated. Remove it when we implement
            // independent subquery evaluation.
            if (existsPred.isNotExists()) {
                inlineView.setJoinOp(JoinOperator.LEFT_ANTI_JOIN);
            } else {
                inlineView.setJoinOp(JoinOperator.LEFT_SEMI_JOIN);
            }
            // Note that the concept of a 'correlated inline view' is similar but not the same
            // as a 'correlated subquery', i.e., a subquery with a correlated predicate.
            if (!inlineView.isCorrelated()) {
                // For uncorrelated subqueries, we limit the number of rows returned by the
                // subquery.
                subqueryStmt.setLimit(1);
                inlineView.setOnClause(new BoolLiteral(true));
            }
            return false;
        }

        // Create an smap from the original select-list exprs of the select list to
        // the corresponding inline-view columns.
        ExprSubstitutionMap smap = new ExprSubstitutionMap();
        Preconditions.checkState(lhsExprs.size() == rhsExprs.size());
        for (int i = 0; i < lhsExprs.size(); ++i) {
            Expr lhsExpr = lhsExprs.get(i);
            Expr rhsExpr = rhsExprs.get(i);
            rhsExpr.analyze(analyzer);
            smap.put(lhsExpr, rhsExpr);
        }
        onClausePredicate = onClausePredicate.substitute(smap, analyzer, false);

        // Check for references to ancestor query blocks (cycles in the dependency
        // graph of query blocks are not supported).
        if (!onClausePredicate.isBoundByTupleIds(stmt.getTableRefIds())) {
            throw new AnalysisException("Unsupported correlated subquery: " + subqueryStmt.toSql());
        }

        // Check if we have a valid ON clause for an equi-join.
        boolean hasEqJoinPred = false;
        for (Expr conjunct : onClausePredicate.getConjuncts()) {
            if (!(conjunct instanceof BinaryPredicate))
                continue;
            BinaryPredicate.Operator operator = ((BinaryPredicate) conjunct).getOp();
            if (!operator.isEquivalence())
                continue;
            List<TupleId> lhsTupleIds = Lists.newArrayList();
            conjunct.getChild(0).getIds(lhsTupleIds, null);
            if (lhsTupleIds.isEmpty())
                continue;
            List<TupleId> rhsTupleIds = Lists.newArrayList();
            conjunct.getChild(1).getIds(rhsTupleIds, null);
            if (rhsTupleIds.isEmpty())
                continue;
            // Check if columns from the outer query block (stmt) appear in both sides
            // of the binary predicate.
            if ((lhsTupleIds.contains(inlineView.getDesc().getId()) && lhsTupleIds.size() > 1)
                    || (rhsTupleIds.contains(inlineView.getDesc().getId()) && rhsTupleIds.size() > 1)) {
                continue;
            }
            hasEqJoinPred = true;
            break;
        }

        if (!hasEqJoinPred && !inlineView.isCorrelated()) {
            // TODO: Remove this when independent subquery evaluation is implemented.
            // TODO: Requires support for non-equi joins.
            boolean hasGroupBy = ((SelectStmt) inlineView.getViewStmt()).hasGroupByClause();
            if (!expr.getSubquery().isScalarSubquery()
                    || (!(hasGroupBy && stmt.selectList_.isDistinct()) && hasGroupBy)) {
                throw new AnalysisException("Unsupported predicate with subquery: " + expr.toSql());
            }

            // TODO: Requires support for null-aware anti-join mode in nested-loop joins
            if (expr.getSubquery().isScalarSubquery() && expr instanceof InPredicate
                    && ((InPredicate) expr).isNotIn()) {
                throw new AnalysisException("Unsupported NOT IN predicate with subquery: " + expr.toSql());
            }

            // We can rewrite the aggregate subquery using a cross join. All conjuncts
            // that were extracted from the subquery are added to stmt's WHERE clause.
            stmt.whereClause_ = CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause_);
            inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
            // Indicate that the CROSS JOIN may add a new visible tuple to stmt's
            // select list (if the latter contains an unqualified star item '*')
            return true;
        }

        // We have a valid equi-join conjunct or the inline view is correlated.
        if (expr instanceof InPredicate && ((InPredicate) expr).isNotIn()
                || expr instanceof ExistsPredicate && ((ExistsPredicate) expr).isNotExists()) {
            // For the case of a NOT IN with an eq join conjunct, replace the join
            // conjunct with a conjunct that uses the null-matching eq operator.
            if (expr instanceof InPredicate) {
                joinOp = JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN;
                List<TupleId> tIds = Lists.newArrayList();
                joinConjunct.getIds(tIds, null);
                if (tIds.size() <= 1 || !tIds.contains(inlineView.getDesc().getId())) {
                    throw new AnalysisException("Unsupported NOT IN predicate with subquery: " + expr.toSql());
                }
                // Replace the EQ operator in the generated join conjunct with a
                // null-matching EQ operator.
                for (Expr conjunct : onClausePredicate.getConjuncts()) {
                    if (conjunct.equals(joinConjunct)) {
                        Preconditions.checkState(conjunct instanceof BinaryPredicate);
                        BinaryPredicate binaryPredicate = (BinaryPredicate) conjunct;
                        Preconditions.checkState(binaryPredicate.getOp().isEquivalence());
                        binaryPredicate.setOp(BinaryPredicate.Operator.NULL_MATCHING_EQ);
                        break;
                    }
                }
            } else {
                joinOp = JoinOperator.LEFT_ANTI_JOIN;
            }
        }
        inlineView.setJoinOp(joinOp);
        inlineView.setOnClause(onClausePredicate);
        return updateSelectList;
    }

    /**
     * Replace all unqualified star exprs ('*') from stmt's select list with qualified
     * ones, i.e. tbl_1.*,...,tbl_n.*, where tbl_1,...,tbl_n are the visible tablerefs
     * in stmt. 'tableIndx' indicates the maximum tableRef ordinal to consider when
     * replacing an unqualified star item.
     */
    private static void replaceUnqualifiedStarItems(SelectStmt stmt, int tableIdx) {
        Preconditions.checkState(tableIdx < stmt.fromClause_.size());
        ArrayList<SelectListItem> newItems = Lists.newArrayList();
        for (int i = 0; i < stmt.selectList_.getItems().size(); ++i) {
            SelectListItem item = stmt.selectList_.getItems().get(i);
            if (!item.isStar() || item.getRawPath() != null) {
                newItems.add(item);
                continue;
            }
            // '*' needs to be replaced by tbl1.*,...,tbln.*, where
            // tbl1,...,tbln are the visible tableRefs in stmt.
            for (int j = 0; j < tableIdx; ++j) {
                TableRef tableRef = stmt.fromClause_.get(j);
                if (tableRef.getJoinOp() == JoinOperator.LEFT_SEMI_JOIN
                        || tableRef.getJoinOp() == JoinOperator.LEFT_ANTI_JOIN) {
                    continue;
                }
                newItems.add(SelectListItem.createStarItem(Lists.newArrayList(tableRef.getUniqueAlias())));
            }
        }
        Preconditions.checkState(!newItems.isEmpty());
        boolean isDistinct = stmt.selectList_.isDistinct();
        stmt.selectList_ = new SelectList(newItems, isDistinct, stmt.selectList_.getPlanHints());
    }

    /**
     * Return true if the Expr tree rooted at 'expr' can be safely
     * eliminated, i.e. it only consists of conjunctions of true BoolLiterals.
     */
    private static boolean canEliminate(Expr expr) {
        for (Expr conjunct : expr.getConjuncts()) {
            if (!Expr.IS_TRUE_LITERAL.apply(conjunct))
                return false;
        }
        return true;
    }

    /**
     * Extract all correlated predicates of a subquery.
     *
     * TODO Handle correlated predicates in a HAVING clause.
     */
    private static ArrayList<Expr> extractCorrelatedPredicates(SelectStmt subqueryStmt) throws AnalysisException {
        List<TupleId> subqueryTupleIds = subqueryStmt.getTableRefIds();
        ArrayList<Expr> correlatedPredicates = Lists.newArrayList();

        if (subqueryStmt.hasWhereClause()) {
            if (!canExtractCorrelatedPredicates(subqueryStmt.getWhereClause(), subqueryTupleIds)) {
                throw new AnalysisException("Disjunctions with correlated predicates " + "are not supported: "
                        + subqueryStmt.getWhereClause().toSql());
            }
            // Extract the correlated predicates from the subquery's WHERE clause and
            // replace them with true BoolLiterals.
            Expr newWhereClause = extractCorrelatedPredicates(subqueryStmt.getWhereClause(), subqueryTupleIds,
                    correlatedPredicates);
            if (canEliminate(newWhereClause))
                newWhereClause = null;
            subqueryStmt.setWhereClause(newWhereClause);
        }

        // Process all correlated predicates from subquery's ON clauses.
        for (TableRef tableRef : subqueryStmt.getTableRefs()) {
            if (tableRef.getOnClause() == null)
                continue;

            ArrayList<Expr> onClauseCorrelatedPreds = Lists.newArrayList();
            Expr newOnClause = extractCorrelatedPredicates(tableRef.getOnClause(), subqueryTupleIds,
                    onClauseCorrelatedPreds);
            if (onClauseCorrelatedPreds.isEmpty())
                continue;

            correlatedPredicates.addAll(onClauseCorrelatedPreds);
            if (canEliminate(newOnClause)) {
                // After the extraction of correlated predicates from an ON clause,
                // the latter may only contain conjunctions of True BoolLiterals. In
                // this case, we can eliminate the ON clause and set the join type to
                // CROSS JOIN.
                tableRef.setJoinOp(JoinOperator.CROSS_JOIN);
                tableRef.setOnClause(null);
            } else {
                tableRef.setOnClause(newOnClause);
            }
        }
        return correlatedPredicates;
    }

    /**
     * Extract all correlated predicates from the expr tree rooted at 'root' and
     * replace them with true BoolLiterals. The modified expr tree is returned
     * and the extracted correlated predicates are added to 'matches'.
     */
    private static Expr extractCorrelatedPredicates(Expr root, List<TupleId> tupleIds, ArrayList<Expr> matches) {
        if (isCorrelatedPredicate(root, tupleIds)) {
            matches.add(root);
            return new BoolLiteral(true);
        }
        for (int i = 0; i < root.getChildren().size(); ++i) {
            root.getChildren().set(i, extractCorrelatedPredicates(root.getChild(i), tupleIds, matches));
        }
        return root;
    }

    /**
     * Checks if an expr containing a correlated subquery is eligible for rewrite by
     * tranforming into a join. 'correlatedPredicates' contains the correlated
     * predicates identified in the subquery. Throws an AnalysisException if 'expr'
     * is not eligible for rewrite.
     * TODO: Merge all the rewrite eligibility tests into a single function.
     */
    private static void canRewriteCorrelatedSubquery(Expr expr, List<Expr> correlatedPredicates)
            throws AnalysisException {
        Preconditions.checkNotNull(expr);
        Preconditions.checkNotNull(correlatedPredicates);
        Preconditions.checkState(expr.contains(Subquery.class));
        SelectStmt stmt = (SelectStmt) expr.getSubquery().getStatement();
        Preconditions.checkNotNull(stmt);
        // Grouping and/or aggregation is not allowed on correlated scalar and IN subqueries
        if ((expr instanceof BinaryPredicate && (stmt.hasGroupByClause() || stmt.hasAnalyticInfo()))
                || (expr instanceof InPredicate && (stmt.hasAggInfo() || stmt.hasAnalyticInfo()))) {
            throw new AnalysisException(
                    "Unsupported correlated subquery with grouping " + "and/or aggregation: " + stmt.toSql());
        }

        final com.google.common.base.Predicate<Expr> isSingleSlotRef = new com.google.common.base.Predicate<Expr>() {
            @Override
            public boolean apply(Expr arg) {
                return arg.unwrapSlotRef(false) != null;
            }
        };

        // A HAVING clause is only allowed on correlated EXISTS subqueries with
        // correlated binary predicates of the form Slot = Slot (see IMPALA-2734)
        // TODO Handle binary predicates with IS NOT DISTINCT op
        if (expr instanceof ExistsPredicate && stmt.hasHavingClause() && !correlatedPredicates.isEmpty()
                && (!stmt.hasAggInfo() || !Iterables.all(correlatedPredicates,
                        Predicates.or(Expr.IS_EQ_BINARY_PREDICATE, isSingleSlotRef)))) {
            throw new AnalysisException(
                    "Unsupported correlated EXISTS subquery with a " + "HAVING clause: " + stmt.toSql());
        }

        // The following correlated subqueries with a limit clause are supported:
        // 1. EXISTS subqueries
        // 2. Scalar subqueries with aggregation
        if (stmt.hasLimit()
                && (!(expr instanceof BinaryPredicate) || !stmt.hasAggInfo() || stmt.selectList_.isDistinct())
                && !(expr instanceof ExistsPredicate)) {
            throw new AnalysisException(
                    "Unsupported correlated subquery with a " + "LIMIT clause: " + stmt.toSql());
        }
    }

    /**
     * Update the subquery within an inline view by expanding its select list with exprs
     * from a correlated predicate 'expr' that will be 'moved' to an ON clause in the
     * subquery's parent query block. We need to make sure that every expr extracted from
     * the subquery references an item in the subquery's select list. If 'updateGroupBy'
     * is true, the exprs extracted from 'expr' are also added in stmt's GROUP BY clause.
     * Throws an AnalysisException if we need to update the GROUP BY clause but
     * both the lhs and rhs of 'expr' reference a tuple of the subquery stmt.
     */
    private static void updateInlineView(InlineViewRef inlineView, Expr expr, List<TupleId> parentQueryTids,
            List<Expr> lhsExprs, List<Expr> rhsExprs, boolean updateGroupBy) throws AnalysisException {
        SelectStmt stmt = (SelectStmt) inlineView.getViewStmt();
        List<TupleId> subqueryTblIds = stmt.getTableRefIds();
        ArrayList<Expr> groupByExprs = null;
        if (updateGroupBy)
            groupByExprs = Lists.newArrayList();

        List<SelectListItem> items = stmt.selectList_.getItems();
        // Collect all the SlotRefs from 'expr' and identify those that are bound by
        // subquery tuple ids.
        ArrayList<Expr> slotRefs = Lists.newArrayList();
        expr.collectAll(Predicates.instanceOf(SlotRef.class), slotRefs);
        List<Expr> exprsBoundBySubqueryTids = Lists.newArrayList();
        for (Expr slotRef : slotRefs) {
            if (slotRef.isBoundByTupleIds(subqueryTblIds)) {
                exprsBoundBySubqueryTids.add(slotRef);
            }
        }
        // The correlated predicate only references slots from a parent block,
        // no need to update the subquery's select or group by list.
        if (exprsBoundBySubqueryTids.isEmpty())
            return;
        if (updateGroupBy) {
            Preconditions.checkState(expr instanceof BinaryPredicate);
            Expr exprBoundBySubqueryTids = null;
            if (exprsBoundBySubqueryTids.size() > 1) {
                // If the predicate contains multiple SlotRefs bound by subquery tuple
                // ids, they must all be on the same side of that predicate.
                if (expr.getChild(0).isBoundByTupleIds(subqueryTblIds)
                        && expr.getChild(1).isBoundByTupleIds(parentQueryTids)) {
                    exprBoundBySubqueryTids = expr.getChild(0);
                } else if (expr.getChild(0).isBoundByTupleIds(parentQueryTids)
                        && expr.getChild(1).isBoundByTupleIds(subqueryTblIds)) {
                    exprBoundBySubqueryTids = expr.getChild(1);
                } else {
                    throw new AnalysisException(
                            "All subquery columns " + "that participate in a predicate must be on the same side of "
                                    + "that predicate: " + expr.toSql());
                }
            } else {
                Preconditions.checkState(exprsBoundBySubqueryTids.size() == 1);
                exprBoundBySubqueryTids = exprsBoundBySubqueryTids.get(0);
            }
            exprsBoundBySubqueryTids.clear();
            exprsBoundBySubqueryTids.add(exprBoundBySubqueryTids);
        }

        // Add the exprs bound by subquery tuple ids to the select list and
        // register it for substitution. We use a temporary substitution map
        // because we cannot at this point analyze the new select list expr. Once
        // the new inline view is analyzed, the entries from this map will be
        // added to an ExprSubstitutionMap.
        for (Expr boundExpr : exprsBoundBySubqueryTids) {
            String colAlias = stmt.getColumnAliasGenerator().getNextAlias();
            items.add(new SelectListItem(boundExpr, null));
            inlineView.getExplicitColLabels().add(colAlias);
            lhsExprs.add(boundExpr);
            rhsExprs.add(new SlotRef(Lists.newArrayList(inlineView.getUniqueAlias(), colAlias)));
            if (groupByExprs != null)
                groupByExprs.add(boundExpr);
        }

        // Update the subquery's select list.
        boolean isDistinct = stmt.selectList_.isDistinct();
        stmt.selectList_ = new SelectList(items, isDistinct, stmt.selectList_.getPlanHints());
        // Update subquery's GROUP BY clause
        if (groupByExprs != null && !groupByExprs.isEmpty()) {
            if (stmt.hasGroupByClause()) {
                stmt.groupingExprs_.addAll(groupByExprs);
            } else {
                stmt.groupingExprs_ = groupByExprs;
            }
        }
    }

    /**
     * Returns true if we can extract the correlated predicates from 'expr'. A
     * correlated predicate cannot be extracted if it is part of a disjunction.
     */
    private static boolean canExtractCorrelatedPredicates(Expr expr, List<TupleId> subqueryTupleIds) {
        if (!(expr instanceof CompoundPredicate))
            return true;
        if (Expr.IS_OR_PREDICATE.apply(expr)) {
            return !containsCorrelatedPredicate(expr, subqueryTupleIds);
        }
        for (Expr child : expr.getChildren()) {
            if (!canExtractCorrelatedPredicates(child, subqueryTupleIds)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return true if the expr tree rooted at 'root' contains a correlated
     * predicate.
     */
    private static boolean containsCorrelatedPredicate(Expr root, List<TupleId> tupleIds) {
        if (isCorrelatedPredicate(root, tupleIds))
            return true;
        for (Expr child : root.getChildren()) {
            if (containsCorrelatedPredicate(child, tupleIds))
                return true;
        }
        return false;
    }

    /**
     * Returns true if 'expr' is a correlated predicate. A predicate is
     * correlated if at least one of its SlotRefs belongs to an ancestor
     * query block (i.e. is not bound by the given 'tupleIds').
     */
    private static boolean isCorrelatedPredicate(Expr expr, List<TupleId> tupleIds) {
        return (expr instanceof BinaryPredicate || expr instanceof SlotRef) && !expr.isBoundByTupleIds(tupleIds);
    }

    /**
     * Converts an expr containing a subquery into an analyzed conjunct to be
     * used in a join. The conversion is performed in place by replacing the
     * subquery with the first expr from the select list of 'inlineView'.
     * If 'isCorrelated' is true and the first expr from the inline view contains
     * an aggregate function that returns non-null on an empty input,
     * the aggregate function is wrapped into a 'zeroifnull' function.
     */
    private static Expr createJoinConjunct(Expr exprWithSubquery, InlineViewRef inlineView, Analyzer analyzer,
            boolean isCorrelated) throws AnalysisException {
        Preconditions.checkNotNull(exprWithSubquery);
        Preconditions.checkNotNull(inlineView);
        Preconditions.checkState(exprWithSubquery.contains(Subquery.class));
        if (exprWithSubquery instanceof ExistsPredicate)
            return null;
        // Create a SlotRef from the first item of inlineView's select list
        SlotRef slotRef = new SlotRef(
                Lists.newArrayList(inlineView.getUniqueAlias(), inlineView.getColLabels().get(0)));
        slotRef.analyze(analyzer);
        Expr subquerySubstitute = slotRef;
        if (exprWithSubquery instanceof InPredicate) {
            BinaryPredicate pred = new BinaryPredicate(BinaryPredicate.Operator.EQ, exprWithSubquery.getChild(0),
                    slotRef);
            pred.analyze(analyzer);
            return pred;
        }
        // Only scalar subqueries are supported
        Subquery subquery = exprWithSubquery.getSubquery();
        if (!subquery.isScalarSubquery()) {
            throw new AnalysisException("Unsupported predicate with a non-scalar subquery: " + subquery.toSql());
        }
        ExprSubstitutionMap smap = new ExprSubstitutionMap();
        SelectListItem item = ((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
        if (isCorrelated && !item.getExpr().contains(Expr.IS_BUILTIN_AGG_FN)) {
            throw new AnalysisException(
                    "UDAs are not supported in the select list of " + "correlated subqueries: " + subquery.toSql());
        }
        if (isCorrelated && item.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
            // TODO: Add support for multiple agg functions that return non-null on an
            // empty input, by wrapping them with zeroifnull functions before the inline
            // view is analyzed.
            if (!Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr()) && (!(item.getExpr() instanceof CastExpr)
                    || !Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr().getChild(0)))) {
                throw new AnalysisException("Aggregate function that returns non-null on "
                        + "an empty input cannot be used in an expression in a "
                        + "correlated subquery's select list: " + subquery.toSql());
            }

            List<Expr> aggFns = Lists.newArrayList();
            item.getExpr().collectAll(Expr.NON_NULL_EMPTY_AGG, aggFns);
            // TODO Generalize this by making the aggregate functions aware of the
            // literal expr that they return on empty input, e.g. max returns a
            // NullLiteral whereas count returns a NumericLiteral.
            if (((FunctionCallExpr) aggFns.get(0)).getReturnType().isNumericType()) {
                FunctionCallExpr zeroIfNull = new FunctionCallExpr("zeroifnull",
                        Lists.newArrayList((Expr) slotRef));
                zeroIfNull.analyze(analyzer);
                subquerySubstitute = zeroIfNull;
            } else if (((FunctionCallExpr) aggFns.get(0)).getReturnType().isStringType()) {
                List<Expr> params = Lists.newArrayList();
                params.add(slotRef);
                params.add(new StringLiteral(""));
                FunctionCallExpr ifnull = new FunctionCallExpr("ifnull", params);
                ifnull.analyze(analyzer);
                subquerySubstitute = ifnull;
            } else {
                throw new AnalysisException("Unsupported aggregate function used in "
                        + "a correlated subquery's select list: " + subquery.toSql());
            }
        }
        smap.put(subquery, subquerySubstitute);
        return exprWithSubquery.substitute(smap, analyzer, false);
    }
}