/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.parse;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.DDLTask;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DropTableDesc;
import org.apache.hadoop.hive.ql.plan.ExportWork;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A subclass of the {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer} that just handles
 * update, delete and merge statements.  It works by rewriting the updates and deletes into insert
 * statements (since they are actually inserts) and then doing some patch up to make them work as
 * updates and deletes instead.
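 * (See {@link #reparseAndSuperAnalyze(ASTNode)} and {@link #analyzeMerge(ASTNode)} for the
 * exact rewrite templates.)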
 */
public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {

  private static final Logger LOG = LoggerFactory.getLogger(UpdateDeleteSemanticAnalyzer.class);

  private boolean useSuper = false;

  UpdateDeleteSemanticAnalyzer(QueryState queryState) throws SemanticException {
    super(queryState);
  }

  @Override
  public void analyzeInternal(ASTNode tree) throws SemanticException {
    if (useSuper) {
      super.analyzeInternal(tree);
    } else {
      if (!getTxnMgr().supportsAcid()) {
        throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TXNMGR.getMsg());
      }
      switch (tree.getToken().getType()) {
      case HiveParser.TOK_DELETE_FROM:
        analyzeDelete(tree);
        break;
      case HiveParser.TOK_UPDATE_TABLE:
        analyzeUpdate(tree);
        break;
      case HiveParser.TOK_MERGE:
        analyzeMerge(tree);
        break;
      case HiveParser.TOK_EXPORT:
        analyzeAcidExport(tree);
        break;
      default:
        throw new RuntimeException("Asked to parse token " + tree.getName() + " in "
            + "UpdateDeleteSemanticAnalyzer");
      }
      cleanUpMetaColumnAccessControl();
    }
  }

  private boolean updating() {
    return currentOperation == Context.Operation.UPDATE;
  }

  private boolean deleting() {
    return currentOperation == Context.Operation.DELETE;
  }

  /**
   * Exporting an Acid table is more complicated than a flat table.  It may contain delete events,
   * which can only be interpreted properly within the context of the table/metastore where they
   * were generated.  It may also contain insert events that belong to transactions that aborted,
   * where the same constraints apply.
   * In order to make the export artifact free of these constraints, the export does an
   * insert into tmpTable select * from <export table> to filter/apply the events in the current
   * context and then exports the tmpTable.  This export artifact can now be imported into any
   * table on any cluster (subject to schema checks etc).
   * See {@link #analyzeAcidExport(ASTNode)}
   * @param tree Export statement
   * @return true if exporting an Acid table
   */
  public static boolean isAcidExport(ASTNode tree) throws SemanticException {
    assert tree != null && tree.getToken() != null
        && tree.getToken().getType() == HiveParser.TOK_EXPORT;
    Tree tokTab = tree.getChild(0);
    assert tokTab != null && tokTab.getType() == HiveParser.TOK_TAB;
    Table tableHandle = null;
    try {
      tableHandle = getTable((ASTNode) tokTab.getChild(0), Hive.get(), false);
    } catch (HiveException ex) {
      throw new SemanticException(ex);
    }
    //tableHandle can be null if table doesn't exist
    return tableHandle != null && AcidUtils.isFullAcidTable(tableHandle);
  }

  private static String getTmptTableNameForExport(Table exportTable) {
    String tmpTableDb = exportTable.getDbName();
    String tmpTableName = exportTable.getTableName() + "_"
        + UUID.randomUUID().toString().replace('-', '_');
    return Warehouse.getQualifiedName(tmpTableDb, tmpTableName);
  }

  /**
   * See {@link #isAcidExport(ASTNode)}
   * 1. create the temp table T
   * 2. compile 'insert into T select * from acidTable'
   * 3. compile 'export acidTable' (acidTable will be replaced with T during execution)
   * 4. create a task to drop T
   *
   * Using a true temp (session level) table means it should not affect replication and the table
   * is not visible outside the Session that created it, for security.
   */
  private void analyzeAcidExport(ASTNode ast) throws SemanticException {
    assert ast != null && ast.getToken() != null
        && ast.getToken().getType() == HiveParser.TOK_EXPORT;
    ASTNode tableTree = (ASTNode) ast.getChild(0);
    assert tableTree != null && tableTree.getType() == HiveParser.TOK_TAB;
    ASTNode tokRefOrNameExportTable = (ASTNode) tableTree.getChild(0);
    Table exportTable = getTargetTable(tokRefOrNameExportTable);
    assert AcidUtils.isFullAcidTable(exportTable);

    //need to create the table "manually" rather than creating a task since it has to exist to
    //compile the insert into T...
    String newTableName = getTmptTableNameForExport(exportTable); //this is db.table
    Map<String, String> tblProps = new HashMap<>();
    tblProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.FALSE.toString());
    String location;
    // for temporary tables we set the location to something in the session's scratch dir
    // it has the same life cycle as the tmp table
    try {
      // Generate a unique ID for temp table path.
      // This path will be fixed for the life of the temp table.
      Path path = new Path(SessionState.getTempTableSpace(conf), UUID.randomUUID().toString());
      path = Warehouse.getDnsPath(path, conf);
      location = path.toString();
    } catch (MetaException err) {
      throw new SemanticException("Error while generating temp table path:", err);
    }

    CreateTableLikeDesc ctlt = new CreateTableLikeDesc(newTableName, false, true, null, null,
        location, null, null, tblProps,
        true, //important so we get an exception on name collision
        Warehouse.getQualifiedName(exportTable.getTTable()), false);
    Table newTable;
    try {
      ReadEntity dbForTmpTable = new ReadEntity(db.getDatabase(exportTable.getDbName()));
      inputs.add(dbForTmpTable); //so the plan knows we are 'reading' this db - locks, security...
      DDLTask createTableTask = (DDLTask) TaskFactory.get(
          new DDLWork(new HashSet<>(), new HashSet<>(), ctlt), conf);
      createTableTask.setConf(conf); //above get() doesn't set it
      createTableTask.execute(new DriverContext(new Context(conf)));
      newTable = db.getTable(newTableName);
    } catch (IOException | HiveException ex) {
      throw new SemanticException(ex);
    }

    //now generate the insert statement
    //insert into newTableName select * from ts <where partition spec>
    StringBuilder rewrittenQueryStr = generateExportQuery(newTable.getPartCols(),
        tokRefOrNameExportTable, tableTree, newTableName);
    ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
    Context rewrittenCtx = rr.rewrittenCtx;
    rewrittenCtx.setIsUpdateDeleteMerge(false); //it's set in parseRewrittenQuery()
    ASTNode rewrittenTree = rr.rewrittenTree;
    try {
      useSuper = true;
      //newTable has to exist at this point to compile
      super.analyze(rewrittenTree, rewrittenCtx);
    } finally {
      useSuper = false;
    }
    //now we have the rootTasks set up for Insert ... Select
    removeStatsTasks(rootTasks);
    //now make an ExportTask from the temp table
    /*analyzeExport() creates a TableSpec which in turn tries to build
      "public List<Partition> partitions" by looking in the metastore to find Partitions matching
      the partition spec in the Export command.  These of course don't exist yet since we haven't
      run the insert statement yet.
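      (The resulting ExportWork task is therefore wired to run only after the insert completes;
      see addExportTask() below.)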
     */
    Task<ExportWork> exportTask = ExportSemanticAnalyzer.analyzeExport(ast, newTableName,
        db, conf, inputs, outputs);

    AlterTableDesc alterTblDesc = null;
    {
      /**
       * add an alter table task to set transactional props
       * do it after populating the temp table so that it's written as a non-transactional table,
       * but update the props before export so that the export archive metadata has these props.
       * This way when IMPORT is done for this archive and the target table doesn't exist, it
       * will be created as Acid.
       */
      alterTblDesc = new AlterTableDesc(AlterTableDesc.AlterTableTypes.ADDPROPS);
      HashMap<String, String> mapProps = new HashMap<>();
      mapProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.TRUE.toString());
      alterTblDesc.setProps(mapProps);
      alterTblDesc.setOldName(newTableName);
    }
    addExportTask(rootTasks, exportTask,
        TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));

    {
      /**
       * Now make a task to drop the temp table
       * {@link DDLSemanticAnalyzer#analyzeDropTable(ASTNode ast, TableType expectedType)}
       */
      ReplicationSpec replicationSpec = new ReplicationSpec();
      DropTableDesc dropTblDesc = new DropTableDesc(newTableName, TableType.MANAGED_TABLE,
          false, true, replicationSpec);
      Task<DDLWork> dropTask =
          TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), dropTblDesc), conf);
      exportTask.addDependentTask(dropTask);
    }
    markReadEntityForUpdate();
    if (ctx.isExplainPlan()) {
      try {
        //so that "explain" doesn't "leak" tmp tables
        // TODO: catalog
        db.dropTable(newTable.getDbName(), newTable.getTableName(), true, true, true);
      } catch (HiveException ex) {
        LOG.warn("Unable to drop " + newTableName + " due to: " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Generates
   * insert into newTableName select * from ts <where partition spec>
   * for the EXPORT command.
   */
  private StringBuilder generateExportQuery(List<FieldSchema> partCols,
      ASTNode tokRefOrNameExportTable, ASTNode tableTree, String newTableName)
      throws SemanticException {
    StringBuilder rewrittenQueryStr = new StringBuilder("insert into ").append(newTableName);
    addPartitionColsToInsert(partCols, rewrittenQueryStr);
    rewrittenQueryStr.append(" select * from ")
        .append(getFullTableNameForSQL(tokRefOrNameExportTable));
    //builds the partition spec so we can build a suitable WHERE clause
    TableSpec exportTableSpec = new TableSpec(db, conf, tableTree, false, true);
    if (exportTableSpec.getPartSpec() != null) {
      StringBuilder whereClause = null;
      int partColsIdx = -1; //keep track of corresponding col in partCols
      for (Map.Entry<String, String> ent : exportTableSpec.getPartSpec().entrySet()) {
        partColsIdx++;
        if (ent.getValue() == null) {
          continue; //partial spec
        }
        if (whereClause == null) {
          whereClause = new StringBuilder(" WHERE ");
        }
        if (whereClause.length() > " WHERE ".length()) {
          whereClause.append(" AND ");
        }
        whereClause.append(HiveUtils.unparseIdentifier(ent.getKey(), conf)).append(" = ")
            .append(genPartValueString(partCols.get(partColsIdx).getType(), ent.getValue()));
      }
      if (whereClause != null) {
        rewrittenQueryStr.append(whereClause);
      }
    }
    return rewrittenQueryStr;
  }

  /**
   * Makes the exportTask run after all other tasks of the "insert into T ..." are done.
   */
  private void addExportTask(List<Task<?>> rootTasks,
      Task<ExportWork> exportTask, Task<DDLWork> alterTable) {
    for (Task<? extends Serializable> t : rootTasks) {
      if (t.getNumChild() <= 0) {
        //todo: ConditionalTask#addDependentTask(Task) doesn't do the right thing: HIVE-18978
        t.addDependentTask(alterTable);
        //this is a leaf so add exportTask to follow it
        alterTable.addDependentTask(exportTask);
      } else {
        addExportTask(t.getDependentTasks(), exportTask, alterTable);
      }
    }
  }

  private List<Task<?>> findStatsTasks(List<Task<?>> rootTasks, List<Task<?>> statsTasks) {
    for (Task<? extends Serializable> t : rootTasks) {
      if (t instanceof StatsTask) {
        if (statsTasks == null) {
          statsTasks = new ArrayList<>();
        }
        statsTasks.add(t);
      }
      if (t.getDependentTasks() != null) {
        statsTasks = findStatsTasks(t.getDependentTasks(), statsTasks);
      }
    }
    return statsTasks;
  }

  private void removeStatsTasks(List<Task<?>> rootTasks) {
    List<Task<?>> statsTasks = findStatsTasks(rootTasks, null);
    if (statsTasks == null) {
      return;
    }
    for (Task<?> statsTask : statsTasks) {
      if (statsTask.getParentTasks() == null) {
        continue; //should never happen
      }
      for (Task<?> t : new ArrayList<>(statsTask.getParentTasks())) {
        t.removeDependentTask(statsTask);
      }
    }
  }

  private void analyzeUpdate(ASTNode tree) throws SemanticException {
    currentOperation = Context.Operation.UPDATE;
    reparseAndSuperAnalyze(tree);
  }

  private void analyzeDelete(ASTNode tree) throws SemanticException {
    currentOperation = Context.Operation.DELETE;
    reparseAndSuperAnalyze(tree);
  }

  /**
   * Appends the list of partition columns to the Insert statement, i.e. the 2nd set of
   * partCol1,partCol2 in
   * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
   * @param target target table
   */
  private void addPartitionColsToSelect(List<FieldSchema> partCols,
      StringBuilder rewrittenQueryStr, ASTNode target) throws SemanticException {
    String targetName = target != null ? getSimpleTableName(target) : null;
    // If the table is partitioned, we need to select the partition columns as well.
    if (partCols != null) {
      for (FieldSchema fschema : partCols) {
        rewrittenQueryStr.append(", ");
        //would be nice if there was a way to determine if quotes are needed
        if (targetName != null) {
          rewrittenQueryStr.append(targetName).append('.');
        }
        rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
      }
    }
  }

  /**
   * Asserts that we are not asked to update a bucketing column or partition column.
   * @param colName it's the A in "SET A = B"
   */
  private void checkValidSetClauseTarget(ASTNode colName, Table targetTable)
      throws SemanticException {
    String columnName = normalizeColName(colName.getText());

    // Make sure this isn't one of the partitioning columns, that's not supported.
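    // (Changing a partition column's value would amount to moving the row to a different
    // partition, analogous to the bucket column case below.)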
    for (FieldSchema fschema : targetTable.getPartCols()) {
      if (fschema.getName().equalsIgnoreCase(columnName)) {
        throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg());
      }
    }
    //updating a bucket column would have to move the row from one file to another - not supported
    if (targetTable.getBucketCols() != null && targetTable.getBucketCols().contains(columnName)) {
      throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_BUCKET_VALUE, columnName);
    }
    boolean foundColumnInTargetTable = false;
    for (FieldSchema col : targetTable.getCols()) {
      if (columnName.equalsIgnoreCase(col.getName())) {
        foundColumnInTargetTable = true;
        break;
      }
    }
    if (!foundColumnInTargetTable) {
      throw new SemanticException(ErrorMsg.INVALID_TARGET_COLUMN_IN_SET_CLAUSE,
          colName.getText(), targetTable.getFullyQualifiedName());
    }
  }

  private ASTNode findLHSofAssignment(ASTNode assignment) {
    assert assignment.getToken().getType() == HiveParser.EQUAL :
        "Expected set assignments to use equals operator but found " + assignment.getName();
    ASTNode tableOrColTok = (ASTNode) assignment.getChildren().get(0);
    assert tableOrColTok.getToken().getType() == HiveParser.TOK_TABLE_OR_COL :
        "Expected left side of assignment to be table or column";
    ASTNode colName = (ASTNode) tableOrColTok.getChildren().get(0);
    assert colName.getToken().getType() == HiveParser.Identifier : "Expected column name";
    return colName;
  }

  private Map<String, ASTNode> collectSetColumnsAndExpressions(ASTNode setClause,
      Set<String> setRCols, Table targetTable) throws SemanticException {
    // An update needs to select all of the columns, as we rewrite the entire row.  Also,
    // we need to figure out which columns we are going to replace.
    assert setClause.getToken().getType() == HiveParser.TOK_SET_COLUMNS_CLAUSE :
        "Expected second child of update token to be set token";

    // Get the children of the set clause, each of which should be a column assignment
    List<? extends Node> assignments = setClause.getChildren();
    // Must be a deterministic order map for consistent q-test output across Java versions
    Map<String, ASTNode> setCols = new LinkedHashMap<String, ASTNode>(assignments.size());
    for (Node a : assignments) {
      ASTNode assignment = (ASTNode) a;
      ASTNode colName = findLHSofAssignment(assignment);
      if (setRCols != null) {
        addSetRCols((ASTNode) assignment.getChildren().get(1), setRCols);
      }
      checkValidSetClauseTarget(colName, targetTable);

      String columnName = normalizeColName(colName.getText());
      // This means that in UPDATE T SET x = _something_
      // _something_ can be whatever is supported in SELECT _something_
      setCols.put(columnName, (ASTNode) assignment.getChildren().get(1));
    }
    return setCols;
  }

  /**
   * @return the Metastore representation of the target table
   */
  private Table getTargetTable(ASTNode tabRef) throws SemanticException {
    return getTable(tabRef, db, true);
  }

  /**
   * @param throwException if false, return null if the table doesn't exist, else throw
   */
  private static Table getTable(ASTNode tabRef, Hive db, boolean throwException)
      throws SemanticException {
    String[] tableName;
    Table mTable;
    switch (tabRef.getType()) {
    case HiveParser.TOK_TABREF:
      tableName = getQualifiedTableName((ASTNode) tabRef.getChild(0));
      break;
    case HiveParser.TOK_TABNAME:
      tableName = getQualifiedTableName(tabRef);
      break;
    default:
      throw raiseWrongType("TOK_TABREF|TOK_TABNAME", tabRef);
    }
    try {
      mTable = db.getTable(tableName[0], tableName[1], throwException);
    } catch (InvalidTableException e) {
      LOG.error("Failed to find table " + getDotName(tableName) + " got exception "
          + e.getMessage());
      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(getDotName(tableName)), e);
    } catch (HiveException e) {
      LOG.error("Failed to find table " + getDotName(tableName) + " got exception "
          + e.getMessage());
      throw new SemanticException(e.getMessage(), e);
    }
    return mTable;
  }

  // Walk through all our inputs and set them to note that this read is part of an update or a
  // delete.
  private void markReadEntityForUpdate() {
    for (ReadEntity input : inputs) {
      if (isWritten(input)) {
        //todo: this is actually not adding anything since LockComponent uses a Trie to "promote"
        //a lock, except by accident - when we have a partitioned target table we have a
        //ReadEntity and a WriteEntity for the table, so we mark the ReadEntity and then delete
        //the WriteEntity (replace it with Partition entries), so DbTxnManager skips the Read
        //lock on the ReadEntity....
        input.setUpdateOrDelete(true); //input.noLockNeeded()?
      }
    }
  }

  /**
   * For updates, we need to set the column access info so that it contains information on
   * the columns we are updating.
   * (But not all the columns of the target table, even though the rewritten query writes
   * all columns of the target table, since that is an implementation detail.)
   */
  private void setUpAccessControlInfoForUpdate(Table mTable, Map<String, ASTNode> setCols) {
    ColumnAccessInfo cai = new ColumnAccessInfo();
    for (String colName : setCols.keySet()) {
      cai.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName);
    }
    setUpdateColumnAccessInfo(cai);
  }

  /**
   * We need to weed ROW__ID out of the input column info, as it doesn't make any sense to
   * require the user to have authorization on that column.
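   * (ROW__ID only appears in the query because the rewrite injects it; the user never
   * referenced it directly.)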
   */
  private void cleanUpMetaColumnAccessControl() {
    //we do this for Update/Delete (incl Merge) because we introduce this column into the query
    //as part of the rewrite
    if (columnAccessInfo != null) {
      columnAccessInfo.stripVirtualColumn(VirtualColumn.ROWID);
    }
  }

  /**
   * Parses the newly generated SQL statement to get a new AST.
   */
  private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery)
      throws SemanticException {
    // Set dynamic partitioning to nonstrict so that queries do not need any partition
    // references.
    // todo: this may be a perf issue as it prevents the optimizer... or not
    HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    // Disable LLAP IO wrapper; it doesn't propagate the extra ACID columns correctly.
    HiveConf.setBoolVar(conf, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED, false);
    // Parse the rewritten query string
    Context rewrittenCtx;
    try {
      rewrittenCtx = new Context(conf);
      rewrittenCtx.setHDFSCleanup(true);
      // We keep track of all the contexts that are created by this query
      // so we can clear them when we finish execution
      ctx.addRewrittenStatementContext(rewrittenCtx);
    } catch (IOException e) {
      throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg());
    }
    rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
    rewrittenCtx.setExplainPlan(ctx.isExplainPlan());
    rewrittenCtx.setStatsSource(ctx.getStatsSource());
    rewrittenCtx.setPlanMapper(ctx.getPlanMapper());
    rewrittenCtx.setIsUpdateDeleteMerge(true);
    rewrittenCtx.setCmd(rewrittenQueryStr.toString());

    ASTNode rewrittenTree;
    try {
      LOG.info("Going to reparse <" + originalQuery + "> as \n<"
          + rewrittenQueryStr.toString() + ">");
      rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx);
    } catch (ParseException e) {
      throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
    }
    return new ReparseResult(rewrittenTree, rewrittenCtx);
  }

  /**
   * Asserts that the target table supports Acid writes.
   */
  private void validateTargetTable(Table mTable) throws SemanticException {
    if (mTable.getTableType() == TableType.VIRTUAL_VIEW
        || mTable.getTableType() == TableType.MATERIALIZED_VIEW) {
      LOG.error("Table " + mTable.getFullyQualifiedName() + " is a view or materialized view");
      throw new SemanticException(ErrorMsg.UPDATE_DELETE_VIEW.getMsg());
    }
  }

  /**
   * This supports update and delete statements.
   */
  private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
    List<? extends Node> children = tree.getChildren();
    // The first child should be the table we are updating / deleting from
    ASTNode tabName = (ASTNode) children.get(0);
    assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
        "Expected tablename as first child of " + operation() + " but found " + tabName.getName();

    // Rewrite the delete or update into an insert.  Crazy, but it works as deletes and updates
    // actually are inserts into the delta file in Hive.  A delete
    // DELETE FROM _tablename_ [WHERE ...]
    // will be rewritten as
    // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT ROW__ID[,
    // _partcols_] from _tablename_ SORT BY ROW__ID
    // An update
    // UPDATE _tablename_ SET x = _expr_ [WHERE...]
    // will be rewritten as
    // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT _all_,
    // _partcols_ from _tablename_ SORT BY ROW__ID
    // where _all_ is all the non-partition columns.  The expressions from the set clause will be
    // re-attached later.
    // The where clause will also be re-attached later.
    // The sort by clause is put in there so that records come out in the right order to enable
    // merge on read.
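    // For example, for a (hypothetical) table t(a int, b int) partitioned by (ds string),
    //   DELETE FROM t WHERE a = 1
    // is first rewritten (before the WHERE clause is re-attached) as
    //   insert into table t partition (ds) select ROW__ID, ds from t sort by ROW__ID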
    StringBuilder rewrittenQueryStr = new StringBuilder();
    Table mTable = getTargetTable(tabName);
    validateTargetTable(mTable);

    rewrittenQueryStr.append("insert into table ");
    rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
    addPartitionColsToInsert(mTable.getPartCols(), rewrittenQueryStr);
    rewrittenQueryStr.append(" select ROW__ID");

    Map<Integer, ASTNode> setColExprs = null;
    Map<String, ASTNode> setCols = null;
    // Must be a deterministic order set for consistent q-test output across Java versions
    Set<String> setRCols = new LinkedHashSet<String>();
    if (updating()) {
      // We won't write the set
      // expressions in the rewritten query.  We'll patch that up later.
      // The set list from update should be the second child (index 1)
      assert children.size() >= 2 : "Expected update token to have at least two children";
      ASTNode setClause = (ASTNode) children.get(1);
      setCols = collectSetColumnsAndExpressions(setClause, setRCols, mTable);
      setColExprs = new HashMap<>(setClause.getChildCount());

      List<FieldSchema> nonPartCols = mTable.getCols();
      for (int i = 0; i < nonPartCols.size(); i++) {
        rewrittenQueryStr.append(',');
        String name = nonPartCols.get(i).getName();
        ASTNode setCol = setCols.get(name);
        rewrittenQueryStr.append(HiveUtils.unparseIdentifier(name, this.conf));
        if (setCol != null) {
          // This is one of the columns we're setting, record its position so we can come back
          // later and patch it up.
          // Add one to the index because the select has ROW__ID as the first column.
          setColExprs.put(i + 1, setCol);
        }
      }
    }

    addPartitionColsToSelect(mTable.getPartCols(), rewrittenQueryStr, null);
    rewrittenQueryStr.append(" from ");
    rewrittenQueryStr.append(getFullTableNameForSQL(tabName));

    ASTNode where = null;
    int whereIndex = deleting() ? 1 : 2;
    if (children.size() > whereIndex) {
      where = (ASTNode) children.get(whereIndex);
      assert where.getToken().getType() == HiveParser.TOK_WHERE :
          "Expected where clause, but found " + where.getName();
    }

    // Add a sort by clause so that the row ids come out in the correct order
    rewrittenQueryStr.append(" sort by ROW__ID ");

    ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
    Context rewrittenCtx = rr.rewrittenCtx;
    ASTNode rewrittenTree = rr.rewrittenTree;

    ASTNode rewrittenInsert = (ASTNode) rewrittenTree.getChildren().get(1);
    assert rewrittenInsert.getToken().getType() == HiveParser.TOK_INSERT :
        "Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName();

    if (updating()) {
      rewrittenCtx.setOperation(Context.Operation.UPDATE);
      rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
    } else if (deleting()) {
      rewrittenCtx.setOperation(Context.Operation.DELETE);
      rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
    }

    if (where != null) {
      // The structure of the AST for the rewritten insert statement is:
      // TOK_QUERY -> TOK_FROM
      //          \-> TOK_INSERT -> TOK_INSERT_INTO
      //                        \-> TOK_SELECT
      //                        \-> TOK_SORTBY
      // The following adds the TOK_WHERE and its subtree from the original query as a child of
      // TOK_INSERT, which is where it would have landed if it had been there originally in the
      // string.  We do it this way because it's easier than turning the original AST back into a
      // string and reparsing it.  We have to move the SORT_BY over one,
      // so grab it and then push it to the second slot, and put the where in the first slot
      ASTNode sortBy = (ASTNode) rewrittenInsert.getChildren().get(2);
      assert sortBy.getToken().getType() == HiveParser.TOK_SORTBY :
          "Expected TOK_SORTBY to be first child of TOK_SELECT, but found " + sortBy.getName();
      rewrittenInsert.addChild(sortBy);
      rewrittenInsert.setChild(2, where);
    }

    // Patch up the projection list for updates, putting back the original set expressions.
    if (updating() && setColExprs != null) {
      // Walk through the projection list and replace the column names with the
      // expressions from the original update.  Under the TOK_SELECT (see above) the structure
      // looks like:
      // TOK_SELECT -> TOK_SELEXPR -> expr
      //           \-> TOK_SELEXPR -> expr ...
      ASTNode rewrittenSelect = (ASTNode) rewrittenInsert.getChildren().get(1);
      assert rewrittenSelect.getToken().getType() == HiveParser.TOK_SELECT :
          "Expected TOK_SELECT as second child of TOK_INSERT but found "
          + rewrittenSelect.getName();
      for (Map.Entry<Integer, ASTNode> entry : setColExprs.entrySet()) {
        ASTNode selExpr = (ASTNode) rewrittenSelect.getChildren().get(entry.getKey());
        assert selExpr.getToken().getType() == HiveParser.TOK_SELEXPR :
            "Expected child of TOK_SELECT to be TOK_SELEXPR but was " + selExpr.getName();
        // Now, change its child
        selExpr.setChild(0, entry.getValue());
      }
    }

    try {
      useSuper = true;
      super.analyze(rewrittenTree, rewrittenCtx);
    } finally {
      useSuper = false;
    }
    updateOutputs(mTable);

    if (updating()) {
      setUpAccessControlInfoForUpdate(mTable, setCols);

      // Add the setRCols to the input list
      for (String colName : setRCols) {
        if (columnAccessInfo != null) { //assuming this means we are not doing Auth
          columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()),
              colName);
        }
      }
    }
  }

  /**
   * Checks that {@code readEntity} is also being written.
   */
  private boolean isWritten(Entity readEntity) {
    for (Entity writeEntity : outputs) {
      //make sure to compare them as Entity, i.e. that it's the same table or partition, etc
      if (writeEntity.toString().equalsIgnoreCase(readEntity.toString())) {
        return true;
      }
    }
    return false;
  }

  private String operation() {
    if (currentOperation == Context.Operation.OTHER) {
      throw new IllegalStateException("UpdateDeleteSemanticAnalyzer neither updating nor "
          + "deleting, operation not known.");
    }
    return currentOperation.toString();
  }

  // This method finds any columns on the right side of a set statement (thus rcols) and puts them
  // in a set so we can add them to the list of input cols to check.
  private void addSetRCols(ASTNode node, Set<String> setRCols) {
    // See if this node is a TOK_TABLE_OR_COL.  If so, find the value and put it in the list.
    // If not, recurse on any children.
    if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) {
      ASTNode colName = (ASTNode) node.getChildren().get(0);
      assert colName.getToken().getType() == HiveParser.Identifier : "Expected column name";
      setRCols.add(normalizeColName(colName.getText()));
    } else if (node.getChildren() != null) {
      for (Node n : node.getChildren()) {
        addSetRCols((ASTNode) n, setRCols);
      }
    }
  }

  /**
   * Column names are stored in the metastore in lower case, regardless of the CREATE TABLE
   * statement.  Unfortunately there is no single place that normalizes the input query.
   * @param colName not null
   */
  private static String normalizeColName(String colName) {
    return colName.toLowerCase();
  }

  private Context.Operation currentOperation = Context.Operation.OTHER;
  private static final String Indent = "  ";
  private IdentifierQuoter quotedIdenfierHelper;

  /**
   * This allows us to take an arbitrary ASTNode and turn it back into the SQL that produced it.
   * Since HiveLexer.g is written such that it strips away any ` (back ticks) around
   * quoted identifiers, we need to add those back to the generated SQL.
   * Additionally, the parser only produces tokens of type Identifier and never
   * QuotedIdentifier (HIVE-6013).  So here we just quote all identifiers.
   * (') around String literals are retained w/o issues.
   */
  private static class IdentifierQuoter {
    private final TokenRewriteStream trs;
    private final IdentityHashMap<ASTNode, ASTNode> visitedNodes = new IdentityHashMap<>();

    IdentifierQuoter(TokenRewriteStream trs) {
      this.trs = trs;
      if (trs == null) {
        throw new IllegalArgumentException("Must have a TokenRewriteStream");
      }
    }

    private void visit(ASTNode n) {
      if (n.getType() == HiveParser.Identifier) {
        if (visitedNodes.containsKey(n)) {
          /**
           * Since we are modifying the stream, it's not idempotent.  Ideally, the caller would
           * take care to only quote Identifiers in each subtree once, but this makes it safe.
           */
          return;
        }
        visitedNodes.put(n, n);
        trs.insertBefore(n.getToken(), "`");
        trs.insertAfter(n.getToken(), "`");
      }
      if (n.getChildCount() <= 0) {
        return;
      }
      for (Node c : n.getChildren()) {
        visit((ASTNode) c);
      }
    }
  }

  /**
   * This allows us to take an arbitrary ASTNode and turn it back into the SQL that produced it
   * without needing to understand what it is (except for QuotedIdentifiers).
   */
  private String getMatchedText(ASTNode n) {
    quotedIdenfierHelper.visit(n);
    return ctx.getTokenRewriteStream().toString(n.getTokenStartIndex(),
        n.getTokenStopIndex() + 1).trim();
  }

  /**
   * Here we take a Merge statement AST and generate a semantically equivalent multi-insert
   * statement to execute.  Each Insert leg represents a single WHEN clause.  As much as possible,
   * the new SQL statement is made to look like the input SQL statement, so that it's easier to
   * map Query Compiler errors from the generated SQL to the original one.
   * The generated SQL is a complete representation of the original input for the same reason.
   * In many places SemanticAnalyzer throws exceptions that contain (line, position) coordinates.
   * If the generated SQL doesn't have everything and is patched up later, these coordinates point
   * to the wrong place.
   *
   * @throws SemanticException
   */
  private void analyzeMerge(ASTNode tree) throws SemanticException {
    currentOperation = Context.Operation.MERGE;
    quotedIdenfierHelper = new IdentifierQuoter(ctx.getTokenRewriteStream());
    /*
     * See org.apache.hadoop.hive.ql.parse.TestMergeStatement for some examples of the merge AST.
     * For example, given:
     *   merge into acidTbl using nonAcidPart2 source ON acidTbl.a = source.a2
     *   WHEN MATCHED THEN UPDATE set b = source.b2
     *   WHEN NOT MATCHED THEN INSERT VALUES(source.a2, source.b2)
     * we get an AST like this:
     *   "(tok_merge " +
     *   "(tok_tabname acidtbl) (tok_tabref (tok_tabname nonacidpart2) source) " +
     *   "(= (. (tok_table_or_col acidtbl) a) (. (tok_table_or_col source) a2)) " +
     *   "(tok_matched " +
     *   "(tok_update " +
     *   "(tok_set_columns_clause (= (tok_table_or_col b) (. (tok_table_or_col source) b2))))) " +
     *   "(tok_not_matched " +
     *   "tok_insert " +
     *   "(tok_value_row (. (tok_table_or_col source) a2) (. (tok_table_or_col source) b2))))");
     *
     * And need to produce a multi-insert like this to execute:
     *   FROM acidTbl right outer join nonAcidPart2 ON acidTbl.a = source.a2
     *   INSERT INTO TABLE acidTbl select nonAcidPart2.a2, nonAcidPart2.b2 where acidTbl.a is null
     *   INSERT INTO TABLE acidTbl select target.ROW__ID, nonAcidPart2.a2, nonAcidPart2.b2
     *     where nonAcidPart2.a2=acidTbl.a sort by acidTbl.ROW__ID
     */
    /*todo: we need some sort of validation phase over the original AST to make things user
      friendly; for example, if the original command refers to a column that doesn't exist, this
      will be caught when processing the rewritten query but the errors will point at locations
      that the user can't map to anything
      - the VALUES clause must have the same number of values as the target table (including
      partition cols).  Part cols go last in the Select clause of Insert as Select
      todo: do we care to preserve comments in the original SQL?
      todo: check if identifiers are properly escaped/quoted in the generated SQL - it's currently
      inconsistent.  Look at UnparseTranslator.addIdentifierTranslation() - it does unescape +
      unparse...
      todo: consider "WHEN NOT MATCHED BY SOURCE THEN UPDATE SET TargetTable.Col1 =
      SourceTable.Col1"; what happens when the source is empty?  This should be a runtime error -
      maybe not: the outer side of the ROJ is empty => the join produces 0 rows.  If supporting
      WHEN NOT MATCHED BY SOURCE, then this should be a runtime error*/
    ASTNode target = (ASTNode) tree.getChild(0);
    ASTNode source = (ASTNode) tree.getChild(1);
    String targetName = getSimpleTableName(target);
    String sourceName = getSimpleTableName(source);
    ASTNode onClause = (ASTNode) tree.getChild(2);
    String onClauseAsText = getMatchedText(onClause);

    int whenClauseBegins = 3;
    boolean hasHint = false;
    // query hint
    ASTNode qHint = (ASTNode) tree.getChild(3);
    if (qHint.getType() == HiveParser.QUERY_HINT) {
      hasHint = true;
      whenClauseBegins++;
    }
    Table targetTable = getTargetTable(target);
    validateTargetTable(targetTable);
    List<ASTNode> whenClauses = findWhenClauses(tree, whenClauseBegins);

    StringBuilder rewrittenQueryStr = new StringBuilder("FROM\n");
    rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(target));
    if (isAliased(target)) {
      rewrittenQueryStr.append(" ").append(targetName);
    }
    rewrittenQueryStr.append('\n');
    rewrittenQueryStr.append(Indent).append(chooseJoinType(whenClauses)).append("\n");
    if (source.getType() == HiveParser.TOK_SUBQUERY) {
      //this includes the mandatory alias
      rewrittenQueryStr.append(Indent).append(getMatchedText(source));
    } else {
      rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(source));
      if (isAliased(source)) {
        rewrittenQueryStr.append(" ").append(sourceName);
      }
    }
    rewrittenQueryStr.append('\n');
    rewrittenQueryStr.append(Indent).append("ON ").append(onClauseAsText).append('\n');

    // Add the hint if any
    String hintStr = null;
    if (hasHint) {
      hintStr = " /*+ " + qHint.getText() + " */ ";
    }

    /**
     * We allow at most 2 WHEN MATCHED clauses, in which case 1 must be Update and the other
     * Delete.  If we have both update and delete, the 1st one (in SQL code) must have
     * "AND <extra predicate>" so that the 2nd can ensure not to process the same rows.
     * Update and Delete may be in any order.
     * (Insert is always last.)
     */
    String extraPredicate = null;
    int numWhenMatchedUpdateClauses = 0, numWhenMatchedDeleteClauses = 0;
    int numInsertClauses = 0;
    boolean hintProcessed = false;
    for (ASTNode whenClause : whenClauses) {
      switch (getWhenClauseOperation(whenClause).getType()) {
      case HiveParser.TOK_INSERT:
        numInsertClauses++;
        handleInsert(whenClause, rewrittenQueryStr, target, onClause, targetTable, targetName,
            onClauseAsText, hintProcessed ? null : hintStr);
        hintProcessed = true;
        break;
      case HiveParser.TOK_UPDATE:
        numWhenMatchedUpdateClauses++;
        String s = handleUpdate(whenClause, rewrittenQueryStr, target, onClauseAsText,
            targetTable, extraPredicate, hintProcessed ? null : hintStr);
        hintProcessed = true;
        if (numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
          extraPredicate = s; //i.e. it's the 1st WHEN MATCHED
        }
        break;
      case HiveParser.TOK_DELETE:
        numWhenMatchedDeleteClauses++;
        String s1 = handleDelete(whenClause, rewrittenQueryStr, target, onClauseAsText,
            targetTable, extraPredicate, hintProcessed ? null : hintStr);
        hintProcessed = true;
        if (numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
          extraPredicate = s1; //i.e. it's the 1st WHEN MATCHED
        }
        break;
      default:
        throw new IllegalStateException("Unexpected WHEN clause type: " + whenClause.getType()
            + addParseInfo(whenClause));
      }
      if (numWhenMatchedDeleteClauses > 1) {
        throw new SemanticException(ErrorMsg.MERGE_TOO_MANY_DELETE, ctx.getCmd());
      }
      if (numWhenMatchedUpdateClauses > 1) {
        throw new SemanticException(ErrorMsg.MERGE_TOO_MANY_UPDATE, ctx.getCmd());
      }
      assert numInsertClauses < 2 : "too many Insert clauses";
    }
    if (numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) {
      throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd());
    }

    boolean validating = handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText,
        targetTable, numWhenMatchedDeleteClauses == 0 && numWhenMatchedUpdateClauses == 0);
    ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
    Context rewrittenCtx = rr.rewrittenCtx;
    ASTNode rewrittenTree = rr.rewrittenTree;
    rewrittenCtx.setOperation(Context.Operation.MERGE);

    //set dest name mapping on new context; 1st child is TOK_FROM
    for (int insClauseIdx = 1, whenClauseIdx = 0;
        insClauseIdx < rewrittenTree.getChildCount()
            - (validating ? 1 : 0 /*skip cardinality violation clause*/);
        insClauseIdx++, whenClauseIdx++) {
      //we've added Insert clauses in order of WHEN items in whenClauses
      switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
      case HiveParser.TOK_INSERT:
        rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.INSERT);
        break;
      case HiveParser.TOK_UPDATE:
        rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.UPDATE);
        break;
      case HiveParser.TOK_DELETE:
        rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.DELETE);
        break;
      default:
        assert false;
      }
    }
    if (validating) {
      //here means the last branch of the multi-insert is the Cardinality Validation
      rewrittenCtx.addDestNamePrefix(rewrittenTree.getChildCount() - 1,
          Context.DestClausePrefix.INSERT);
    }
    try {
      useSuper = true;
      super.analyze(rewrittenTree, rewrittenCtx);
    } finally {
      useSuper = false;
    }
    updateOutputs(targetTable);
  }

  /**
   * SemanticAnalyzer will generate a WriteEntity for the target table since it doesn't know/check
   * if the read and write are of the same table in "insert ... select ....".  Since DbTxnManager
   * uses Read/WriteEntity objects to decide which locks to acquire, we get more concurrency if we
   * change the table WriteEntity to a set of partition WriteEntity objects based on
   * ReadEntity objects computed for this table.
   */
  private void updateOutputs(Table targetTable) {
    markReadEntityForUpdate();

    if (targetTable.isPartitioned()) {
      List<ReadEntity> partitionsRead = getRestrictedPartitionSet(targetTable);
      if (!partitionsRead.isEmpty()) {
        //if there is a WriteEntity with WriteType=UPDATE/DELETE for the target table, replace it
        //with a WriteEntity for each partition
        List<WriteEntity> toRemove = new ArrayList<>();
        for (WriteEntity we : outputs) {
          WriteEntity.WriteType wt = we.getWriteType();
          if (isTargetTable(we, targetTable)
              && (wt == WriteEntity.WriteType.UPDATE || wt == WriteEntity.WriteType.DELETE)) {
            /**
             * The assumption here is that SemanticAnalyzer will generate a ReadEntity for each
             * partition that exists and is matched by the WHERE clause (which may be all of
             * them).  Since we don't allow updating the value of a partition column, we know
             * that we always write the same (or fewer) partitions than we read.  Still, the
             * write is a Dynamic Partition write - see HIVE-15032.
             */
            toRemove.add(we);
          }
        }
        outputs.removeAll(toRemove);
        // TODO: why is this like that?
        for (ReadEntity re : partitionsRead) {
          for (WriteEntity original : toRemove) {
            //since we may have both Update and Delete branches, Auth needs to know
            WriteEntity we = new WriteEntity(re.getPartition(), original.getWriteType());
            we.setDynamicPartitionWrite(original.isDynamicPartitionWrite());
            outputs.add(we);
          }
        }
      }
    }
  }

  /**
   * If the optimizer has determined that it only has to read some of the partitions of the
   * target table to satisfy the query, then we know that the write side of update/delete
   * (and update/delete parts of merge)
   * can only write (at most) that set of partitions (since we currently don't allow updating
   * partition (or bucket) columns).  So we want to replace the table level
   * WriteEntity in the outputs with a WriteEntity for each of these partitions.
   * ToDo: see if this should be moved to SemanticAnalyzer itself since it applies to any
   * insert which does a select against the same table.  Then SemanticAnalyzer would also
   * be able to not use DP for the Insert...
   *
   * Note that the Insert of Merge may be creating new partitions and writing to partitions
   * which were not read (WHEN NOT MATCHED...).  The WriteEntity for that should be created
   * in MoveTask (or some other task after the query is complete).
   */
  private List<ReadEntity> getRestrictedPartitionSet(Table targetTable) {
    List<ReadEntity> partitionsRead = new ArrayList<>();
    for (ReadEntity re : inputs) {
      if (re.isFromTopLevelQuery && re.getType() == Entity.Type.PARTITION
          && isTargetTable(re, targetTable)) {
        partitionsRead.add(re);
      }
    }
    return partitionsRead;
  }

  /**
   * If there is no WHEN NOT MATCHED THEN INSERT, we don't outer join.
   */
  private String chooseJoinType(List<ASTNode> whenClauses) {
    for (ASTNode whenClause : whenClauses) {
      if (getWhenClauseOperation(whenClause).getType() == HiveParser.TOK_INSERT) {
        return "RIGHT OUTER JOIN";
      }
    }
    return "INNER JOIN";
  }

  /**
   * Does this Entity belong to the target table (partition)?
   */
  private boolean isTargetTable(Entity entity, Table targetTable) {
    //todo: https://issues.apache.org/jira/browse/HIVE-15048
    /**
     * Is this the right way to compare?  Should it just compare paths?
     * equals() impl looks heavy weight
     */
    return targetTable.equals(entity.getTable());
  }

  /**
   * Per SQL Spec ISO/IEC 9075-2:2011(E) Section 14.2 under "General Rules" Item 6/Subitem a/
   * Subitem 2/Subitem B, an error should be raised if > 1 row of "source" matches the same row
   * in "target".
   * This should not affect the runtime of the query as it's running in parallel with other
   * branches of the multi-insert.  It won't actually write any data to merge_tmp_table since the
   * cardinality_violation() UDF throws an error whenever it's called, killing the query.
   * @return true if another Insert clause was added
   */
  private boolean handleCardinalityViolation(StringBuilder rewrittenQueryStr, ASTNode target,
      String onClauseAsString, Table targetTable, boolean onlyHaveWhenNotMatchedClause)
      throws SemanticException {
    if (!conf.getBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK)) {
      LOG.info("Merge statement cardinality violation check is disabled: "
          + HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK.varname);
      return false;
    }
    if (onlyHaveWhenNotMatchedClause) {
      //if there is no update or delete in Merge, there is no need to do the cardinality check
      return false;
    }
    //this is a tmp table and thus Session scoped; acid requires SQL statements to be serial in a
    //given session, i.e. the name can be fixed across all invocations
    String tableName = "merge_tmp_table";
    rewrittenQueryStr.append("\nINSERT INTO ").append(tableName)
        .append("\n SELECT cardinality_violation(")
        .append(getSimpleTableName(target)).append(".ROW__ID");
    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
    rewrittenQueryStr.append(")\n WHERE ").append(onClauseAsString)
        .append(" GROUP BY ").append(getSimpleTableName(target)).append(".ROW__ID");
    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
    rewrittenQueryStr.append(" HAVING count(*) > 1");
    //say table T has partition p, we are generating
    //select cardinality_violation(ROW__ID, p) WHERE ... GROUP BY ROW__ID, p
    //the Group By args are passed to cardinality_violation to add the violating value to the
    //error msg
    try {
      if (null == db.getTable(tableName, false)) {
        StorageFormat format = new StorageFormat(conf);
        format.processStorageFormat("TextFile");
        Table table = db.newTable(tableName);
        table.setSerializationLib(format.getSerde());
        List<FieldSchema> fields = new ArrayList<FieldSchema>();
        fields.add(new FieldSchema("val", "int", null));
        table.setFields(fields);
        table.setDataLocation(Warehouse.getDnsPath(
            new Path(SessionState.get().getTempTableSpace(), tableName), conf));
        table.getTTable().setTemporary(true);
        table.setStoredAsSubDirectories(false);
        table.setInputFormatClass(format.getInputFormat());
        table.setOutputFormatClass(format.getOutputFormat());
        db.createTable(table, true);
      }
    } catch (HiveException | MetaException e) {
      throw new SemanticException(e.getMessage(), e);
    }
    return true;
  }

  /**
   * @param onClauseAsString - because there is no clone() and we need to use it in multiple places
   * @param deleteExtraPredicate - see notes at caller
   */
  private String handleUpdate(ASTNode whenMatchedUpdateClause, StringBuilder rewrittenQueryStr,
      ASTNode target, String onClauseAsString, Table targetTable, String deleteExtraPredicate,
      String hintStr) throws SemanticException {
    assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED;
    assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE;
    String targetName = getSimpleTableName(target);
    rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
    addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
    rewrittenQueryStr.append(" -- update clause\n SELECT ");
    if (hintStr != null) {
      rewrittenQueryStr.append(hintStr);
    }
    rewrittenQueryStr.append(targetName).append(".ROW__ID");

    ASTNode setClause = (ASTNode) getWhenClauseOperation(whenMatchedUpdateClause).getChild(0);
    //columns being updated -> update expressions; "setRCols" (last param) is null because we use
    //the actual expressions before reparsing, i.e. they are known to SemanticAnalyzer logic
    Map<String, ASTNode> setColsExprs = collectSetColumnsAndExpressions(setClause, null,
        targetTable);
    //if the target table has cols c1,c2,c3 and partition col p1 and we had
    //"SET c2 = 5, c1 = current_date()" we want to end up with
    //insert into target (p1) select current_date(), 5, c3, p1 where ....
    //since we take the RHS of the set exactly as it was in the input, we don't need to deal with
    //quoting/escaping column/table names
    List<FieldSchema> nonPartCols = targetTable.getCols();
    for (FieldSchema fs : nonPartCols) {
      rewrittenQueryStr.append(", ");
      String name = fs.getName();
      if (setColsExprs.containsKey(name)) {
        String rhsExp = getMatchedText(setColsExprs.get(name));
        //"set a=5, b=8" - rhsExp picks up the next char (e.g. ',') from the token stream
        switch (rhsExp.charAt(rhsExp.length() - 1)) {
        case ',':
        case '\n':
          rhsExp = rhsExp.substring(0, rhsExp.length() - 1);
        }
        rewrittenQueryStr.append(rhsExp);
      } else {
        rewrittenQueryStr.append(getSimpleTableName(target)).append(".")
            .append(HiveUtils.unparseIdentifier(name, this.conf));
      }
    }
    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
    rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString);
    String extraPredicate = getWhenClausePredicate(whenMatchedUpdateClause);
    if (extraPredicate != null) {
      //we have WHEN MATCHED AND <boolean expr> THEN UPDATE
      rewrittenQueryStr.append(" AND ").append(extraPredicate);
    }
    if (deleteExtraPredicate != null) {
      rewrittenQueryStr.append(" AND NOT(").append(deleteExtraPredicate).append(")");
    }
    rewrittenQueryStr.append("\n SORT BY ");
    rewrittenQueryStr.append(targetName).append(".ROW__ID \n");

    setUpAccessControlInfoForUpdate(targetTable, setColsExprs);
    //we don't deal with columns on the RHS of the SET expression since the whole expr is part of
    //the rewritten SQL statement and is thus handled by SemanticAnalyzer.  Nor do we have to
    //figure out which cols on the RHS are from source and which from target
    return extraPredicate;
  }

  /**
   * @param onClauseAsString - because there is no clone() and we need to use it in multiple places
   * @param updateExtraPredicate - see notes at caller
   */
  private String handleDelete(ASTNode whenMatchedDeleteClause, StringBuilder rewrittenQueryStr,
      ASTNode target, String onClauseAsString, Table targetTable, String updateExtraPredicate,
      String hintStr) throws SemanticException {
    assert whenMatchedDeleteClause.getType() == HiveParser.TOK_MATCHED;
    assert getWhenClauseOperation(whenMatchedDeleteClause).getType() == HiveParser.TOK_DELETE;
    List<FieldSchema> partCols = targetTable.getPartCols();
    String targetName = getSimpleTableName(target);
    rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
    addPartitionColsToInsert(partCols, rewrittenQueryStr);
    rewrittenQueryStr.append(" -- delete clause\n SELECT ");
    if (hintStr != null) {
      rewrittenQueryStr.append(hintStr);
    }
    rewrittenQueryStr.append(targetName).append(".ROW__ID ");
    addPartitionColsToSelect(partCols, rewrittenQueryStr, target);
    rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString);
    String extraPredicate = getWhenClausePredicate(whenMatchedDeleteClause);
    if (extraPredicate != null) {
      //we have WHEN MATCHED AND <boolean expr> THEN DELETE
      rewrittenQueryStr.append(" AND ").append(extraPredicate);
    }
    if (updateExtraPredicate != null) {
      rewrittenQueryStr.append(" AND NOT(").append(updateExtraPredicate).append(")");
    }
    rewrittenQueryStr.append("\n SORT BY ");
    rewrittenQueryStr.append(targetName).append(".ROW__ID \n");
    return extraPredicate;
  }

  private static String addParseInfo(ASTNode n) {
    return " at " + ErrorMsg.renderPosition(n);
  }

  /**
   * Returns the table name to use in the generated query, preserving original quotes/escapes
   * if any.
   * @see #getFullTableNameForSQL(ASTNode)
   */
  private String getSimpleTableName(ASTNode n) throws SemanticException {
    return HiveUtils.unparseIdentifier(getSimpleTableNameBase(n), this.conf);
  }

  private String getSimpleTableNameBase(ASTNode n) throws SemanticException {
    switch (n.getType()) {
    case HiveParser.TOK_TABREF:
      int aliasIndex = findTabRefIdxs(n)[0];
      if (aliasIndex != 0) {
        return n.getChild(aliasIndex).getText(); //the alias
      }
      return getSimpleTableNameBase((ASTNode) n.getChild(0));
    case HiveParser.TOK_TABNAME:
      if (n.getChildCount() == 2) {
        //db.table -> return table
        return n.getChild(1).getText();
      }
      return n.getChild(0).getText();
    case HiveParser.TOK_SUBQUERY:
      return n.getChild(1).getText(); //the alias
    default:
      throw raiseWrongType("TOK_TABREF|TOK_TABNAME|TOK_SUBQUERY", n);
    }
  }

  private static final class ReparseResult {
    private final ASTNode rewrittenTree;
    private final Context rewrittenCtx;

    ReparseResult(ASTNode n, Context c) {
      rewrittenTree = n;
      rewrittenCtx = c;
    }
  }

  private boolean isAliased(ASTNode n) {
    switch (n.getType()) {
    case HiveParser.TOK_TABREF:
      return findTabRefIdxs(n)[0] != 0;
    case HiveParser.TOK_TABNAME:
      return false;
    case HiveParser.TOK_SUBQUERY:
      assert n.getChildCount() > 1 : "Expected Derived Table to be aliased";
      return true;
    default:
      throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n);
    }
  }

  /**
   * Collects the WHEN clauses from the Merge statement AST.
   */
  private List<ASTNode> findWhenClauses(ASTNode tree, int start) throws SemanticException {
    assert tree.getType() == HiveParser.TOK_MERGE;
    List<ASTNode> whenClauses = new ArrayList<>();
    for (int idx = start; idx < tree.getChildCount(); idx++) {
      ASTNode whenClause = (ASTNode) tree.getChild(idx);
      assert whenClause.getType() == HiveParser.TOK_MATCHED
          || whenClause.getType() == HiveParser.TOK_NOT_MATCHED :
          "Unexpected node type found: " + whenClause.getType() + addParseInfo(whenClause);
      whenClauses.add(whenClause);
    }
    if (whenClauses.size() <= 0) {
      //Futureproofing: the parser will actually not allow this
      throw new SemanticException("Must have at least 1 WHEN clause in MERGE statement");
    }
    return whenClauses;
  }

  private ASTNode getWhenClauseOperation(ASTNode whenClause) {
    if (!(whenClause.getType() == HiveParser.TOK_MATCHED
        || whenClause.getType() == HiveParser.TOK_NOT_MATCHED)) {
      throw raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
    }
    return (ASTNode) whenClause.getChild(0);
  }

  /**
   * Returns the <boolean predicate> as in WHEN MATCHED AND <boolean predicate> THEN...
   * @return may be null
   */
  private String getWhenClausePredicate(ASTNode whenClause) {
    if (!(whenClause.getType() == HiveParser.TOK_MATCHED
        || whenClause.getType() == HiveParser.TOK_NOT_MATCHED)) {
      throw raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
    }
    if (whenClause.getChildCount() == 2) {
      return getMatchedText((ASTNode) whenClause.getChild(1));
    }
    return null;
  }

  /**
   * Generates the Insert leg of the multi-insert SQL to represent the
   * WHEN NOT MATCHED THEN INSERT clause.
   * @param targetTableNameInSourceQuery - simple name/alias
   * @throws SemanticException
   */
  private void handleInsert(ASTNode whenNotMatchedClause, StringBuilder rewrittenQueryStr,
      ASTNode target, ASTNode onClause, Table targetTable, String targetTableNameInSourceQuery,
      String onClauseAsString, String hintStr) throws SemanticException {
    ASTNode whenClauseOperation = getWhenClauseOperation(whenNotMatchedClause);
    assert whenNotMatchedClause.getType() == HiveParser.TOK_NOT_MATCHED;
    assert whenClauseOperation.getType() == HiveParser.TOK_INSERT;

    // identify the node that contains the values to insert and the optional column list node
    ArrayList<Node> children = whenClauseOperation.getChildren();
    ASTNode valuesNode = (ASTNode) children.stream()
        .filter(n -> ((ASTNode) n).getType() == HiveParser.TOK_FUNCTION).findFirst().get();
    ASTNode columnListNode = (ASTNode) children.stream()
        .filter(n -> ((ASTNode) n).getType() == HiveParser.TOK_TABCOLNAME).findFirst()
        .orElse(null);

    // if a column list is specified, then it has to have the same number of elements as the
    // values (valuesNode has a child for the struct; the rest are the columns)
    if (columnListNode != null
        && columnListNode.getChildCount() != (valuesNode.getChildCount() - 1)) {
      throw new SemanticException(String.format(
          "Column schema must have the same length as values (%d vs %d)",
          columnListNode.getChildCount(), valuesNode.getChildCount() - 1));
    }

    rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
    if (columnListNode != null) {
      rewrittenQueryStr.append(' ').append(getMatchedText(columnListNode));
    }
    addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
    rewrittenQueryStr.append(" -- insert clause\n SELECT ");
    if (hintStr != null) {
      rewrittenQueryStr.append(hintStr);
    }

    OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable,
        targetTableNameInSourceQuery, conf, onClauseAsString);
    oca.analyze();

    String valuesClause = getMatchedText(valuesNode);
    valuesClause = valuesClause.substring(1, valuesClause.length() - 1); //strip '(' and ')'
    valuesClause = replaceDefaultKeywordForMerge(valuesClause, targetTable, columnListNode);
    rewrittenQueryStr.append(valuesClause).append("\n WHERE ").append(oca.getPredicate());

    String extraPredicate = getWhenClausePredicate(whenNotMatchedClause);
    if (extraPredicate != null) {
      //we have WHEN NOT MATCHED AND <boolean expr> THEN INSERT
      rewrittenQueryStr.append(" AND ")
          .append(getMatchedText(((ASTNode) whenNotMatchedClause.getChild(1)))).append('\n');
    }
  }

  private String replaceDefaultKeywordForMerge(String valueClause, Table table,
      ASTNode columnListNode) throws SemanticException {
    if (!valueClause.toLowerCase().contains("`default`")) {
      return valueClause;
    }
    Map<String, String> colNameToDefaultConstraint = getColNameToDefaultValueMap(table);
    String[] values = valueClause.trim().split(",");
    String[] replacedValues = new String[values.length];

    // the list of the column names may be set in the query
    String[] columnNames = columnListNode == null
        ? table.getAllCols().stream().map(f -> f.getName()).toArray(size -> new String[size])
        : columnListNode.getChildren().stream().map(n -> ((ASTNode) n).toString())
            .toArray(size -> new String[size]);

    for (int i = 0; i < values.length; i++) {
      if (values[i].trim().toLowerCase().equals("`default`")) {
        replacedValues[i] = MapUtils.getString(colNameToDefaultConstraint, columnNames[i], "null");
      } else {
        replacedValues[i] = values[i];
      }
    }
    return StringUtils.join(replacedValues, ',');
  }

  /**
   * Suppose the input Merge statement has ON target.a = source.b and c = d.  Assume that 'c' is
   * from the target table and 'd' is from the source expression.  In order to properly
   * generate the Insert for WHEN NOT MATCHED THEN INSERT, we need to make sure that the Where
   * clause of this Insert contains "target.a is null and target.c is null".  This ensures that
   * this Insert leg does not receive any rows that are processed by the Inserts corresponding to
   * the WHEN MATCHED THEN ... clauses.  (Implicit in this is a mini resolver that figures out if
   * an unqualified column is part of the target table.  We can get away with this simple logic
   * because we know that the target is always a table, as opposed to some derived table.)
   * The job of this class is to generate this predicate.
   *
   * Note that this predicate cannot simply be NOT(on-clause-expr).  If on-clause-expr evaluates
   * to Unknown, it will be treated as False in the WHEN MATCHED Inserts, but NOT(Unknown) =
   * Unknown, and so it will be False for the WHEN NOT MATCHED Insert...
   */
  private static final class OnClauseAnalyzer {
    private final ASTNode onClause;
    private final Map<String, List<String>> table2column = new HashMap<>();
    private final List<String> unresolvedColumns = new ArrayList<>();
    private final List<FieldSchema> allTargetTableColumns = new ArrayList<>();
    private final Set<String> tableNamesFound = new HashSet<>();
    private final String targetTableNameInSourceQuery;
    private final HiveConf conf;
    private final String onClauseAsString;

    /**
     * @param targetTableNameInSourceQuery alias or simple name
     */
    OnClauseAnalyzer(ASTNode onClause, Table targetTable, String targetTableNameInSourceQuery,
        HiveConf conf, String onClauseAsString) {
      this.onClause = onClause;
      allTargetTableColumns.addAll(targetTable.getCols());
      allTargetTableColumns.addAll(targetTable.getPartCols());
      this.targetTableNameInSourceQuery = unescapeIdentifier(targetTableNameInSourceQuery);
      this.conf = conf;
      this.onClauseAsString = onClauseAsString;
    }

    /**
     * Finds all columns and groups them by table ref (if there is one).
     */
    private void visit(ASTNode n) {
      if (n.getType() == HiveParser.TOK_TABLE_OR_COL) {
        ASTNode parent = (ASTNode) n.getParent();
        if (parent != null && parent.getType() == HiveParser.DOT) {
          //the ref must be a table, so look for the column name as the right child of DOT
          if (parent.getParent() != null && parent.getParent().getType() == HiveParser.DOT) {
            //I don't think this can happen... but just in case
            throw new IllegalArgumentException("Found unexpected db.table.col reference in "
                + onClauseAsString);
          }
          addColumn2Table(n.getChild(0).getText(), parent.getChild(1).getText());
        } else {
          //must be just a column name
          unresolvedColumns.add(n.getChild(0).getText());
        }
      }
      if (n.getChildCount() == 0) {
        return;
      }
      for (Node child : n.getChildren()) {
        visit((ASTNode) child);
      }
    }

    private void analyze() {
      visit(onClause);
      if (tableNamesFound.size() > 2) {
        throw new IllegalArgumentException("Found > 2 table refs in ON clause.  Found "
            + tableNamesFound + " in " + onClauseAsString);
      }
      handleUnresolvedColumns();
      if (tableNamesFound.size() > 2) {
        throw new IllegalArgumentException("Found > 2 table refs in ON clause (incl unresolved). "
            + "Found " + tableNamesFound + " in " + onClauseAsString);
      }
    }

    /**
     * Finds those columns that belong to the target table.
     */
    private void handleUnresolvedColumns() {
      if (unresolvedColumns.isEmpty()) {
        return;
      }
      for (String c : unresolvedColumns) {
        for (FieldSchema fs : allTargetTableColumns) {
          if (c.equalsIgnoreCase(fs.getName())) {
            //c belongs to the target table; strictly speaking there may be an ambiguous ref but
            //this will be caught later when the multi-insert is parsed
            addColumn2Table(targetTableNameInSourceQuery.toLowerCase(), c);
            break;
          }
        }
      }
    }

    private void addColumn2Table(String tableName, String columnName) {
      tableName = tableName.toLowerCase(); //normalize name for mapping
      tableNamesFound.add(tableName);
      List<String> cols = table2column.get(tableName);
      if (cols == null) {
        cols = new ArrayList<>();
        table2column.put(tableName, cols);
      }
      //we want to preserve 'columnName' as it was in the original input query so that the
      //rewrite looks as much as possible like the original query
      cols.add(columnName);
    }

    /**
     * Now generate the predicate for the Where clause.
     */
    private String getPredicate() {
      //normalize table name for mapping
      List<String> targetCols = table2column.get(targetTableNameInSourceQuery.toLowerCase());
      if (targetCols == null) {
        /*e.g. ON source.t=1
          This is not strictly speaking invalid, but it does ensure that all columns from the
          target table are NULL for every row.  This would make any WHEN MATCHED clause invalid
          since we don't have a ROW__ID.  The WHEN NOT MATCHED could be meaningful, but it's just
          data from the source satisfying source.t=1... not worth the effort to support this.*/
        throw new IllegalArgumentException(ErrorMsg.INVALID_TABLE_IN_ON_CLAUSE_OF_MERGE
            .format(targetTableNameInSourceQuery, onClauseAsString));
      }
      StringBuilder sb = new StringBuilder();
      for (String col : targetCols) {
        if (sb.length() > 0) {
          sb.append(" AND ");
        }
        //but preserve the table name in SQL
        sb.append(HiveUtils.unparseIdentifier(targetTableNameInSourceQuery, conf)).append(".")
            .append(HiveUtils.unparseIdentifier(col, conf)).append(" IS NULL");
      }
      return sb.toString();
    }
  }
}