/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.parse;

import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.antlr.runtime.tree.Tree;
import org.apache.commons.lang.ObjectUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.ReplCopyTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.ql.plan.ImportTableDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DropTableDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.mapred.OutputFormat;

/**
 * ImportSemanticAnalyzer.
 */
public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {

  public ImportSemanticAnalyzer(QueryState queryState) throws SemanticException {
    super(queryState);
  }
  // Note that the tableExists flag as used by Auth is kind of a hack and
  // assumes only one table will ever be imported - this assumption is broken by
  // REPL LOAD.
  //
  // However, we've chosen not to expand this to a map of tables/etc., since
  // we have expanded how auth works with REPL DUMP / REPL LOAD to simply
  // require ADMIN privileges, rather than checking each object, which
  // quickly becomes untenable, and even more so, costly on memory.
  private boolean tableExists = false;

  public boolean existsTable() {
    return tableExists;
  }

  @Override
  public void analyzeInternal(ASTNode ast) throws SemanticException {
    try {
      Tree fromTree = ast.getChild(0);

      boolean isLocationSet = false;
      boolean isExternalSet = false;
      boolean isPartSpecSet = false;
      String parsedLocation = null;
      String parsedTableName = null;
      String parsedDbName = null;
      LinkedHashMap<String, String> parsedPartSpec = new LinkedHashMap<String, String>();

      // waitOnPrecursor determines whether or not the non-existence of
      // a dependent object is an error. For regular imports, it is.
      // For now, the only thing this affects is whether or not the
      // db exists.
      boolean waitOnPrecursor = false;

      for (int i = 1; i < ast.getChildCount(); ++i) {
        ASTNode child = (ASTNode) ast.getChild(i);
        switch (child.getToken().getType()) {
          case HiveParser.KW_EXTERNAL:
            isExternalSet = true;
            break;
          case HiveParser.TOK_TABLELOCATION:
            isLocationSet = true;
            parsedLocation = EximUtil.relativeToAbsolutePath(conf,
                unescapeSQLString(child.getChild(0).getText()));
            break;
          case HiveParser.TOK_TAB:
            ASTNode tableNameNode = (ASTNode) child.getChild(0);
            Map.Entry<String, String> dbTablePair = getDbTableNamePair(tableNameNode);
            parsedDbName = dbTablePair.getKey();
            parsedTableName = dbTablePair.getValue();
            // get partition metadata if partition specified
            if (child.getChildCount() == 2) {
              ASTNode partspec = (ASTNode) child.getChild(1);
              isPartSpecSet = true;
              parsePartitionSpec(child, parsedPartSpec);
            }
            break;
        }
      }

      // parsing the statement is now done, on to logic.
      tableExists = prepareImport(
          isLocationSet, isExternalSet, isPartSpecSet, waitOnPrecursor,
          parsedLocation, parsedTableName, parsedDbName, parsedPartSpec, fromTree.getText(),
          new EximUtil.SemanticAnalyzerWrapperContext(conf, db, inputs, outputs, rootTasks, LOG, ctx),
          null, null);
    } catch (SemanticException e) {
      throw e;
    } catch (Exception e) {
      throw new SemanticException(ErrorMsg.IMPORT_SEMANTIC_ERROR.getMsg(), e);
    }
  }
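  /*
   * For orientation, the statement shapes this analyzer handles look roughly
   * like the following (a sketch based on the AST tokens handled above; table
   * names and paths are illustrative, not an exhaustive grammar):
   *
   *   IMPORT FROM '/tmp/export/t1';
   *   IMPORT TABLE t1 PARTITION (ds='2010-03-03') FROM '/tmp/export/t1';
   *   IMPORT EXTERNAL TABLE t1 FROM '/tmp/export/t1' LOCATION '/user/hive/ext/t1';
   *
   * KW_EXTERNAL sets isExternalSet, TOK_TABLELOCATION yields parsedLocation,
   * and TOK_TAB yields the db/table names plus an optional partition spec.
   */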
  private void parsePartitionSpec(ASTNode tableNode, LinkedHashMap<String, String> partSpec)
      throws SemanticException {
    // get partition metadata if partition specified
    if (tableNode.getChildCount() == 2) {
      ASTNode partspec = (ASTNode) tableNode.getChild(1);
      // partSpec is a mapping from partition column name to its value.
      for (int j = 0; j < partspec.getChildCount(); ++j) {
        ASTNode partspec_val = (ASTNode) partspec.getChild(j);
        String val = null;
        String colName = unescapeIdentifier(partspec_val.getChild(0).getText().toLowerCase());
        if (partspec_val.getChildCount() < 2) { // DP in the form of T partition (ds, hr)
          throw new SemanticException(
              ErrorMsg.INVALID_PARTITION.getMsg(" - Dynamic partitions not allowed"));
        } else { // in the form of T partition (ds="2010-03-03")
          val = stripQuotes(partspec_val.getChild(1).getText());
        }
        partSpec.put(colName, val);
      }
    }
  }
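  /*
   * Illustrative only: for a clause such as PARTITION (ds='2010-03-03', hr='12'),
   * the method above fills partSpec with {ds=2010-03-03, hr=12}, preserving the
   * column order from the statement (hence the LinkedHashMap). A spec with a
   * bare column name, e.g. PARTITION (ds), is a dynamic-partition form and is
   * rejected here.
   */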
  public static boolean prepareImport(
      boolean isLocationSet, boolean isExternalSet, boolean isPartSpecSet, boolean waitOnPrecursor,
      String parsedLocation, String parsedTableName, String parsedDbName,
      LinkedHashMap<String, String> parsedPartSpec,
      String fromLocn, EximUtil.SemanticAnalyzerWrapperContext x,
      Map<String, Long> dbsUpdated, Map<String, Long> tablesUpdated)
      throws IOException, MetaException, HiveException, URISyntaxException {

    // initialize load path
    URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
    Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());

    FileSystem fs = FileSystem.get(fromURI, x.getConf());
    x.getInputs().add(toReadEntity(fromPath, x.getConf()));

    MetaData rv = new MetaData();
    try {
      rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
    } catch (IOException e) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
    }

    ReplicationSpec replicationSpec = rv.getReplicationSpec();
    if (replicationSpec.isNoop()) {
      // nothing to do here, silently return.
      return false;
    }

    String dbname = SessionState.get().getCurrentDatabase();
    if ((parsedDbName != null) && (!parsedDbName.isEmpty())) {
      // If the parsed statement contained a db.tablename specification, prefer that.
      dbname = parsedDbName;
    }
    if (dbsUpdated != null) {
      dbsUpdated.put(dbname, Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
    }

    // Create table associated with the import.
    // Executed if relevant, and used to contain all the other details about the table if not.
    ImportTableDesc tblDesc;
    try {
      tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
    } catch (Exception e) {
      throw new HiveException(e);
    }

    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
      tblDesc.setReplicationSpec(replicationSpec);
    }

    if (isExternalSet) {
      tblDesc.setExternal(isExternalSet);
      // This condition-check could have been avoided, but to honour the old
      // default of not calling if it wasn't set, we retain that behaviour.
      // TODO: cleanup after verification that the outer if isn't really needed here
    }

    if (isLocationSet) {
      tblDesc.setLocation(parsedLocation);
      x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
    }

    if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
      tblDesc.setTableName(parsedTableName);
    }

    if (tablesUpdated != null) {
      tablesUpdated.put(dbname + "." + tblDesc.getTableName(),
          Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
    }

    List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
    Iterable<Partition> partitions = rv.getPartitions();
    for (Partition partition : partitions) {
      // TODO: this should ideally not create AddPartitionDesc per partition
      AddPartitionDesc partsDesc =
          getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
      partitionDescs.add(partsDesc);
    }

    if (isPartSpecSet) {
      // The import specification asked for only a particular partition to be loaded.
      // We load only that, and ignore all the others.
      boolean found = false;
      for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator();
          partnIter.hasNext();) {
        AddPartitionDesc addPartitionDesc = partnIter.next();
        if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
          found = true;
        } else {
          partnIter.remove();
        }
      }
      if (!found) {
        throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(
            " - Specified partition not found in import directory"));
      }
    }

    if (tblDesc.getTableName() == null) {
      // Either we got the tablename from the IMPORT statement (first priority)
      // or from the export dump.
      throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
    } else {
      x.getConf().set("import.destination.table", tblDesc.getTableName());
      for (AddPartitionDesc addPartitionDesc : partitionDescs) {
        addPartitionDesc.setTableName(tblDesc.getTableName());
      }
    }

    Warehouse wh = new Warehouse(x.getConf());
    Table table = tableIfExists(tblDesc, x.getHive());
    boolean tableExists = false;

    if (table != null) {
      checkTable(table, tblDesc, replicationSpec, x.getConf());
      x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
      tableExists = true;
    }

    if (!replicationSpec.isInReplicationScope()) {
      createRegularImportTasks(
          tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x);
    } else {
      createReplImportTasks(
          tblDesc, partitionDescs, isPartSpecSet, replicationSpec, waitOnPrecursor,
          table, fromURI, fs, wh, x);
    }
    return tableExists;
  }

  private static AddPartitionDesc getBaseAddPartitionDescFromPartition(
      Path fromPath, String dbname, ImportTableDesc tblDesc, Partition partition)
      throws MetaException, SemanticException {
    AddPartitionDesc partsDesc = new AddPartitionDesc(dbname, tblDesc.getTableName(),
        EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues()),
        partition.getSd().getLocation(), partition.getParameters());
    AddPartitionDesc.OnePartitionDesc partDesc = partsDesc.getPartition(0);
    partDesc.setInputFormat(partition.getSd().getInputFormat());
    partDesc.setOutputFormat(partition.getSd().getOutputFormat());
    partDesc.setNumBuckets(partition.getSd().getNumBuckets());
    partDesc.setCols(partition.getSd().getCols());
    partDesc.setSerializationLib(partition.getSd().getSerdeInfo().getSerializationLib());
    partDesc.setSerdeParams(partition.getSd().getSerdeInfo().getParameters());
    partDesc.setBucketCols(partition.getSd().getBucketCols());
    partDesc.setSortCols(partition.getSd().getSortCols());
    partDesc.setLocation(new Path(fromPath,
        Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString());
    return partsDesc;
  }

  private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName,
      org.apache.hadoop.hive.metastore.api.Table tblObj) throws Exception {
    Table table = new Table(tblObj);
    ImportTableDesc tblDesc = new ImportTableDesc(dbName, table);
    return tblDesc;
  }
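  /*
   * A sketch of the export directory layout prepareImport() expects (the exact
   * constants live in EximUtil; "_metadata" and "data" are the conventional
   * names, shown here for illustration with an example path):
   *
   *   /tmp/export/t1/_metadata          <- table/partition metadata, read above
   *   /tmp/export/t1/data/...           <- data files of an unpartitioned table
   *   /tmp/export/t1/ds=2010-03-03/...  <- per-partition data, named via makePartName()
   */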
  private static Task<?> loadTable(URI fromURI, Table table, boolean replace, Path tgtPath,
      ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x) {
    Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
    Path tmpPath = x.getCtx().getExternalTmpPath(tgtPath);
    Task<?> copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, tmpPath, x.getConf());
    LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, Utilities.getTableDesc(table),
        new TreeMap<String, String>(), replace);
    Task<?> loadTableTask = TaskFactory.get(
        new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf());
    copyTask.addDependentTask(loadTableTask);
    x.getTasks().add(copyTask);
    return loadTableTask;
  }

  private static Task<?> createTableTask(ImportTableDesc tableDesc,
      EximUtil.SemanticAnalyzerWrapperContext x) {
    return tableDesc.getCreateTableTask(x);
  }

  private static Task<?> dropTableTask(Table table, EximUtil.SemanticAnalyzerWrapperContext x) {
    return TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(),
        new DropTableDesc(table.getTableName(), null, true, true, null)), x.getConf());
  }

  private static Task<? extends Serializable> alterTableTask(ImportTableDesc tableDesc,
      EximUtil.SemanticAnalyzerWrapperContext x, ReplicationSpec replicationSpec) {
    tableDesc.setReplaceMode(true);
    if ((replicationSpec != null) && (replicationSpec.isInReplicationScope())) {
      tableDesc.setReplicationSpec(replicationSpec);
    }
    return tableDesc.getCreateTableTask(x);
  }

  private static Task<? extends Serializable> alterSinglePartition(
      URI fromURI, FileSystem fs, ImportTableDesc tblDesc, Table table, Warehouse wh,
      AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec,
      org.apache.hadoop.hive.ql.metadata.Partition ptn,
      EximUtil.SemanticAnalyzerWrapperContext x) {
    addPartitionDesc.setReplaceMode(true);
    if ((replicationSpec != null) && (replicationSpec.isInReplicationScope())) {
      addPartitionDesc.setReplicationSpec(replicationSpec);
    }
    addPartitionDesc.getPartition(0).setLocation(ptn.getLocation()); // use existing location
    return TaskFactory.get(
        new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf());
  }
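  /*
   * For reference, the task graph wired up by addSinglePartition() below for a
   * managed-table partition import looks like this (a sketch derived from the
   * dependency calls in the code, not generated output):
   *
   *   CopyTask (export data -> tmpPath) ----\
   *                                          +--> MoveTask (load into partition)
   *   DDLTask (AddPartitionDesc) -----------/
   *
   * i.e. the data is moved into the partition only after both the copy and the
   * add-partition DDL have completed. loadTable() above wires the simpler
   * CopyTask -> MoveTask chain for whole-table loads.
   */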
  private static Task<?> addSinglePartition(URI fromURI, FileSystem fs, ImportTableDesc tblDesc,
      Table table, Warehouse wh, AddPartitionDesc addPartitionDesc,
      ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x)
      throws MetaException, IOException, HiveException {
    AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0);
    if (tblDesc.isExternal() && tblDesc.getLocation() == null) {
      x.getLOG().debug("Importing in-place: adding AddPart for partition "
          + partSpecToString(partSpec.getPartSpec()));
      // addPartitionDesc already has the right partition location
      Task<?> addPartTask = TaskFactory.get(
          new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf());
      return addPartTask;
    } else {
      String srcLocation = partSpec.getLocation();
      fixLocationInPartSpec(fs, tblDesc, table, wh, replicationSpec, partSpec, x);
      x.getLOG().debug("adding dependent CopyWork/AddPart/MoveWork for partition "
          + partSpecToString(partSpec.getPartSpec())
          + " with source location: " + srcLocation);
      Path tgtLocation = new Path(partSpec.getLocation());
      Path tmpPath = x.getCtx().getExternalTmpPath(tgtLocation);
      Task<?> copyTask = ReplCopyTask.getLoadCopyTask(
          replicationSpec, new Path(srcLocation), tmpPath, x.getConf());
      Task<?> addPartTask = TaskFactory.get(
          new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf());
      LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, Utilities.getTableDesc(table),
          partSpec.getPartSpec(), replicationSpec.isReplace());
      loadTableWork.setInheritTableSpecs(false);
      Task<?> loadPartTask = TaskFactory.get(
          new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf());
      copyTask.addDependentTask(loadPartTask);
      addPartTask.addDependentTask(loadPartTask);
      x.getTasks().add(copyTask);
      return addPartTask;
    }
  }

  /**
   * Helper method to set the location properly in partSpec
   */
  private static void fixLocationInPartSpec(FileSystem fs, ImportTableDesc tblDesc, Table table,
      Warehouse wh, ReplicationSpec replicationSpec, AddPartitionDesc.OnePartitionDesc partSpec,
      EximUtil.SemanticAnalyzerWrapperContext x)
      throws MetaException, HiveException, IOException {
    Path tgtPath = null;
    if (tblDesc.getLocation() == null) {
      if (table.getDataLocation() != null) {
        tgtPath = new Path(table.getDataLocation().toString(),
            Warehouse.makePartPath(partSpec.getPartSpec()));
      } else {
        Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
        tgtPath = new Path(wh.getDefaultTablePath(parentDb, tblDesc.getTableName()),
            Warehouse.makePartPath(partSpec.getPartSpec()));
      }
    } else {
      tgtPath = new Path(tblDesc.getLocation(), Warehouse.makePartPath(partSpec.getPartSpec()));
    }
    FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf());
    checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x);
    partSpec.setLocation(tgtPath.toString());
  }

  private static void checkTargetLocationEmpty(FileSystem fs, Path targetPath,
      ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x)
      throws IOException, SemanticException {
    if (replicationSpec.isInReplicationScope()) {
      // replication scope allows replacement, and does not require empty directories
      return;
    }
    x.getLOG().debug("checking emptiness of " + targetPath.toString());
    if (fs.exists(targetPath)) {
      FileStatus[] status = fs.listStatus(targetPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
      if (status.length > 0) {
        x.getLOG().debug("Files inc. " + status[0].getPath().toString()
            + " found in path : " + targetPath.toString());
        throw new SemanticException(ErrorMsg.TABLE_DATA_EXISTS.getMsg());
      }
    }
  }

  private static String partSpecToString(Map<String, String> partSpec) {
    StringBuilder sb = new StringBuilder();
    boolean firstTime = true;
    for (Map.Entry<String, String> entry : partSpec.entrySet()) {
      if (!firstTime) {
        sb.append(',');
      }
      firstTime = false;
      sb.append(entry.getKey());
      sb.append('=');
      sb.append(entry.getValue());
    }
    return sb.toString();
  }
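  /*
   * Example (illustrative): for the spec {ds=2010-03-03, hr=12},
   * partSpecToString() above returns "ds=2010-03-03,hr=12", which is how
   * partitions are rendered in the debug logs and error messages in this class.
   */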
  private static void checkTable(Table table, ImportTableDesc tableDesc,
      ReplicationSpec replicationSpec, HiveConf conf)
      throws SemanticException, URISyntaxException {
    // This method gets called only in the scope that a destination table already exists, so
    // we're validating if the table is an appropriate destination to import into.
    if (replicationSpec.isInReplicationScope()) {
      // If this import is being done for replication, then this will be a managed table, and
      // replacements are allowed irrespective of what the table currently looks like. So no
      // more checks are necessary.
      return;
    } else {
      // Verify whether the table has been the target of replication, and if so, check HiveConf
      // to see if we're allowed to override. If not, fail.
      if (table.getParameters().containsKey(ReplicationSpec.KEY.CURR_STATE_ID.toString())
          && conf.getBoolVar(HiveConf.ConfVars.HIVE_EXIM_RESTRICT_IMPORTS_INTO_REPLICATED_TABLES)) {
        throw new SemanticException(ErrorMsg.IMPORT_INTO_STRICT_REPL_TABLE
            .getMsg("Table " + table.getTableName() + " has repl.last.id parameter set."));
      }
    }

    // Next, we verify that the destination table is not offline, or a non-native table
    EximUtil.validateTable(table);

    // If the import statement specified that we're importing to an external
    // table, we seem to be doing the following:
    // a) We don't allow replacement in an unpartitioned pre-existing table
    // b) We don't allow replacement in a partitioned pre-existing table where that table is external
    // TODO : Does this simply mean we don't allow replacement in external tables if they already
    // exist? If so (i.e. the check is superfluous and wrong), this can be a simpler check. If not,
    // then what we seem to be saying is that the only case we allow is an IMPORT into an EXTERNAL
    // table in the statement, if a destination partitioned table exists, so long as it is
    // actually not external itself. Is that the case? Why?
    {
      if ((tableDesc.isExternal()) // IMPORT statement specified EXTERNAL
          && (!table.isPartitioned() || !table.getTableType().equals(TableType.EXTERNAL_TABLE))) {
        throw new SemanticException(ErrorMsg.INCOMPATIBLE_SCHEMA
            .getMsg(" External table cannot overwrite existing table. Drop existing table first."));
      }
    }

    // If a table import statement specified a location and the (unpartitioned) table
    // already exists, ensure that the locations are the same.
    // Partitioned tables are not checked here, since the location provided would need
    // checking against the partition in question instead.
    {
      if ((tableDesc.getLocation() != null) && (!table.isPartitioned())
          && (!table.getDataLocation().equals(new Path(tableDesc.getLocation())))) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Location does not match"));
      }
    }
    {
      // check column order and types
      List<FieldSchema> existingTableCols = table.getCols();
      List<FieldSchema> importedTableCols = tableDesc.getCols();
      if (!EximUtil.schemaCompare(importedTableCols, existingTableCols)) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Column Schema does not match"));
      }
    }
    {
      // check partitioning column order and types
      List<FieldSchema> existingTablePartCols = table.getPartCols();
      List<FieldSchema> importedTablePartCols = tableDesc.getPartCols();
      if (!EximUtil.schemaCompare(importedTablePartCols, existingTablePartCols)) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Partition Schema does not match"));
      }
    }
    {
      // check table params
      Map<String, String> existingTableParams = table.getParameters();
      Map<String, String> importedTableParams = tableDesc.getTblProps();
      String error = checkParams(existingTableParams, importedTableParams,
          new String[] { "howl.isd", "howl.osd" });
      if (error != null) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Table parameters do not match: " + error));
      }
    }
    {
      // check IF/OF/Serde
      String existingifc = table.getInputFormatClass().getName();
      String importedifc = tableDesc.getInputFormat();
      String existingofc = table.getOutputFormatClass().getName();
      String importedofc = tableDesc.getOutputFormat();
      /*
       * substitute OutputFormat name based on HiveFileFormatUtils.outputFormatSubstituteMap
       */
      try {
        Class<?> origin = Class.forName(importedofc, true,
            Utilities.getSessionSpecifiedClassLoader());
        Class<? extends OutputFormat> replaced =
            HiveFileFormatUtils.getOutputFormatSubstitute(origin);
        if (replaced == null) {
          throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
        }
        importedofc = replaced.getCanonicalName();
      } catch (Exception e) {
        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
      }
      if ((!existingifc.equals(importedifc)) || (!existingofc.equals(importedofc))) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Table inputformat/outputformats do not match"));
      }
      String existingSerde = table.getSerializationLib();
      String importedSerde = tableDesc.getSerName();
      if (!existingSerde.equals(importedSerde)) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Table Serde class does not match"));
      }
      String existingSerdeFormat = table.getSerdeParam(serdeConstants.SERIALIZATION_FORMAT);
      String importedSerdeFormat = tableDesc.getSerdeProps().get(serdeConstants.SERIALIZATION_FORMAT);
      /*
       * If the imported SerdeFormat is null, then set it to "1" just as
       * metadata.Table.getEmptyTable does.
       */
      importedSerdeFormat = importedSerdeFormat == null ? "1" : importedSerdeFormat;
      if (!ObjectUtils.equals(existingSerdeFormat, importedSerdeFormat)) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Table Serde format does not match"));
      }
    }
    {
      // check bucket/sort cols
      if (!ObjectUtils.equals(table.getBucketCols(), tableDesc.getBucketCols())) {
        throw new SemanticException(
            ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Table bucketing spec does not match"));
      }
      List<Order> existingOrder = table.getSortCols();
      List<Order> importedOrder = tableDesc.getSortCols();
      // safely sorting
      final class OrderComparator implements Comparator<Order> {
        @Override
        public int compare(Order o1, Order o2) {
          if (o1.getOrder() < o2.getOrder()) {
            return -1;
          } else {
            if (o1.getOrder() == o2.getOrder()) {
              return 0;
            } else {
              return 1;
            }
          }
        }
      }
      if (existingOrder != null) {
        if (importedOrder != null) {
          Collections.sort(existingOrder, new OrderComparator());
          Collections.sort(importedOrder, new OrderComparator());
          if (!existingOrder.equals(importedOrder)) {
            throw new SemanticException(
                ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Table sorting spec does not match"));
          }
        }
      } else {
        if (importedOrder != null) {
          throw new SemanticException(
              ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Table sorting spec does not match"));
        }
      }
    }
  }
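  /*
   * Illustrative behaviour of checkParams() below: only the listed keys are
   * compared, and a key counts as matching when it is absent (or null) on both
   * sides. For example, with keys {"howl.isd", "howl.osd"}:
   *
   *   existing = {howl.isd=X}, imported = {howl.isd=X}  -> null (match)
   *   existing = {howl.isd=X}, imported = {}            -> "Mismatch for howl.isd"
   *   existing = null,         imported = {howl.osd=Y}  -> "Mismatch for howl.osd"
   */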
  private static String checkParams(Map<String, String> map1, Map<String, String> map2,
      String[] keys) {
    if (map1 != null) {
      if (map2 != null) {
        for (String key : keys) {
          String v1 = map1.get(key);
          String v2 = map2.get(key);
          if (!ObjectUtils.equals(v1, v2)) {
            return "Mismatch for " + key;
          }
        }
      } else {
        for (String key : keys) {
          if (map1.get(key) != null) {
            return "Mismatch for " + key;
          }
        }
      }
    } else {
      if (map2 != null) {
        for (String key : keys) {
          if (map2.get(key) != null) {
            return "Mismatch for " + key;
          }
        }
      }
    }
    return null;
  }

  /**
   * Create tasks for regular import, no repl complexity
   * @param tblDesc
   * @param partitionDescs
   * @param isPartSpecSet
   * @param replicationSpec
   * @param table
   * @param fromURI
   * @param fs
   * @param wh
   */
  private static void createRegularImportTasks(ImportTableDesc tblDesc,
      List<AddPartitionDesc> partitionDescs, boolean isPartSpecSet,
      ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh,
      EximUtil.SemanticAnalyzerWrapperContext x)
      throws HiveException, URISyntaxException, IOException, MetaException {

    if (table != null) {
      if (table.isPartitioned()) {
        x.getLOG().debug("table partitioned");
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
          Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
          org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
          if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
            x.getTasks().add(addSinglePartition(
                fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
          } else {
            throw new SemanticException(
                ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec)));
          }
        }
      } else {
        x.getLOG().debug("table non-partitioned");
        // ensure that the destination is empty; this is checked only for regular imports
        Path tgtPath = new Path(table.getDataLocation().toString());
        FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf());
        checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x);
        loadTable(fromURI, table, false, tgtPath, replicationSpec, x);
      }
      // Set this to read because we can't overwrite any existing partitions
      x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
    } else {
      x.getLOG().debug("table " + tblDesc.getTableName() + " does not exist");

      Task<?> t = createTableTask(tblDesc, x);
      table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
      Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());

      // Since we are going to be creating a new table in a db, we should mark that db as a
      // write entity so that the auth framework can go to work there.
      x.getOutputs().add(new WriteEntity(parentDb, WriteEntity.WriteType.DDL_SHARED));

      if (isPartitioned(tblDesc)) {
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
          t.addDependentTask(addSinglePartition(
              fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
        }
      } else {
        x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
        if (tblDesc.isExternal() && (tblDesc.getLocation() == null)) {
          x.getLOG().debug("Importing in place, no emptiness check, no copying/loading");
          Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
          tblDesc.setLocation(dataPath.toString());
        } else {
          Path tablePath = null;
          if (tblDesc.getLocation() != null) {
            tablePath = new Path(tblDesc.getLocation());
          } else {
            tablePath = wh.getDefaultTablePath(parentDb, tblDesc.getTableName());
          }
          FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf());
          checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec, x);
          t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x));
        }
      }
      x.getTasks().add(t);
    }
  }
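  /*
   * Summary of createRegularImportTasks() above (a reading aid derived from the
   * branches of the code itself):
   *
   *   table exists, partitioned:    add only partitions that don't exist yet;
   *                                 an existing partition is an error.
   *   table exists, unpartitioned:  destination must be empty, then load.
   *   table absent,  partitioned:   create the table, then add each partition.
   *   table absent,  unpartitioned: create the table, then load (or, for
   *                                 EXTERNAL with no LOCATION, point the table
   *                                 at the export's data directory in place).
   */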
  /**
   * Create tasks for repl import
   */
  private static void createReplImportTasks(ImportTableDesc tblDesc,
      List<AddPartitionDesc> partitionDescs, boolean isPartSpecSet,
      ReplicationSpec replicationSpec, boolean waitOnPrecursor,
      Table table, URI fromURI, FileSystem fs, Warehouse wh,
      EximUtil.SemanticAnalyzerWrapperContext x)
      throws HiveException, URISyntaxException, IOException, MetaException {

    Task dr = null;
    WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;

    // Normally, on import, trying to create a table or a partition in a db that does not yet
    // exist is an error condition. However, in the case of a REPL LOAD, it is possible that we
    // are trying to create tasks to create a table inside a db that as-of-now does not exist,
    // but there is a precursor Task waiting that will create it before this is encountered.
    // Thus, we instantiate defaults and do not error out in that case.
    Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
    if (parentDb == null) {
      if (!waitOnPrecursor) {
        throw new SemanticException(
            ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tblDesc.getDatabaseName()));
      }
    }
    if (table != null) {
      if (!replicationSpec.allowReplacementInto(table.getParameters())) {
        // The target table exists and is newer than or the same as the current update based
        // on repl.last.id, so just noop it.
        return;
      }
    } else {
      // If the table doesn't exist, allow creating a new one only if the database state is
      // older than the update.
      if ((parentDb != null) && (!replicationSpec.allowReplacementInto(parentDb.getParameters()))) {
        // The db state is newer than or the same as the current update, so just noop it.
        return;
      }
    }

    if (tblDesc.getLocation() == null) {
      if (!waitOnPrecursor) {
        tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName()).toString());
      } else {
        tblDesc.setLocation(wh.getDnsPath(
            new Path(wh.getDefaultDatabasePath(tblDesc.getDatabaseName()),
                MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()))).toString());
      }
    }

    /*
     * Note: In the following section, metadata-only import handling logic is interleaved with
     * regular repl-import logic. The rule of thumb being followed here is that MD-only imports
     * are essentially ALTERs. They do not load data, and should not be "creating" any metadata -
     * they should be replacing instead. The only place it makes sense for a MD-only import to
     * create is in the case of a table that's been dropped and recreated, or in the case of an
     * unpartitioned table. In all other cases, it should behave like a noop or a pure MD alter.
     */
    if (table == null) {
      // Either we're dropping and re-creating, or the table didn't exist, and we're creating.
      if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
        lockType = WriteEntity.WriteType.DDL_SHARED;
      }
      Task t = createTableTask(tblDesc, x);
      table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());

      if (!replicationSpec.isMetadataOnly()) {
        if (isPartitioned(tblDesc)) {
          for (AddPartitionDesc addPartitionDesc : partitionDescs) {
            addPartitionDesc.setReplicationSpec(replicationSpec);
            t.addDependentTask(addSinglePartition(
                fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
          }
        } else {
          x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
          t.addDependentTask(loadTable(
              fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x));
        }
      }
      if (dr == null) {
        // Simply create
        x.getTasks().add(t);
      } else {
        // Drop and recreate
        dr.addDependentTask(t);
        x.getTasks().add(dr);
      }
    } else {
      // Table existed, and is okay to replicate into, not dropping and re-creating.
      if (table.isPartitioned()) {
        x.getLOG().debug("table partitioned");
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
          addPartitionDesc.setReplicationSpec(replicationSpec);
          Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
          org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
          if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
            if (!replicationSpec.isMetadataOnly()) {
              x.getTasks().add(addSinglePartition(
                  fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
            }
          } else {
            // If replicating, the partition already existing means we may need to replace,
            // if the destination ptn's repl.last.id is older than the replacement's.
            if (replicationSpec.allowReplacementInto(ptn.getParameters())) {
              if (!replicationSpec.isMetadataOnly()) {
                x.getTasks().add(addSinglePartition(
                    fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
              } else {
                x.getTasks().add(alterSinglePartition(
                    fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x));
              }
              if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                lockType = WriteEntity.WriteType.DDL_SHARED;
              }
            } else {
              // ignore this ptn, do nothing, not an error.
            }
          }
        }
        if (replicationSpec.isMetadataOnly() && partitionDescs.isEmpty()) {
          // MD-ONLY table alter
          x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
          if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
            lockType = WriteEntity.WriteType.DDL_SHARED;
          }
        }
      } else {
        x.getLOG().debug("table non-partitioned");
        if (!replicationSpec.allowReplacementInto(table.getParameters())) {
          return; // silently return, table is newer than our replacement.
        }
        if (!replicationSpec.isMetadataOnly()) {
          // repl-imports are replace-into unless the event is insert-into
          loadTable(fromURI, table, replicationSpec.isReplace(), new Path(fromURI),
              replicationSpec, x);
        } else {
          x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
        }
        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
          lockType = WriteEntity.WriteType.DDL_SHARED;
        }
      }
    }
    x.getOutputs().add(new WriteEntity(table, lockType));
  }

  private static boolean isPartitioned(ImportTableDesc tblDesc) {
    return !(tblDesc.getPartCols() == null || tblDesc.getPartCols().isEmpty());
  }

  /**
   * Utility method that returns a table if one corresponding to the destination
   * tblDesc is found. Returns null if no such table is found.
   */
  private static Table tableIfExists(ImportTableDesc tblDesc, Hive db) throws HiveException {
    try {
      return db.getTable(tblDesc.getDatabaseName(), tblDesc.getTableName());
    } catch (InvalidTableException e) {
      return null;
    }
  }
}
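/*
 * End-to-end usage sketch (HiveQL; table names and paths are illustrative):
 *
 *   -- on the source cluster
 *   EXPORT TABLE t1 PARTITION (ds='2010-03-03') TO '/tmp/export/t1';
 *
 *   -- on the destination cluster; this statement drives the analyzer above
 *   IMPORT TABLE t1_copy PARTITION (ds='2010-03-03') FROM '/tmp/export/t1';
 */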