org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java Source code

Introduction

Here is the source code for org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java. This class is HCatalog's output committer wrapper: at job commit it moves task output from the temporary/scratch directories into the final table or partition locations, registers any new partitions in the Hive metastore, and cancels the job's delegation tokens.
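
This committer is not normally instantiated directly; HCatOutputFormat returns a FileOutputFormatContainer, whose getOutputCommitter() wraps the storage handler's committer in the class shown below. As a rough, hedged sketch of how a job ends up here (the database, table, and class names are illustrative assumptions, and API details may vary slightly by Hive version):

// Minimal driver sketch that writes through HCatOutputFormat and therefore
// uses FileOutputCommitterContainer at commit time. "mydb", "mytable" and
// HCatWriteDriver are illustrative names, not part of this source file.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "hcat-write-example");

    // Describe the target table; a null partition map requests dynamic
    // partitioning (or an unpartitioned write, depending on the table).
    OutputJobInfo outputJobInfo = OutputJobInfo.create("mydb", "mytable", null);
    HCatOutputFormat.setOutput(job, outputJobInfo);

    // Reuse the table schema as the output schema.
    HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
    HCatOutputFormat.setSchema(job, schema);

    // HCatOutputFormat hands back a FileOutputFormatContainer, whose
    // getOutputCommitter() wraps the underlying committer in
    // FileOutputCommitterContainer (the class listed below).
    job.setOutputFormatClass(HCatOutputFormat.class);
    // job.setMapperClass(...); remaining job wiring omitted for brevity.

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}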

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hive.hcatalog.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus.State;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;
import org.apache.hive.hcatalog.har.HarOutputCommitterPostProcessor;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Part of the FileOutput*Container classes
 * See {@link FileOutputFormatContainer} for more information
 */
class FileOutputCommitterContainer extends OutputCommitterContainer {

    private static final String TEMP_DIR_NAME = "_temporary";
    private static final String LOGS_DIR_NAME = "_logs";

    static final String DYNTEMP_DIR_NAME = "_DYN";
    static final String SCRATCH_DIR_NAME = "_SCRATCH";
    private static final String APPEND_SUFFIX = "_a_";
    private static final int APPEND_COUNTER_WARN_THRESHOLD = 1000;
    private final int maxAppendAttempts;

    private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class);
    private final boolean dynamicPartitioningUsed;
    private boolean partitionsDiscovered;
    private final boolean customDynamicLocationUsed;

    private Map<String, Map<String, String>> partitionsDiscoveredByPath;
    private Map<String, JobContext> contextDiscoveredByPath;
    private final HiveStorageHandler cachedStorageHandler;

    HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor();

    private String ptnRootLocation = null;

    private OutputJobInfo jobInfo = null;

    /**
     * @param context current JobContext
     * @param baseCommitter OutputCommitter to contain
     * @throws IOException
     */
    public FileOutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter baseCommitter)
            throws IOException {
        super(context, baseCommitter);
        jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
        dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();

        this.partitionsDiscovered = !dynamicPartitioningUsed;
        cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
                jobInfo.getTableInfo().getStorerInfo());
        Table table = new Table(jobInfo.getTableInfo().getTable());
        if (dynamicPartitioningUsed && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
                && jobInfo.getCustomDynamicPath() != null && jobInfo.getCustomDynamicPath().length() > 0) {
            customDynamicLocationUsed = true;
        } else {
            customDynamicLocationUsed = false;
        }

        this.maxAppendAttempts = context.getConfiguration().getInt(HCatConstants.HCAT_APPEND_LIMIT,
                APPEND_COUNTER_WARN_THRESHOLD);
    }

    @Override
    public void abortTask(TaskAttemptContext context) throws IOException {
        if (!dynamicPartitioningUsed) {
            FileOutputFormatContainer.setWorkOutputPath(context);
            getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context));
        } else {
            try {
                TaskCommitContextRegistry.getInstance().abortTask(context);
            } finally {
                TaskCommitContextRegistry.getInstance().discardCleanupFor(context);
            }
        }
    }

    @Override
    public void commitTask(TaskAttemptContext context) throws IOException {
        if (!dynamicPartitioningUsed) {
            //See HCATALOG-499
            FileOutputFormatContainer.setWorkOutputPath(context);
            getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context));
        } else {
            try {
                TaskCommitContextRegistry.getInstance().commitTask(context);
            } finally {
                TaskCommitContextRegistry.getInstance().discardCleanupFor(context);
            }
        }
    }

    @Override
    public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
        if (!dynamicPartitioningUsed) {
            FileOutputFormatContainer.setWorkOutputPath(context);
            return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context));
        } else {
            // With dynamic partitioning, task commit is driven explicitly through
            // FileRecordWriterContainer.close(), so report that a commit is needed here.
            return true;
        }
    }

    @Override
    public void setupJob(JobContext context) throws IOException {
        if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
            getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context));
        }
        // in the dynamic partitioning use case, setup is invoked through FileRecordWriterContainer
    }

    @Override
    public void setupTask(TaskAttemptContext context) throws IOException {
        if (!dynamicPartitioningUsed) {
            getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context));
        }
    }

    @Override
    public void abortJob(JobContext jobContext, State state) throws IOException {
        try {
            if (dynamicPartitioningUsed) {
                discoverPartitions(jobContext);
            }
            org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil.createJobContext(jobContext);
            if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
                getBaseOutputCommitter().abortJob(mapRedJobContext, state);
            } else if (dynamicPartitioningUsed) {
                for (JobContext currContext : contextDiscoveredByPath.values()) {
                    try {
                        new JobConf(currContext.getConfiguration()).getOutputCommitter().abortJob(currContext,
                                state);
                    } catch (Exception e) {
                        throw new IOException(e);
                    }
                }
            }
            Path src;
            OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext.getConfiguration());
            Path tblPath = new Path(jobInfo.getTableInfo().getTableLocation());
            if (dynamicPartitioningUsed) {
                if (!customDynamicLocationUsed) {
                    src = new Path(getPartitionRootLocation(jobInfo.getLocation(),
                            jobInfo.getTableInfo().getTable().getPartitionKeysSize()));
                } else {
                    src = new Path(getCustomPartitionRootLocation(jobInfo, jobContext.getConfiguration()));
                }
            } else {
                src = new Path(jobInfo.getLocation());
            }
            FileSystem fs = src.getFileSystem(jobContext.getConfiguration());
            // Note: fs.delete will fail on Windows. The reason is that inside the OutputCommitter,
            // Hadoop is still writing to _logs/history. On Linux the OS does not care that a file is
            // still open and removes the directory anyway, but on Windows the OS refuses to remove a
            // directory that contains open files. So on Windows we leave the output directory behind
            // when the job fails, and the user has to remove it manually.
            LOG.info("Job failed. Try cleaning up temporary directory [{}].", src);
            if (!src.equals(tblPath)) {
                fs.delete(src, true);
            }
        } finally {
            cancelDelegationTokens(jobContext);
        }
    }

    public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
    static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = "mapreduce.fileoutputcommitter.marksuccessfuljobs";

    private static boolean getOutputDirMarking(Configuration conf) {
        return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, false);
    }

    @Override
    public void commitJob(JobContext jobContext) throws IOException {
        if (dynamicPartitioningUsed) {
            discoverPartitions(jobContext);
            // Commit each partition so it gets moved out of the job work dir
            for (JobContext context : contextDiscoveredByPath.values()) {
                new JobConf(context.getConfiguration()).getOutputCommitter().commitJob(context);
            }
        }
        if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
            getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext));
        }
        registerPartitions(jobContext);
        // create _SUCCESS FILE if so requested.
        OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext.getConfiguration());
        if (getOutputDirMarking(jobContext.getConfiguration())) {
            Path outputPath = new Path(jobInfo.getLocation());
            FileSystem fileSys = outputPath.getFileSystem(jobContext.getConfiguration());
            // create a file in the folder to mark it
            if (fileSys.exists(outputPath)) {
                Path filePath = new Path(outputPath, SUCCEEDED_FILE_NAME);
                if (!fileSys.exists(filePath)) {
                    // the marker may have been created already by baseCommitter.commitJob()
                    fileSys.create(filePath).close();
                }
            }
        }

        // Commit has succeeded (since no exceptions have been thrown.)
        // Safe to cancel delegation tokens now.
        cancelDelegationTokens(jobContext);
    }

    @Override
    public void cleanupJob(JobContext context) throws IOException {
        throw new IOException("The method cleanupJob is deprecated and should not be called.");
    }

    private String getCustomPartitionRootLocation(OutputJobInfo jobInfo, Configuration conf) {
        if (ptnRootLocation == null) {
            // we only need to calculate it once, it'll be the same for other partitions in this job.
            String parentPath = jobInfo.getTableInfo().getTableLocation();
            if (jobInfo.getCustomDynamicRoot() != null && jobInfo.getCustomDynamicRoot().length() > 0) {
                parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
            }
            Path ptnRoot = new Path(parentPath, DYNTEMP_DIR_NAME + conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID));
            ptnRootLocation = ptnRoot.toString();
        }
        return ptnRootLocation;
    }

    private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) {
        if (customDynamicLocationUsed) {
            return null;
        }

        if (ptnRootLocation == null) {
            // we only need to calculate it once, it'll be the same for other partitions in this job.
            Path ptnRoot = new Path(ptnLocn);
            for (int i = 0; i < numPtnKeys; i++) {
                //          LOG.info("Getting parent of "+ptnRoot.getName());
                ptnRoot = ptnRoot.getParent();
            }
            ptnRootLocation = ptnRoot.toString();
        }
        //      LOG.info("Returning final parent : "+ptnRootLocation);
        return ptnRootLocation;
    }

    /**
     * Generate partition metadata object to be used to add to metadata.
     * @param context The job context.
     * @param jobInfo The OutputJobInfo.
     * @param partLocnRoot The table-equivalent location root of the partition
     *                       (temporary dir if dynamic partition, table dir if static)
     * @param dynPartPath The path of the dynamic partition that was created
     * @param partKVs The key-value pairs that form the partition
     * @param outputSchema The output schema for the partition
     * @param params The parameters to store inside the partition
     * @param table The Table metadata object under which this Partition will reside
     * @param fs FileSystem object to operate on the underlying filesystem
     * @param grpName Group name that owns the table dir
     * @param perms FsPermission that's the default permission of the table dir.
     * @return Constructed Partition metadata object
     * @throws java.io.IOException
     */

    private Partition constructPartition(JobContext context, OutputJobInfo jobInfo, String partLocnRoot,
            String dynPartPath, Map<String, String> partKVs, HCatSchema outputSchema, Map<String, String> params,
            Table table, FileSystem fs, String grpName, FsPermission perms) throws IOException {

        Partition partition = new Partition();
        partition.setDbName(table.getDbName());
        partition.setTableName(table.getTableName());
        partition.setSd(new StorageDescriptor(table.getTTable().getSd()));

        List<FieldSchema> fields = new ArrayList<FieldSchema>();
        for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
            fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
        }

        partition.getSd().setCols(fields);

        partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));

        partition.setParameters(params);

        // Sets permissions and group name on partition dirs and files.

        Path partPath;
        if (customDynamicLocationUsed) {
            partPath = new Path(dynPartPath);
        } else if (!dynamicPartitioningUsed && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
                && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
            // Now, we need to de-scratchify this location - i.e., get rid of any
            // _SCRATCH[\d].?[\d]+ from the location.
            String jobLocation = jobInfo.getLocation();
            String finalLocn = jobLocation.replaceAll(Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");
            partPath = new Path(finalLocn);
        } else {
            partPath = new Path(partLocnRoot);
            int i = 0;
            for (FieldSchema partKey : table.getPartitionKeys()) {
                if (i++ != 0) {
                    fs.mkdirs(partPath); // Attempt to make the path in case it does not exist before we check
                    applyGroupAndPerms(fs, partPath, perms, grpName, false);
                }
                partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
            }
        }

        // Apply the group and permissions to the leaf partition and files.
        // No need to bother for HDFS, where permissions are already taken care of by setting the umask.
        fs.mkdirs(partPath); // Attempt to make the path in case it does not exist before we check
        if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
            applyGroupAndPerms(fs, partPath, perms, grpName, true);
        }

        // Set the location in the StorageDescriptor
        if (dynamicPartitioningUsed) {
            String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo);
            if (harProcessor.isEnabled()) {
                harProcessor.exec(context, partition, partPath);
                partition.getSd()
                        .setLocation(harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
            } else {
                partition.getSd().setLocation(dynamicPartitionDestination);
            }
        } else {
            partition.getSd().setLocation(partPath.toString());
        }
        return partition;
    }

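    /**
     * Apply the given permission to {@code dir} and, if {@code recursive} is true, to all files
     * and directories underneath it. Note that in this version only fs.setPermission is invoked;
     * the group argument appears in the debug log but is not otherwise applied.
     */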
    private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, String group,
            boolean recursive) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("applyGroupAndPerms : " + dir + " perms: " + permission + " group: " + group + " recursive: "
                    + recursive);
        }
        fs.setPermission(dir, permission);
        if (recursive) {
            for (FileStatus fileStatus : fs.listStatus(dir)) {
                if (fileStatus.isDir()) {
                    applyGroupAndPerms(fs, fileStatus.getPath(), permission, group, true);
                } else {
                    fs.setPermission(fileStatus.getPath(), permission);
                }
            }
        }
    }

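    /**
     * Compute the final (post-commit) location of a dynamically created partition: the table
     * directory extended with key=value path components, or the resolved custom dynamic path
     * when a custom location is in use.
     */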
    private String getFinalDynamicPartitionDestination(Table table, Map<String, String> partKVs,
            OutputJobInfo jobInfo) {
        Path partPath = new Path(table.getTTable().getSd().getLocation());
        if (!customDynamicLocationUsed) {
            // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA  ->
            // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA
            for (FieldSchema partKey : table.getPartitionKeys()) {
                partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
            }

            return partPath.toString();
        } else {
            // if custom root specified, update the parent path
            if (jobInfo.getCustomDynamicRoot() != null && jobInfo.getCustomDynamicRoot().length() > 0) {
                partPath = new Path(partPath, jobInfo.getCustomDynamicRoot());
            }
            return new Path(partPath, HCatFileUtil.resolveCustomPath(jobInfo, partKVs, false)).toString();
        }
    }

    private Map<String, String> getStorerParameterMap(StorerInfo storer) {
        Map<String, String> params = new HashMap<String, String>();

        //Copy table level hcat.* keys to the partition
        for (Entry<Object, Object> entry : storer.getProperties().entrySet()) {
            if (!entry.getKey().toString().equals(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
                params.put(entry.getKey().toString(), entry.getValue().toString());
            }
        }
        return params;
    }

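    /**
     * Extend the partial partition path with one escaped key=value component for the
     * given partition key.
     */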
    private Path constructPartialPartPath(Path partialPath, String partKey, Map<String, String> partKVs) {

        StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey));
        sb.append("=");
        sb.append(FileUtils.escapePathName(partKVs.get(partKey)));
        return new Path(partialPath, sb.toString());
    }

    /**
     * Update the table schema, adding any new columns introduced by the partition.
     * @param client the metastore client
     * @param table the table to update
     * @param partitionSchema the schema of the partition
     * @throws java.io.IOException Signals that an I/O exception has occurred.
     * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException if the alter_table operation is invalid
     * @throws org.apache.hadoop.hive.metastore.api.MetaException on metastore errors
     * @throws org.apache.thrift.TException on Thrift communication errors
     */
    private void updateTableSchema(IMetaStoreClient client, Table table, HCatSchema partitionSchema)
            throws IOException, InvalidOperationException, MetaException, TException {

        List<FieldSchema> newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema);

        if (newColumns.size() != 0) {
            List<FieldSchema> tableColumns = new ArrayList<FieldSchema>(table.getTTable().getSd().getCols());
            tableColumns.addAll(newColumns);

            //Update table schema to add the newly added columns
            table.getTTable().getSd().setCols(tableColumns);
            client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
        }
    }

    /**
     * Move all of the files from the temp directory to the final location.
     * @param fs the output file system
     * @param file the file or directory to move
     * @param srcDir the source directory
     * @param destDir the target directory
     * @param dryRun if true, only test whether the move would succeed based on whether
     *               other files already exist where we're trying to copy
     * @param immutable whether the table must be treated as immutable, in which case
     *                  pre-existing data at the destination is an error
     * @throws java.io.IOException
     */
    private void moveTaskOutputs(FileSystem fs, Path file, Path srcDir, Path destDir, final boolean dryRun,
            boolean immutable) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("moveTaskOutputs " + file + " from: " + srcDir + " to: " + destDir + " dry: " + dryRun
                    + " immutable: " + immutable);
        }

        if (dynamicPartitioningUsed) {
            immutable = true; // Making sure we treat dynamic partitioning jobs as if they were immutable.
        }

        if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME)
                || file.getName().equals(SUCCEEDED_FILE_NAME)) {
            return;
        }

        final Path finalOutputPath = getFinalPath(fs, file, srcDir, destDir, immutable);
        FileStatus fileStatus = fs.getFileStatus(file);

        if (!fileStatus.isDir()) {
            if (dryRun) {
                if (immutable) {
                    // Dry-run checks are meaningless for a mutable table - the move should
                    // always succeed unless there is a runtime IOException.
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Testing if moving file: [" + file + "] to [" + finalOutputPath
                                + "] would cause a problem");
                    }
                    if (fs.exists(finalOutputPath)) {
                        throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                                "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
                    }
                }
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]");
                }
                // Make sure the parent directory exists.  It is not an error
                // to recreate an existing directory
                fs.mkdirs(finalOutputPath.getParent());
                if (!fs.rename(file, finalOutputPath)) {
                    if (!fs.delete(finalOutputPath, true)) {
                        throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                                "Failed to delete existing path " + finalOutputPath);
                    }
                    if (!fs.rename(file, finalOutputPath)) {
                        throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                                "Failed to move output to " + finalOutputPath);
                    }
                }
            }
        } else {

            FileStatus[] children = fs.listStatus(file);
            FileStatus firstChild = null;
            if (children != null) {
                int index = 0;
                while (index < children.length) {
                    if (!children[index].getPath().getName().equals(TEMP_DIR_NAME)
                            && !children[index].getPath().getName().equals(LOGS_DIR_NAME)
                            && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
                        firstChild = children[index];
                        break;
                    }
                    index++;
                }
            }
            if (firstChild != null && firstChild.isDir()) {
                // If the first child is a directory, the rest will be directories too according
                // to the HCatalog directory structure, so recurse in that case.
                for (FileStatus child : children) {
                    moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
                }
            } else {

                if (!dryRun) {
                    if (dynamicPartitioningUsed) {

                        // Optimization: if the first child is a file, we have reached the leaf directory,
                        // so move the parent directory itself instead of moving each file under it.
                        // See HCATALOG-538.
                        // Note for a future Append implementation: this optimization is another reason
                        // dynamic partitioning is currently incompatible with append on mutable tables.

                        final Path parentDir = finalOutputPath.getParent();
                        // Create the directory
                        Path placeholder = new Path(parentDir, "_placeholder");
                        if (fs.mkdirs(parentDir)) {
                            // It is weird, but we need a placeholder; otherwise
                            // rename cannot move the file to the right place.
                            fs.create(placeholder).close();
                        }
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Moving directory: " + file + " to " + parentDir);
                        }

                        // If custom dynamic location provided, need to rename to final output path
                        Path dstPath = !customDynamicLocationUsed ? parentDir : finalOutputPath;
                        if (!fs.rename(file, dstPath)) {
                            final String msg = "Failed to move file: " + file + " to " + dstPath;
                            LOG.error(msg);
                            throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
                        }
                        fs.delete(placeholder, false);
                    } else {

                        // Without dynamic partitioning, we have to move each file individually.
                        for (FileStatus child : children) {
                            moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
                        }

                    }

                } else {
                    if (immutable && fs.exists(finalOutputPath)
                            && !MetaStoreUtils.isDirEmpty(fs, finalOutputPath)) {

                        throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION,
                                "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
                    }

                }
            }
        }
    }

    /**
     * Find the final name of a given output file, given the output directory
     * and the work directory. If the table is mutable (append), try names with an
     * appended _a_N counter until we find a destination that does not already exist.
     * @param fs the output file system
     * @param file the file to move
     * @param src the source directory
     * @param dest the target directory
     * @param immutable whether the table is to be treated as immutable
     * @return the final path for the specific output file
     * @throws java.io.IOException
     */
    private Path getFinalPath(FileSystem fs, Path file, Path src, Path dest, final boolean immutable)
            throws IOException {
        URI taskOutputUri = file.toUri();
        URI relativePath = src.toUri().relativize(taskOutputUri);
        if (taskOutputUri == relativePath) {
            throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                    "Can not get the relative path: base = " + src + " child = " + file);
        }
        if (relativePath.getPath().length() > 0) {

            Path itemDest = new Path(dest, relativePath.getPath());
            if (!immutable) {
                String name = relativePath.getPath();
                String filetype;
                int index = name.lastIndexOf('.');
                if (index >= 0) {
                    filetype = name.substring(index);
                    name = name.substring(0, index);
                } else {
                    filetype = "";
                }

                // Attempt to find maxAppendAttempts possible alternatives to a filename by
                // appending _a_N and seeing if that destination also clashes. If we're
                // still clashing after that, give up.
                int counter = 1;
                for (; fs.exists(itemDest) && counter < maxAppendAttempts; counter++) {
                    itemDest = new Path(dest, name + (APPEND_SUFFIX + counter) + filetype);
                }

                if (counter == maxAppendAttempts) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Could not find a unique destination path for move: file = " + file + " , src = " + src
                                    + ", dest = " + dest);
                } else if (counter > APPEND_COUNTER_WARN_THRESHOLD) {
                    LOG.warn("Append job used filename clash counter [" + counter
                            + "] which is greater than warning limit [" + APPEND_COUNTER_WARN_THRESHOLD
                            + "]. Please compact this table so that performance is not impacted."
                            + " Please see HIVE-9381 for details.");
                }

            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("FinalPath(file:" + file + ":" + src + "->" + dest + "=" + itemDest);
            }

            return itemDest;
        } else {

            return dest;
        }
    }

    /**
     * Discover the dynamic partitions that were written under the job output location
     * and set up a per-partition JobContext for each of them.
     */
    private void discoverPartitions(JobContext context) throws IOException {
        if (!partitionsDiscovered) {
            //      LOG.info("discover ptns called");
            OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());

            harProcessor.setEnabled(jobInfo.getHarRequested());

            List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
            int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();

            Path loadPath = new Path(jobInfo.getLocation());
            FileSystem fs = loadPath.getFileSystem(context.getConfiguration());

            // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
            String dynPathSpec = loadPath.toUri().getPath();
            dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");

            //      LOG.info("Searching for "+dynPathSpec);
            Path pathPattern = new Path(dynPathSpec);
            FileStatus[] status = fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER);

            partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
            contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();

            if (status.length == 0) {
                //        LOG.warn("No partitions were generated by dynamic partitioning in ["
                //            +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
                //            +"], dynSpec["+dynPathSpec+"]");
            } else {
                if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
                    this.partitionsDiscovered = true;
                    throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                            "Number of dynamic partitions being created "
                                    + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                    + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                    + "] if needed.");
                }

                for (FileStatus st : status) {
                    LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
                    if (!customDynamicLocationUsed) {
                        Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
                    } else {
                        HCatFileUtil.getPartKeyValuesForCustomLocation(fullPartSpec, jobInfo,
                                st.getPath().toString());
                    }
                    partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
                    JobConf jobConf = (JobConf) context.getConfiguration();
                    JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
                            InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                                    ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())));
                    HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
                    contextDiscoveredByPath.put(st.getPath().toString(), currContext);
                }
            }

            //      for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
            //        LOG.info("Partition "+ spec.getKey());
            //        for (Entry<String,String> e : spec.getValue().entrySet()){
            //          LOG.info(e.getKey() + "=>" +e.getValue());
            //        }
            //      }

            this.partitionsDiscovered = true;
        }
    }

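    /**
     * Move committed task output into its final table or partition location(s) and register any
     * new partitions with the Hive metastore. For unpartitioned tables this is purely a move; for
     * partitioned tables the moves are first dry-run, then executed, add_partitions is called, and
     * partitions that were already added are dropped again if a later step fails.
     */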
    private void registerPartitions(JobContext context) throws IOException {
        if (dynamicPartitioningUsed) {
            discoverPartitions(context);
        }
        OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
        Configuration conf = context.getConfiguration();
        Table table = new Table(jobInfo.getTableInfo().getTable());
        Path tblPath = new Path(table.getTTable().getSd().getLocation());
        FileSystem fs = tblPath.getFileSystem(conf);
        IMetaStoreClient client = null;
        HCatTableInfo tableInfo = jobInfo.getTableInfo();
        List<Partition> partitionsAdded = new ArrayList<Partition>();
        try {
            HiveConf hiveConf = HCatUtil.getHiveConf(conf);
            client = HCatUtil.getHiveMetastoreClient(hiveConf);
            if (table.getPartitionKeys().size() == 0) {
                // Move data from the temp directory to the actual table directory.
                // No metastore operation required.
                Path src = new Path(jobInfo.getLocation());
                moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
                if (!src.equals(tblPath)) {
                    fs.delete(src, true);
                }
                if (table.getParameters() != null
                        && table.getParameters().containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
                    table.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
                    client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
                }
                return;
            }

            StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());

            FileStatus tblStat = fs.getFileStatus(tblPath);
            String grpName = tblStat.getGroup();
            FsPermission perms = tblStat.getPermission();

            List<Partition> partitionsToAdd = new ArrayList<Partition>();
            if (!dynamicPartitioningUsed) {
                partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(), null,
                        jobInfo.getPartitionValues(), jobInfo.getOutputSchema(), getStorerParameterMap(storer),
                        table, fs, grpName, perms));
            } else {
                for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
                    partitionsToAdd.add(constructPartition(context, jobInfo,
                            getPartitionRootLocation(entry.getKey(), entry.getValue().size()), entry.getKey(),
                            entry.getValue(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs,
                            grpName, perms));
                }
            }

            ArrayList<Map<String, String>> ptnInfos = new ArrayList<Map<String, String>>();
            for (Partition ptn : partitionsToAdd) {
                ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn));
            }

            /**
             * Dynamic partitioning & Append incompatibility note:
             *
             * Currently, we do not support mixing dynamic partitioning and append in the
             * same job. One reason is that we need exhaustive testing of corner cases
             * for that, and a second reason is the behaviour of add_partitions. To support
             * dynamic partitioning with append, we'd have to have an add_partitions_if_not_exist
             * call, rather than an add_partitions call. Thus far, we've tried to keep the
             * append implementation job-type-agnostic, but here, in code, we assume that
             * a table is considered immutable if dynamic partitioning is enabled on the job.
             *
             * This does not mean that we can check before the job begins that this is going
             * to be a dynamic partition job on an immutable table and thus fail the job, since
             * it is quite possible to have a dynamic partitioning job run on an unpopulated
             * immutable table. It simply means that at the end of the job, as far as copying
             * in data is concerned, we will pretend that the table is immutable irrespective
             * of what table.isImmutable() tells us.
             */

            //Publish the new partition(s)
            if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())) {

                if (!customDynamicLocationUsed) {
                    Path src = new Path(ptnRootLocation);
                    // check here for each dir we're copying out, to see if it
                    // already exists, error out if so.
                    // Also, treat dyn-writes as writes to immutable tables.
                    moveTaskOutputs(fs, src, src, tblPath, true, true); // dryRun = true, immutable = true
                    moveTaskOutputs(fs, src, src, tblPath, false, true);
                    if (!src.equals(tblPath)) {
                        fs.delete(src, true);
                    }
                } else {
                    moveCustomLocationTaskOutputs(fs, table, hiveConf);
                }
                try {
                    updateTableSchema(client, table, jobInfo.getOutputSchema());
                    LOG.info("HAR is being used. The table {} has new partitions {}.", table.getTableName(),
                            ptnInfos);
                    client.add_partitions(partitionsToAdd);
                    partitionsAdded = partitionsToAdd;
                } catch (Exception e) {
                    // There was an error adding partitions : rollback fs copy and rethrow
                    for (Partition p : partitionsToAdd) {
                        Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation())));
                        if (fs.exists(ptnPath)) {
                            fs.delete(ptnPath, true);
                        }
                    }
                    throw e;
                }

            } else {

                // no harProcessor, regular operation
                updateTableSchema(client, table, jobInfo.getOutputSchema());
                LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(),
                        ptnInfos);
                if (partitionsToAdd.size() > 0) {
                    if (!dynamicPartitioningUsed) {

                        // Regular single-partition write into a partitioned table.
                        // Move data from the temp directory to the actual table directory.
                        if (partitionsToAdd.size() > 1) {
                            throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION,
                                    "More than one partition to publish in non-dynamic partitioning job");
                        }
                        Partition p = partitionsToAdd.get(0);
                        Path src = new Path(jobInfo.getLocation());
                        Path dest = new Path(p.getSd().getLocation());
                        moveTaskOutputs(fs, src, src, dest, true, table.isImmutable());
                        moveTaskOutputs(fs, src, src, dest, false, table.isImmutable());
                        if (!src.equals(dest)) {
                            if (src.toString()
                                    .matches(".*" + Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+.*")) {
                                // src is scratch directory, need to trim the part key value pairs from path
                                String diff = StringUtils.difference(src.toString(), dest.toString());
                                fs.delete(new Path(StringUtils.substringBefore(src.toString(), diff)), true);
                            } else {
                                fs.delete(src, true);
                            }
                        }

                        // Now, we check if the partition already exists. If not, we go ahead.
                        // If it does, we error out if the table is immutable; if it is mutable, we check
                        // that the existing partition's InputFormat matches the current job's (i.e. the
                        // table's) InputFormat. If compatible, we ignore it and do not add the partition
                        // again; if incompatible, we error out.

                        boolean publishRequired = false;
                        try {
                            Partition existingP = client.getPartition(p.getDbName(), p.getTableName(),
                                    p.getValues());
                            if (existingP != null) {
                                if (table.isImmutable()) {
                                    throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION,
                                            "Attempted duplicate partition publish on to immutable table");
                                } else {
                                    if (!existingP.getSd().getInputFormat()
                                            .equals(table.getInputFormatClass().getName())) {
                                        throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION,
                                                "Attempted partition append, where old partition format was "
                                                        + existingP.getSd().getInputFormat()
                                                        + " and table format was "
                                                        + table.getInputFormatClass().getName());
                                    }
                                }
                            } else {
                                publishRequired = true;
                            }
                        } catch (NoSuchObjectException e) {
                            // All good, no such partition exists, move on.
                            publishRequired = true;
                        }
                        if (publishRequired) {
                            client.add_partitions(partitionsToAdd);
                            partitionsAdded = partitionsToAdd;
                        }

                    } else {
                        // Dynamic partitioning usecase
                        if (!customDynamicLocationUsed) {
                            Path src = new Path(ptnRootLocation);
                            moveTaskOutputs(fs, src, src, tblPath, true, true); // dryRun = true, immutable = true
                            moveTaskOutputs(fs, src, src, tblPath, false, true);
                            if (!src.equals(tblPath)) {
                                fs.delete(src, true);
                            }
                        } else {
                            moveCustomLocationTaskOutputs(fs, table, hiveConf);
                        }
                        client.add_partitions(partitionsToAdd);
                        partitionsAdded = partitionsToAdd;
                    }
                }

                // Set permissions appropriately for each of the partitions we just created
                // so as to have their permissions mimic the table permissions
                for (Partition p : partitionsAdded) {
                    applyGroupAndPerms(fs, new Path(p.getSd().getLocation()), tblStat.getPermission(),
                            tblStat.getGroup(), true);
                }

            }
        } catch (Exception e) {
            if (partitionsAdded.size() > 0) {
                try {
                    // Publishing failed after some partitions were added; roll back by
                    // dropping the partitions that already made it into the metastore.
                    for (Partition p : partitionsAdded) {
                        client.dropPartition(tableInfo.getDatabaseName(), tableInfo.getTableName(), p.getValues(),
                                true);
                    }
                } catch (Exception te) {
                    // Keep cause as the original exception
                    throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
                }
            }
            if (e instanceof HCatException) {
                throw (HCatException) e;
            } else {
                throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
            }
        } finally {
            HCatUtil.closeHiveClientQuietly(client);
        }
    }

    private void moveCustomLocationTaskOutputs(FileSystem fs, Table table, Configuration conf) throws IOException {
        // In the case of custom dynamic partitions, we can't just move the sub-tree of the partition
        // root directory, since the partition locations contain a regex pattern. We need to first find
        // the final destination of each partition and then move its output.
        for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
            Path src = new Path(entry.getKey());
            Path destPath = new Path(getFinalDynamicPartitionDestination(table, entry.getValue(), jobInfo));
            moveTaskOutputs(fs, src, src, destPath, true, true); // dryRun = true, immutable = true
            moveTaskOutputs(fs, src, src, destPath, false, true);
        }
        // delete the parent temp directory of all custom dynamic partitions
        Path parentPath = new Path(getCustomPartitionRootLocation(jobInfo, conf));
        if (fs.exists(parentPath)) {
            fs.delete(parentPath, true);
        }
    }

    private void cancelDelegationTokens(JobContext context) throws IOException {
        LOG.info("Cancelling delegation token for the job.");
        IMetaStoreClient client = null;
        try {
            HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
            client = HCatUtil.getHiveMetastoreClient(hiveConf);
            // cancel the deleg. tokens that were acquired for this job now that
            // we are done - we should cancel if the tokens were acquired by
            // HCatOutputFormat and not if they were supplied by Oozie.
            // In the latter case the HCAT_KEY_TOKEN_SIGNATURE property in
            // the conf will not be set
            String tokenStrForm = client.getTokenStrForm();
            if (tokenStrForm != null
                    && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) {
                client.cancelDelegationToken(tokenStrForm);
            }
        } catch (MetaException e) {
            LOG.warn("MetaException while cancelling delegation token.", e);
        } catch (TException e) {
            LOG.warn("TException while cancelling delegation token.", e);
        } finally {
            HCatUtil.closeHiveClientQuietly(client);
        }
    }

}
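
For completeness, two job-level settings read by this class can be tuned from a driver: the _SUCCESS marker flag consumed by getOutputDirMarking(), and the append filename-clash limit read via HCatConstants.HCAT_APPEND_LIMIT in the constructor. A minimal, hedged sketch follows; the class name and chosen values are illustrative only, while the property name and constant are taken from the source above.

// Hypothetical driver-side configuration of the knobs this committer reads.
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.common.HCatConstants;

public class CommitterTuning {
  public static void configure(Configuration conf) {
    // Ask commitJob() to drop a _SUCCESS marker file into the output location
    // (this committer reads the flag with a default of false).
    conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", true);

    // Cap how many _a_N alternative file names getFinalPath() will try before
    // giving up when appending to a mutable table.
    conf.setInt(HCatConstants.HCAT_APPEND_LIMIT, 500);
  }
}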