org.apache.hadoop.hive.ql.txn.compactor.Initiator.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.txn.compactor.Initiator.java.
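
The Initiator is one of the metastore-side compactor threads for Hive ACID tables. It periodically scans tables and partitions that have seen completed or aborted transactions, and enqueues major or minor compaction requests for the Worker threads to pick up when the delta files grow too numerous or too large relative to the base.

As a minimal sketch of how the thread is configured (the ConfVars names below are the ones this class reads, plus HIVE_COMPACTOR_INITIATOR_ON, which is assumed from HiveConf; the values are illustrative, not defaults):

// Sketch: setting the configuration the Initiator reads.
HiveConf conf = new HiveConf();
conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_INITIATOR_ON, true);         // start the thread at all
conf.setTimeVar(HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL,
        300, TimeUnit.SECONDS);                                               // how often run() wakes up
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 10);     // delta count before minor compaction
conf.setFloatVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD, 0.1f); // delta/base size ratio for major
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD, 1000);  // aborted txns forcing major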

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.txn.compactor;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.CompactionRequest;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler;
import org.apache.hadoop.hive.metastore.txn.TxnHandler;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * A class to initiate compactions.  This will run in a separate thread.
 */
public class Initiator extends CompactorThread {
    private static final String CLASS_NAME = Initiator.class.getName();
    private static final Log LOG = LogFactory.getLog(CLASS_NAME);

    private long checkInterval;

    @Override
    public void run() {
        // Make sure nothing escapes this run method and kills the metastore at large,
        // so wrap it in a big catch Throwable statement.
        try {
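            // On startup, reclaim any compactions that were left assigned to workers
            // on this host by a previous crash (remoteOnly == false also revokes
            // entries owned by local workers, not just timed-out remote ones).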
            recoverFailedCompactions(false);

            int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);

            // Make sure we run through the loop once before checking whether to stop, as this
            // makes testing much easier.  The stop value is only used for testing and is ignored
            // when called from HiveMetaStore.
            do {
                long startedAt = System.currentTimeMillis();

                // Wrap the inner parts of the loop in a catch throwable so that any errors in the loop
                // don't doom the entire thread.
                try {
                    ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
                    ValidTxnList txns = CompactionTxnHandler
                            .createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
                    Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
                    LOG.debug("Found " + potentials.size() + " potential compactions, "
                            + "checking to see if we should compact any of them");
                    for (CompactionInfo ci : potentials) {
                        LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
                        try {
                            Table t = resolveTable(ci);
                            if (t == null) {
                                // Most likely this means it's a temp table
                                LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp "
                                        + "table or has been dropped and moving on.");
                                continue;
                            }

                            // Check whether automatic compaction has been disabled for this table
                            if (noAutoCompactSet(t)) {
                                LOG.info("Table " + tableName(t) + " marked "
                                        + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT
                                        + "=true so we will not compact it.");
                                continue;
                            }

                            // Check to see if this is a table level request on a partitioned table.  If so,
                            // then it's a dynamic partitioning case and we shouldn't check the table itself.
                            if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0
                                    && ci.partName == null) {
                                LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic"
                                        + " partitioning");
                                continue;
                            }

                            // Check if we already have initiated or are working on a compaction for this partition
                            // or table.  If so, skip it.  If we are just waiting on cleaning we can still check,
                            // as it may be time to compact again even though we haven't cleaned.
                            if (lookForCurrentCompactions(currentCompactions, ci)) {
                                LOG.debug("Found currently initiated or working compaction for "
                                        + ci.getFullPartitionName()
                                        + " so we will not initiate another compaction");
                                continue;
                            }

                            // Figure out who we should run the file operations as
                            Partition p = resolvePartition(ci);
                            if (p == null && ci.partName != null) {
                                LOG.info("Can't find partition " + ci.getFullPartitionName()
                                        + ", assuming it has been dropped and moving on.");
                                continue;
                            }
                            StorageDescriptor sd = resolveStorageDescriptor(t, p);
                            String runAs = findUserToRunAs(sd.getLocation(), t);

                            CompactionType compactionNeeded = checkForCompaction(ci, txns, sd, runAs);
                            if (compactionNeeded != null)
                                requestCompaction(ci, runAs, compactionNeeded);
                        } catch (Throwable t) {
                            LOG.error("Caught exception while trying to determine if we should compact "
                                    + ci.getFullPartitionName() + ".  Marking it cleaned to avoid repeated "
                                    + "failures: " + StringUtils.stringifyException(t));
                            txnHandler.markCleaned(ci);
                        }
                    }

                    // Check for timed-out remote workers.
                    recoverFailedCompactions(true);

                    // Clean anything from the txns table that has no components left in txn_components.
                    txnHandler.cleanEmptyAbortedTxns();
                } catch (Throwable t) {
                    LOG.error("Initiator loop caught unexpected exception this time through the loop: "
                            + StringUtils.stringifyException(t));
                }

                long elapsedTime = System.currentTimeMillis() - startedAt;
                // Skip the sleep if this pass already took longer than the check
                // interval, or if we have been asked to stop.
                if (elapsedTime >= checkInterval || stop.get())
                    continue;
                else
                    Thread.sleep(checkInterval - elapsedTime);

            } while (!stop.get());
        } catch (Throwable t) {
            LOG.error("Caught an exception in the main loop of compactor initiator, exiting "
                    + StringUtils.stringifyException(t));
        }
    }

    @Override
    public void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException {
        super.init(stop, looped);
        checkInterval = conf.getTimeVar(HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
    }

    private void recoverFailedCompactions(boolean remoteOnly) throws MetaException {
        if (!remoteOnly)
            txnHandler.revokeFromLocalWorkers(Worker.hostname());
        txnHandler.revokeTimedoutWorkers(
                HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_WORKER_TIMEOUT, TimeUnit.MILLISECONDS));
    }

    // Figure out if there are any currently running compactions on the same table or partition.
    private boolean lookForCurrentCompactions(ShowCompactResponse compactions, CompactionInfo ci) {
        if (compactions.getCompacts() != null) {
            for (ShowCompactResponseElement e : compactions.getCompacts()) {
                // Match on database and table, and on partition name while treating
                // two nulls as equal, guarding against a null partition name on
                // either side.
                if (!e.getState().equals(TxnHandler.CLEANING_RESPONSE) && e.getDbname().equals(ci.dbname)
                        && e.getTablename().equals(ci.tableName)
                        && (e.getPartitionname() == null ? ci.partName == null
                                : e.getPartitionname().equals(ci.partName))) {
                    return true;
                }
            }
        }
        return false;
    }

    private CompactionType checkForCompaction(final CompactionInfo ci, final ValidTxnList txns,
            final StorageDescriptor sd, final String runAs) throws IOException, InterruptedException {
        // If it's marked as having too many aborted transactions, we already know we need to compact
        if (ci.tooManyAborts) {
            LOG.debug("Found too many aborted transactions for " + ci.getFullPartitionName() + ", "
                    + "initiating major compaction");
            return CompactionType.MAJOR;
        }
        if (runJobAsSelf(runAs)) {
            return determineCompactionType(ci, txns, sd);
        } else {
            LOG.info("Going to initiate as user " + runAs);
            UserGroupInformation ugi = UserGroupInformation.createProxyUser(runAs,
                    UserGroupInformation.getLoginUser());
            return ugi.doAs(new PrivilegedExceptionAction<CompactionType>() {
                @Override
                public CompactionType run() throws Exception {
                    return determineCompactionType(ci, txns, sd);
                }
            });
        }
    }

    private CompactionType determineCompactionType(CompactionInfo ci, ValidTxnList txns, StorageDescriptor sd)
            throws IOException, InterruptedException {
        boolean noBase = false;
        Path location = new Path(sd.getLocation());
        FileSystem fs = location.getFileSystem(conf);
        AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns);
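        // "dir" captures the ACID layout at this location: the current base
        // directory (if any), pre-ACID "original" files, and the delta
        // directories visible under the supplied transaction list.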
        Path base = dir.getBaseDirectory();
        long baseSize = 0;
        FileStatus stat = null;
        if (base != null) {
            stat = fs.getFileStatus(base);
            if (!stat.isDirectory()) {
                LOG.error("Was assuming base " + base.toString() + " is a directory, but it's a file!");
                return null;
            }
            baseSize = sumDirSize(fs, base);
        }

        List<FileStatus> originals = dir.getOriginalFiles();
        for (FileStatus origStat : originals) {
            baseSize += origStat.getLen();
        }

        long deltaSize = 0;
        List<AcidUtils.ParsedDelta> deltas = dir.getCurrentDirectories();
        for (AcidUtils.ParsedDelta delta : deltas) {
            stat = fs.getFileStatus(delta.getPath());
            if (!stat.isDirectory()) {
                LOG.error("Was assuming delta " + delta.getPath().toString() + " is a directory, "
                        + "but it's a file!");
                return null;
            }
            deltaSize += sumDirSize(fs, delta.getPath());
        }

        if (baseSize == 0 && deltaSize > 0) {
            // There is no base to compare against, so skip the percentage check;
            // remember this so the delta count check below requests a major
            // compaction (which creates a base) rather than a minor one.
            noBase = true;
        } else {
            float deltaPctThreshold = HiveConf.getFloatVar(conf,
                    HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD);
            boolean bigEnough = (float) deltaSize / (float) baseSize > deltaPctThreshold;
            if (LOG.isDebugEnabled()) {
                StringBuilder msg = new StringBuilder("delta size: ");
                msg.append(deltaSize);
                msg.append(" base size: ");
                msg.append(baseSize);
                msg.append(" threshold: ");
                msg.append(deltaPctThreshold);
                msg.append(" will major compact: ");
                msg.append(bigEnough);
                LOG.debug(msg);
            }
            if (bigEnough)
                return CompactionType.MAJOR;
        }

        int deltaNumThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD);
        boolean enough = deltas.size() > deltaNumThreshold;
        if (enough) {
            LOG.debug("Found " + deltas.size() + " delta files, which exceeds the threshold of "
                    + deltaNumThreshold + ", requesting " + (noBase ? "major" : "minor")
                    + " compaction");
            // If there's no base file, do a major compaction
            return noBase ? CompactionType.MAJOR : CompactionType.MINOR;
        }
        return null;
    }

    private long sumDirSize(FileSystem fs, Path dir) throws IOException {
        long size = 0;
        FileStatus[] buckets = fs.listStatus(dir, FileUtils.HIDDEN_FILES_PATH_FILTER);
        for (FileStatus bucket : buckets) {
            size += bucket.getLen();
        }
        return size;
    }

    private void requestCompaction(CompactionInfo ci, String runAs, CompactionType type) throws MetaException {
        String s = "Requesting " + type.toString() + " compaction for " + ci.getFullPartitionName();
        LOG.info(s);
        CompactionRequest rqst = new CompactionRequest(ci.dbname, ci.tableName, type);
        if (ci.partName != null)
            rqst.setPartitionname(ci.partName);
        rqst.setRunas(runAs);
        txnHandler.compact(rqst);
    }

    // Because TABLE_NO_AUTO_COMPACT was originally documented as the upper-case
    // NO_AUTO_COMPACT and was later changed to the lower-case no_auto_compact,
    // we need to check for it in both forms.
    private boolean noAutoCompactSet(Table t) {
        String noAutoCompact = t.getParameters().get(hive_metastoreConstants.TABLE_NO_AUTO_COMPACT);
        if (noAutoCompact == null) {
            noAutoCompact = t.getParameters().get(hive_metastoreConstants.TABLE_NO_AUTO_COMPACT.toUpperCase());
        }
        return noAutoCompact != null && noAutoCompact.equalsIgnoreCase("true");
    }
}
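
The Initiator is normally created and started by HiveMetaStore when hive.compactor.initiator.on is enabled; it is not run directly. As a hypothetical test-style harness (setHiveConf and setThreadId are assumed here from the MetaStoreThread interface and do not appear in this file), wiring one up by hand would look roughly like this:

// Sketch only: starting an Initiator by hand, e.g. in a test,
// given a HiveConf like the one sketched in the introduction.
AtomicBoolean stop = new AtomicBoolean(false);
AtomicBoolean looped = new AtomicBoolean(false);

Initiator initiator = new Initiator();
initiator.setHiveConf(conf);   // assumed MetaStoreThread setter
initiator.setThreadId(1);      // assumed MetaStoreThread setter
initiator.init(stop, looped);  // shown above: also reads the check interval
initiator.start();             // CompactorThread runs run() in its own thread

// ... later, for an orderly shutdown; the flag is checked once per loop pass:
stop.set(true);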