Source code

Java tutorial


Here is the source code for org.apache.hadoop.hive.ql.txn.compactor.CompactorThread.java


/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.txn.compactor;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreThread;
import org.apache.hadoop.hive.metastore.RawStore;
import org.apache.hadoop.hive.metastore.RawStoreProxy;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Superclass for all threads in the compactor.
 */
abstract class CompactorThread extends Thread implements MetaStoreThread {
    static final private String CLASS_NAME = CompactorThread.class.getName();
    static final private Log LOG = LogFactory.getLog(CLASS_NAME);

    protected HiveConf conf;
    protected CompactionTxnHandler txnHandler;
    protected RawStore rs;
    protected int threadId;
    protected AtomicBoolean stop;
    protected AtomicBoolean looped;

    public void setHiveConf(HiveConf conf) {
        this.conf = conf;

    public void setThreadId(int threadId) {
        this.threadId = threadId;


    public void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException {
        this.stop = stop;
        this.looped = looped;
        setDaemon(true); // this means the process will exit without waiting for this thread

        // Get our own instance of the transaction handler
        txnHandler = new CompactionTxnHandler(conf);

        // Get our own connection to the database so we can get table and partition information.
        rs = RawStoreProxy.getProxy(conf, conf, conf.getVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL), threadId);

     * Find the table being compacted
     * @param ci compaction info returned from the compaction queue
     * @return metastore table
     * @throws org.apache.hadoop.hive.metastore.api.MetaException if the table cannot be found.
    protected Table resolveTable(CompactionInfo ci) throws MetaException {
        try {
            return rs.getTable(ci.dbname, ci.tableName);
        } catch (MetaException e) {
            LOG.error("Unable to find table " + ci.getFullTableName() + ", " + e.getMessage());
            throw e;

     * Get the partition being compacted.
     * @param ci compaction info returned from the compaction queue
     * @return metastore partition, or null if there is not partition in this compaction info
     * @throws Exception if underlying calls throw, or if the partition name resolves to more than
     * one partition.
    protected Partition resolvePartition(CompactionInfo ci) throws Exception {
        if (ci.partName != null) {
            List<Partition> parts = null;
            try {
                parts = rs.getPartitionsByNames(ci.dbname, ci.tableName, Collections.singletonList(ci.partName));
                if (parts == null || parts.size() == 0) {
                    // The partition got dropped before we went looking for it.
                    return null;
            } catch (Exception e) {
                LOG.error("Unable to find partition " + ci.getFullPartitionName() + ", " + e.getMessage());
                throw e;
            if (parts.size() != 1) {
                LOG.error(ci.getFullPartitionName() + " does not refer to a single partition");
                throw new MetaException("Too many partitions");
            return parts.get(0);
        } else {
            return null;

     * Get the storage descriptor for a compaction.
     * @param t table from {@link #resolveTable(org.apache.hadoop.hive.metastore.txn.CompactionInfo)}
     * @param p table from {@link #resolvePartition(org.apache.hadoop.hive.metastore.txn.CompactionInfo)}
     * @return metastore storage descriptor.
    protected StorageDescriptor resolveStorageDescriptor(Table t, Partition p) {
        return (p == null) ? t.getSd() : p.getSd();

     * Determine which user to run an operation as, based on the owner of the directory to be
     * compacted.  It is asserted that either the user running the hive metastore or the table
     * owner must be able to stat the directory and determine the owner.
     * @param location directory that will be read or written to.
     * @param t metastore table object
     * @return username of the owner of the location.
     * @throws if neither the hive metastore user nor the table owner can stat
     * the location.
    protected String findUserToRunAs(String location, Table t) throws IOException, InterruptedException {
        LOG.debug("Determining who to run the job as.");
        final Path p = new Path(location);
        final FileSystem fs = p.getFileSystem(conf);
        try {
            FileStatus stat = fs.getFileStatus(p);
            LOG.debug("Running job as " + stat.getOwner());
            return stat.getOwner();
        } catch (AccessControlException e) {
            // TODO not sure this is the right exception
            LOG.debug("Unable to stat file as current user, trying as table owner");

            // Now, try it as the table owner and see if we get better luck.
            final List<String> wrapper = new ArrayList<String>(1);
            UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(),
            ugi.doAs(new PrivilegedExceptionAction<Object>() {
                public Object run() throws Exception {
                    FileStatus stat = fs.getFileStatus(p);
                    return null;

            if (wrapper.size() == 1) {
                LOG.debug("Running job as " + wrapper.get(0));
                return wrapper.get(0);
        LOG.error("Unable to stat file as either current user or table owner, giving up");
        throw new IOException("Unable to stat file");

     * Determine whether to run this job as the current user or whether we need a doAs to switch
     * users.
     * @param owner of the directory we will be working in, as determined by
     * {@link #findUserToRunAs(String, org.apache.hadoop.hive.metastore.api.Table)}
     * @return true if the job should run as the current user, false if a doAs is needed.
    protected boolean runJobAsSelf(String owner) {
        return (owner.equals(System.getProperty("")));

    protected String tableName(Table t) {
        return t.getDbName() + "." + t.getTableName();