org.apache.hadoop.hive.metastore.AggregateStatsCache.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.metastore.AggregateStatsCache.java
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hadoop.hive.metastore;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hive.common.util.BloomFilter;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class AggregateStatsCache {

    private static final Log LOG = LogFactory.getLog(AggregateStatsCache.class.getName());
    private static AggregateStatsCache self = null;

    // Backing store for this cache
    private final ConcurrentHashMap<Key, AggrColStatsList> cacheStore;
    // Cache size
    private final int maxCacheNodes;
    // Current nodes in the cache
    private AtomicInteger currentNodes = new AtomicInteger(0);
    // Run the cleaner thread when the cache is maxFull% full
    private final float maxFull;
    // Run the cleaner thread until cache is cleanUntil% occupied
    private final float cleanUntil;
    // Nodes go stale after this
    private final long timeToLiveMs;
    // Max time when waiting for write locks on node list
    private final long maxWriterWaitTime;
    // Max time when waiting for read locks on node list
    private final long maxReaderWaitTime;
    // Maximum number of paritions aggregated per cache node
    private final int maxPartsPerCacheNode;
    // Bloom filter false positive probability
    private final float falsePositiveProbability;
    // Max tolerable variance for matches
    private final float maxVariance;
    // Used to determine if cleaner thread is already running
    private boolean isCleaning = false;
    private AtomicLong cacheHits = new AtomicLong(0);
    private AtomicLong cacheMisses = new AtomicLong(0);
    // To track cleaner metrics
    int numRemovedTTL = 0, numRemovedLRU = 0;

    private AggregateStatsCache(int maxCacheNodes, int maxPartsPerCacheNode, long timeToLiveMs,
            float falsePositiveProbability, float maxVariance, long maxWriterWaitTime, long maxReaderWaitTime,
            float maxFull, float cleanUntil) {
        this.maxCacheNodes = maxCacheNodes;
        this.maxPartsPerCacheNode = maxPartsPerCacheNode;
        this.timeToLiveMs = timeToLiveMs;
        this.falsePositiveProbability = falsePositiveProbability;
        this.maxVariance = maxVariance;
        this.maxWriterWaitTime = maxWriterWaitTime;
        this.maxReaderWaitTime = maxReaderWaitTime;
        this.maxFull = maxFull;
        this.cleanUntil = cleanUntil;
        this.cacheStore = new ConcurrentHashMap<Key, AggrColStatsList>();
    }

    public static synchronized AggregateStatsCache getInstance(Configuration conf) {
        if (self == null) {
            int maxCacheNodes = HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE);
            // The number of partitions aggregated per cache node
            // If the number of partitions requested is > this value, we'll fetch directly from Metastore
            int maxPartitionsPerCacheNode = HiveConf.getIntVar(conf,
                    HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS);
            long timeToLiveMs = HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_TTL,
                    TimeUnit.SECONDS) * 1000;
            // False positives probability we are ready to tolerate for the underlying bloom filter
            float falsePositiveProbability = HiveConf.getFloatVar(conf,
                    HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_FPP);
            // Maximum tolerable variance in number of partitions between cached node and our request
            float maxVariance = HiveConf.getFloatVar(conf,
                    HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE);
            long maxWriterWaitTime = HiveConf.getTimeVar(conf,
                    HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT, TimeUnit.MILLISECONDS);
            long maxReaderWaitTime = HiveConf.getTimeVar(conf,
                    HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT, TimeUnit.MILLISECONDS);
            float maxFull = HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL);
            float cleanUntil = HiveConf.getFloatVar(conf,
                    HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL);
            self = new AggregateStatsCache(maxCacheNodes, maxPartitionsPerCacheNode, timeToLiveMs,
                    falsePositiveProbability, maxVariance, maxWriterWaitTime, maxReaderWaitTime, maxFull,
                    cleanUntil);
        }
        return self;
    }

    public int getMaxCacheNodes() {
        return maxCacheNodes;
    }

    public int getCurrentNodes() {
        return currentNodes.intValue();
    }

    public float getFullPercent() {
        return (currentNodes.intValue() / (float) maxCacheNodes) * 100;
    }

    public int getMaxPartsPerCacheNode() {
        return maxPartsPerCacheNode;
    }

    public float getFalsePositiveProbability() {
        return falsePositiveProbability;
    }

    public Float getHitRatio() {
        if (cacheHits.longValue() + cacheMisses.longValue() > 0) {
            return (float) (cacheHits.longValue()) / (cacheHits.longValue() + cacheMisses.longValue());
        }
        return null;
    }

    /**
     * Return aggregate stats for a column from the cache or null.
     * While reading from the nodelist for a key, we wait maxReaderWaitTime to acquire the lock,
     * failing which we return a cache miss (i.e. null)
     *
     * @param dbName
     * @param tblName
     * @param colName
     * @param partNames
     * @return
     */
    public AggrColStats get(String dbName, String tblName, String colName, List<String> partNames) {
        // Cache key
        Key key = new Key(dbName, tblName, colName);
        AggrColStatsList candidateList = cacheStore.get(key);
        // No key, or no nodes in candidate list
        if ((candidateList == null) || (candidateList.nodes.size() == 0)) {
            LOG.info("No aggregate stats cached for " + key.toString());
            return null;
        }
        // Find the value object
        // Update the timestamp of the key,value if value matches the criteria
        // Return the value
        AggrColStats match = null;
        boolean isLocked = false;
        try {
            // Try to readlock the candidateList; timeout after maxReaderWaitTime
            isLocked = candidateList.readLock.tryLock(maxReaderWaitTime, TimeUnit.MILLISECONDS);
            if (isLocked) {
                match = findBestMatch(partNames, candidateList.nodes);
            }
            if (match != null) {
                // Ok to not lock the list for this and use a volatile lastAccessTime instead
                candidateList.updateLastAccessTime();
                cacheHits.incrementAndGet();
                LOG.info("Returning aggregate stats from the cache; total hits: " + cacheHits.longValue()
                        + ", total misses: " + cacheMisses.longValue() + ", hit ratio: " + getHitRatio());
            } else {
                cacheMisses.incrementAndGet();
            }
        } catch (InterruptedException e) {
            LOG.debug(e);
        } finally {
            if (isLocked) {
                candidateList.readLock.unlock();
            }
        }
        return match;
    }

    /**
     * Find the best match using the configurable error tolerance and time to live value
     *
     * @param partNames
     * @param candidates
     * @return best matched node or null
     */
    private AggrColStats findBestMatch(List<String> partNames, List<AggrColStats> candidates) {
        // Hits, misses tracked for a candidate node
        MatchStats matchStats;
        // MatchStats for each candidate
        Map<AggrColStats, MatchStats> candidateMatchStats = new HashMap<AggrColStats, MatchStats>();
        // The final match we intend to return
        AggrColStats bestMatch = null;
        // To compare among potentially multiple matches
        int bestMatchHits = 0;
        int numPartsRequested = partNames.size();
        // 1st pass at marking invalid candidates
        // Checks based on variance and TTL
        // Note: we're not creating a copy of the list for saving memory
        for (AggrColStats candidate : candidates) {
            // Variance check
            if ((float) Math
                    .abs((candidate.getNumPartsCached() - numPartsRequested) / numPartsRequested) > maxVariance) {
                continue;
            }
            // TTL check
            if (isExpired(candidate)) {
                continue;
            } else {
                candidateMatchStats.put(candidate, new MatchStats(0, 0));
            }
        }
        // We'll count misses as we iterate
        int maxMisses = (int) maxVariance * numPartsRequested;
        for (String partName : partNames) {
            for (Iterator<Map.Entry<AggrColStats, MatchStats>> iterator = candidateMatchStats.entrySet()
                    .iterator(); iterator.hasNext();) {
                Map.Entry<AggrColStats, MatchStats> entry = iterator.next();
                AggrColStats candidate = entry.getKey();
                matchStats = entry.getValue();
                if (candidate.getBloomFilter().test(partName.getBytes())) {
                    ++matchStats.hits;
                } else {
                    ++matchStats.misses;
                }
                // 2nd pass at removing invalid candidates
                // If misses so far exceed max tolerable misses
                if (matchStats.misses > maxMisses) {
                    iterator.remove();
                    continue;
                }
                // Check if this is the best match so far
                if (matchStats.hits > bestMatchHits) {
                    bestMatch = candidate;
                }
            }
        }
        if (bestMatch != null) {
            // Update the last access time for this node
            bestMatch.updateLastAccessTime();
        }
        return bestMatch;
    }

    /**
     * Add a new node to the cache; may trigger the cleaner thread if the cache is near full capacity.
     * We'll however add the node even if we temporaily exceed maxCacheNodes, because the cleaner
     * will eventually create space from expired nodes or by removing LRU nodes.
     *
     * @param dbName
     * @param tblName
     * @param colName
     * @param numPartsCached
     * @param colStats
     * @param bloomFilter
     */
    // TODO: make add asynchronous: add shouldn't block the higher level calls
    public void add(String dbName, String tblName, String colName, long numPartsCached,
            ColumnStatisticsObj colStats, BloomFilter bloomFilter) {
        // If we have no space in the cache, run cleaner thread
        if (getCurrentNodes() / maxCacheNodes > maxFull) {
            spawnCleaner();
        }
        // Cache key
        Key key = new Key(dbName, tblName, colName);
        // Add new node to the cache
        AggrColStats node = new AggrColStats(numPartsCached, bloomFilter, colStats);
        AggrColStatsList nodeList;
        AggrColStatsList newNodeList = new AggrColStatsList();
        newNodeList.nodes = new ArrayList<AggrColStats>();
        nodeList = cacheStore.putIfAbsent(key, newNodeList);
        if (nodeList == null) {
            nodeList = newNodeList;
        }
        boolean isLocked = false;
        try {
            isLocked = nodeList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS);
            if (isLocked) {
                nodeList.nodes.add(node);
                node.updateLastAccessTime();
                nodeList.updateLastAccessTime();
                currentNodes.getAndIncrement();
            }
        } catch (InterruptedException e) {
            LOG.debug(e);
        } finally {
            if (isLocked) {
                nodeList.writeLock.unlock();
            }
        }
    }

    /**
     * Cleans the expired nodes or removes LRU nodes of the cache,
     * until the cache size reduces to cleanUntil% full.
     */
    private void spawnCleaner() {
        // This spawns a separate thread to walk through the cache and removes expired nodes.
        // Only one cleaner thread should be running at any point.
        synchronized (this) {
            if (isCleaning) {
                return;
            }
            isCleaning = true;
        }
        Thread cleaner = new Thread("AggregateStatsCache-CleanerThread") {
            @Override
            public void run() {
                numRemovedTTL = 0;
                numRemovedLRU = 0;
                long cleanerStartTime = System.currentTimeMillis();
                LOG.info("AggregateStatsCache is " + getFullPercent() + "% full, with " + getCurrentNodes()
                        + " nodes; starting cleaner thread");
                try {
                    Iterator<Map.Entry<Key, AggrColStatsList>> mapIterator = cacheStore.entrySet().iterator();
                    while (mapIterator.hasNext()) {
                        Map.Entry<Key, AggrColStatsList> pair = (Map.Entry<Key, AggrColStatsList>) mapIterator
                                .next();
                        AggrColStats node;
                        AggrColStatsList candidateList = (AggrColStatsList) pair.getValue();
                        List<AggrColStats> nodes = candidateList.nodes;
                        if (nodes.size() == 0) {
                            mapIterator.remove();
                            continue;
                        }
                        boolean isLocked = false;
                        try {
                            isLocked = candidateList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS);
                            if (isLocked) {
                                for (Iterator<AggrColStats> listIterator = nodes.iterator(); listIterator
                                        .hasNext();) {
                                    node = listIterator.next();
                                    // Remove the node if it has expired
                                    if (isExpired(node)) {
                                        listIterator.remove();
                                        numRemovedTTL++;
                                        currentNodes.getAndDecrement();
                                    }
                                }
                            }
                        } catch (InterruptedException e) {
                            LOG.debug(e);
                        } finally {
                            if (isLocked) {
                                candidateList.writeLock.unlock();
                            }
                        }
                        // We want to make sure this runs at a low priority in the background
                        Thread.yield();
                    }
                    // If the expired nodes did not result in cache being cleanUntil% in size,
                    // start removing LRU nodes
                    while (getCurrentNodes() / maxCacheNodes > cleanUntil) {
                        evictOneNode();
                    }
                } finally {
                    isCleaning = false;
                    LOG.info("Stopping cleaner thread; AggregateStatsCache is now " + getFullPercent()
                            + "% full, with " + getCurrentNodes() + " nodes");
                    LOG.info("Number of expired nodes removed: " + numRemovedTTL);
                    LOG.info("Number of LRU nodes removed: " + numRemovedLRU);
                    LOG.info("Cleaner ran for: " + (System.currentTimeMillis() - cleanerStartTime) + "ms");
                }
            }
        };
        cleaner.setPriority(Thread.MIN_PRIORITY);
        cleaner.setDaemon(true);
        cleaner.start();
    }

    /**
     * Evict an LRU node or expired node whichever we find first
     */
    private void evictOneNode() {
        // Get the LRU key, value
        Key lruKey = null;
        AggrColStatsList lruValue = null;
        for (Map.Entry<Key, AggrColStatsList> entry : cacheStore.entrySet()) {
            Key key = entry.getKey();
            AggrColStatsList value = entry.getValue();
            if (lruKey == null) {
                lruKey = key;
                lruValue = value;
                continue;
            }
            if ((value.lastAccessTime < lruValue.lastAccessTime) && !(value.nodes.isEmpty())) {
                lruKey = key;
                lruValue = value;
            }
        }
        // Now delete a node for this key's list
        AggrColStatsList candidateList = cacheStore.get(lruKey);
        boolean isLocked = false;
        try {
            isLocked = candidateList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS);
            if (isLocked) {
                AggrColStats candidate;
                AggrColStats lruNode = null;
                int currentIndex = 0;
                int deleteIndex = 0;
                for (Iterator<AggrColStats> iterator = candidateList.nodes.iterator(); iterator.hasNext();) {
                    candidate = iterator.next();
                    // Since we have to create space for 1, if we find an expired node we will remove it &
                    // return
                    if (isExpired(candidate)) {
                        iterator.remove();
                        currentNodes.getAndDecrement();
                        numRemovedTTL++;
                        return;
                    }
                    // Sorry, too many ifs but this form looks optimal
                    // Update the LRU node from what we've seen so far
                    if (lruNode == null) {
                        lruNode = candidate;
                        ++currentIndex;
                        continue;
                    }
                    if (lruNode != null) {
                        if (candidate.lastAccessTime < lruNode.lastAccessTime) {
                            lruNode = candidate;
                            deleteIndex = currentIndex;
                        }
                    }
                }
                candidateList.nodes.remove(deleteIndex);
                currentNodes.getAndDecrement();
                numRemovedLRU++;
            }
        } catch (InterruptedException e) {
            LOG.debug(e);
        } finally {
            if (isLocked) {
                candidateList.writeLock.unlock();
            }
        }
    }

    private boolean isExpired(AggrColStats aggrColStats) {
        return (System.currentTimeMillis() - aggrColStats.lastAccessTime) > timeToLiveMs;
    }

    /**
     * Key object for the stats cache hashtable
     */
    static class Key {
        private final String dbName;
        private final String tblName;
        private final String colName;

        Key(String db, String table, String col) {
            // Don't construct an illegal cache key
            if ((db == null) || (table == null) || (col == null)) {
                throw new IllegalArgumentException("dbName, tblName, colName can't be null");
            }
            dbName = db;
            tblName = table;
            colName = col;
        }

        @Override
        public boolean equals(Object other) {
            if ((other == null) || !(other instanceof Key)) {
                return false;
            }
            Key that = (Key) other;
            return dbName.equals(that.dbName) && tblName.equals(that.tblName) && colName.equals(that.colName);
        }

        @Override
        public int hashCode() {
            return dbName.hashCode() * 31 + tblName.hashCode() * 31 + colName.hashCode();
        }

        @Override
        public String toString() {
            return "database:" + dbName + ", table:" + tblName + ", column:" + colName;
        }

    }

    static class AggrColStatsList {
        // TODO: figure out a better data structure for node list(?)
        private List<AggrColStats> nodes = new ArrayList<AggrColStats>();
        private ReadWriteLock lock = new ReentrantReadWriteLock();
        // Read lock for get operation
        private Lock readLock = lock.readLock();
        // Write lock for add, evict and clean operation
        private Lock writeLock = lock.writeLock();
        // Using volatile instead of locking updates to this variable,
        // since we can rely on approx lastAccessTime but don't want a performance hit
        private volatile long lastAccessTime = 0;

        List<AggrColStats> getNodes() {
            return nodes;
        }

        void updateLastAccessTime() {
            this.lastAccessTime = System.currentTimeMillis();
        }
    }

    public static class AggrColStats {
        private final long numPartsCached;
        private final BloomFilter bloomFilter;
        private final ColumnStatisticsObj colStats;
        private volatile long lastAccessTime;

        public AggrColStats(long numPartsCached, BloomFilter bloomFilter, ColumnStatisticsObj colStats) {
            this.numPartsCached = numPartsCached;
            this.bloomFilter = bloomFilter;
            this.colStats = colStats;
            this.lastAccessTime = System.currentTimeMillis();
        }

        public long getNumPartsCached() {
            return numPartsCached;
        }

        public ColumnStatisticsObj getColStats() {
            return colStats;
        }

        public BloomFilter getBloomFilter() {
            return bloomFilter;
        }

        void updateLastAccessTime() {
            this.lastAccessTime = System.currentTimeMillis();
        }
    }

    /**
     * Intermediate object, used to collect hits & misses for each cache node that is evaluate for an
     * incoming request
     */
    private static class MatchStats {
        private int hits = 0;
        private int misses = 0;

        MatchStats(int hits, int misses) {
            this.hits = hits;
            this.misses = misses;
        }
    }
}