org.diqube.executionenv.FlattenedTableInstanceManager.java Source code

Java tutorial

Introduction

Here is the source code for org.diqube.executionenv.FlattenedTableInstanceManager.java

Source

/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.executionenv;

import java.util.Deque;
import java.util.HashSet;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiFunction;

import javax.annotation.PostConstruct;

import org.diqube.cache.CountingCache;
import org.diqube.cache.CountingCache.CountCleanupStrategy;
import org.diqube.cache.CountingCache.MemoryConsumptionProvider;
import org.diqube.cache.FlaggingCache;
import org.diqube.config.Config;
import org.diqube.config.ConfigKey;
import org.diqube.context.AutoInstatiate;
import org.diqube.data.flatten.FlattenedTable;
import org.diqube.util.Holder;
import org.diqube.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;

/**
 * Manages various {@link FlattenedTable}s that are available in main memory (=instances) locally.
 * <p>
 * Flattened tables that are not needed any more/are not used often enough/exceed a memory cap will be evicted. The
 * implementation is based on a {@link CountingCache}.
 * 
 * <p>
 * The counts are basically calls to {@link #getFlattenedTable(UUID, String, String)}.
 *
 * @author Bastian Gloeckle
 */
@AutoInstatiate
public class FlattenedTableInstanceManager {
    private static final Logger logger = LoggerFactory.getLogger(FlattenedTableInstanceManager.class);

    /**
     * Number of nanoseconds a flattened table gets flagged by
     * {@link #getNewestFlattenedTableVersionAndFlagIt(String, String)} and therefore saves it from being evicted.
     */
    public static final long FLATTENED_TABLE_FLAG_NANOSECONDS = 120 * 1_000_000_000L; /* 2 min */

    /** Flag duration actually used by this instance; defaults to the constant, overridable for tests. */
    private long flagNanoseconds = FLATTENED_TABLE_FLAG_NANOSECONDS;

    /**
     * Execute cache consolidation in approx. 10% of cases (13 out of 128).
     */
    private CacheConsolidateStrategy cacheConsolidateStrategy = () -> ThreadLocalRandom.current().nextInt(128) < 13;

    @Config(ConfigKey.FLATTEN_MEMORY_CACHE_SIZE_MB)
    private int flattenedTableCacheSizeMb;

    /**
     * {@link FlaggingCache} we use to store the {@link FlattenedTable}s.
     * 
     * <p>
     * Key1: Pair of tableName, flattenBy <br/>
     * Key2: versionId.
     * 
     * <p>
     * This cache typically holds only the newest versions of the FlattenedTables, although old versions might still be
     * available if they were flagged.
     */
    private CountingCache<Pair<String, String>, UUID, FlattenedTableInfo> cache;

    /**
     * Usage counts for table/flatten-by pairs.
     * 
     * This field will never be cleaned up, it therefore counts usages even when the corresponding table gets evicted from
     * {@link #cache} in the meantime (the counts inside the cache get evicted, because the cache key is based on the
     * version Id additionally).
     * 
     * <p>
     * The {@link AtomicLong} stored for a key is created once and then only incremented in place, it is never replaced
     * by another instance. That way concurrent increments cannot get lost.
     */
    private final ConcurrentMap<Pair<String, String>, AtomicLong> usageCounts = new ConcurrentHashMap<>();

    /**
     * ID of the newest version of a table/flatten-by pair. Sync access to this and cache by the UUID value of this map.
     */
    private final ConcurrentMap<Pair<String, String>, UUID> newestVersionIds = new ConcurrentHashMap<>();

    /** Elements whose counts should be removed in {@link #cache} on the next opportunity. */
    private final Deque<Pair<Pair<String, String>, UUID>> countCleanupCacheEntries = new ConcurrentLinkedDeque<>();

    /**
     * Creates the {@link #cache}, sized according to {@link #flattenedTableCacheSizeMb}.
     * 
     * <p>
     * Calling this method again replaces the cache by a new, empty one.
     */
    @PostConstruct
    public void initialize() {
        // Use a CountCleanupStrategy that cleans up everything that was already evicted from the cache: If something was
        // evicted from the cache, we definitely
        // won't offer it again, since we will not use that same versionId again. Therefore we can free up the count memory
        // of those.
        // Additionally we remove the counts of every version that is in #countCleanupCacheEntries. These are old versions.
        // If anybody still needs those versions, they must have flagged those elements in the cache, otherwise their
        // entries will have count 0 and that will most probably lead to them being evicted from the cache on the next run.
        CountCleanupStrategy<Pair<String, String>, UUID> cacheCountCleanupStrategy = (countsForCleanup,
                allCounts) -> {
            // Drain the pending entries. poll() returns null on an empty deque, so a concurrent drain by another thread
            // simply ends this loop instead of raising an exception.
            Set<Pair<Pair<String, String>, UUID>> curCountCleanupCacheEntries = new HashSet<>();
            Pair<Pair<String, String>, UUID> pendingEntry;
            while ((pendingEntry = countCleanupCacheEntries.poll()) != null)
                curCountCleanupCacheEntries.add(pendingEntry);

            Set<Pair<Pair<String, String>, UUID>> res = Sets.union(countsForCleanup,
                    Sets.intersection(allCounts, curCountCleanupCacheEntries));
            logger.trace("Evicting old usage counts (limit): {}", Iterables.limit(res, 100));
            return res;
        };

        MemoryConsumptionProvider<FlattenedTableInfo> cacheMemoryConsumptionProvider = info -> info
                .getFlattenedTable().calculateApproximateSizeInBytes();

        // The configured size is in MB, the cache is given the value converted to bytes (long arithmetic).
        cache = new CountingCache<>(flattenedTableCacheSizeMb * 1024L * 1024L, cacheMemoryConsumptionProvider,
                cacheCountCleanupStrategy);
    }

    /**
     * Register a newly created {@link FlattenedTable} from a Flattener.
     * 
     * <p>
     * This version will automatically be the newest version available, so it is likely that
     * {@link #getNewestFlattenedTableVersion(String, String)} will return this flattened table version if called right
     * after registering the new version.
     * 
     * <p>
     * The new flattenedTable will be available through this table manager at least for
     * {@link #FLATTENED_TABLE_FLAG_NANOSECONDS}.
     * 
     * @param versionId
     *          The version ID of the flattened table.
     * @param flattenedTable
     *          The flattened table itself.
     * @param origTableName
     *          The table the flattened table was based on.
     * @param flattenBy
     *          The field which the table was flattened by.
     */
    public void registerFlattenedTableVersion(UUID versionId, FlattenedTable flattenedTable, String origTableName,
            String flattenBy) {
        Pair<String, String> keyPair = new Pair<>(origTableName, flattenBy);

        // We manually manage the "counts" here: Get the count from usageCounts. Then manually force the cache to remove the
        // count on the old version (which will lead to eviction of the old entity from the cache).
        // This counting is not 100% thread safe, as we might lose a few "counts" if this method is called simultaneously
        // for the same flattening with different flattenedTables - but that is not a big problem, since typically only one
        // Flattener will be running for one table/flatten-by pair anyway.
        FlattenedTableInfo newInfo = new FlattenedTableInfo(versionId, flattenedTable);

        // Holds the usage count of the previous version (if any); a Holder is used so the lambda below can read it.
        Holder<Long> oldCountHolder = new Holder<>(null);

        Runnable update = () -> {
            // flag new flattened table to make sure it definitely ends up being in the cache.

            if (oldCountHolder.getValue() != null)
                cache.offerAndFlag(keyPair, versionId, newInfo, System.nanoTime() + flagNanoseconds,
                        oldCountHolder.getValue() + 1L);
            else
                cache.offerAndFlag(keyPair, versionId, newInfo, System.nanoTime() + flagNanoseconds);

            // Increment in place (creating the counter on first use) instead of replacing the AtomicLong instance: a
            // replaced instance would silently drop increments that other threads apply to the old object.
            usageCounts.computeIfAbsent(keyPair, k -> new AtomicLong()).incrementAndGet();

            newestVersionIds.put(keyPair, versionId);
        };

        Pair<UUID, FlattenedTable> oldNewestVersionPair = getNewestFlattenedTableVersion(origTableName, flattenBy);
        if (oldNewestVersionPair != null && !oldNewestVersionPair.getLeft().equals(versionId)) {
            // sync on previous UUID, since that'd be used by anyone who wants to query the newest UUID.
            synchronized (oldNewestVersionPair.getLeft()) {
                // Null-check the counter object itself; the long read from it can never be null.
                AtomicLong previousUsage = usageCounts.get(keyPair);
                if (previousUsage != null)
                    oldCountHolder.setValue(previousUsage.get());

                logger.info("Registering new flattened table {} from table '{}' flattened by '{}' of which an "
                        + "old version was in the cache already ({}). Using cached usageCounts for new table: {}",
                        versionId, origTableName, flattenBy, oldNewestVersionPair.getLeft(),
                        oldCountHolder.getValue());

                update.run();
            }

            // Schedule the count of the replaced version for removal on the next count cleanup of the cache.
            countCleanupCacheEntries.add(new Pair<>(keyPair, oldNewestVersionPair.getLeft()));
        } else {
            // NOTE(review): this branch is also taken if a previous version existed but is no longer resolvable from the
            // cache; the count kept in usageCounts is then not passed on to the cache - confirm this is intended.
            logger.info("Registering new flattened table {} from table '{}' flattened by '{}'", versionId,
                    origTableName, flattenBy);
            update.run();
        }
    }

    /**
     * Fetches the newest version of a flattened table.
     * 
     * @param origTableName
     *          Name of the original table.
     * @param flattenBy
     *          Field by which the orig table was flattened.
     * @return Pair of version ID and flattened table, or <code>null</code> in case there is no flattened version of that
     *         table.
     */
    public Pair<UUID, FlattenedTable> getNewestFlattenedTableVersion(String origTableName, String flattenBy) {
        logger.trace("Getting newest version of flattened table '{}' by '{}'", origTableName, flattenBy);
        return getNewestFlattenedTableVersion(origTableName, flattenBy,
                (keyPair, newestVersion) -> cache.get(keyPair, newestVersion));
    }

    /**
     * Fetches the newest version of a flattened table and flag that version to not be evicted for
     * {@link #FLATTENED_TABLE_FLAG_NANOSECONDS}.
     * 
     * @param origTableName
     *          Name of the original table.
     * @param flattenBy
     *          Field by which the orig table was flattened.
     * @return Pair of version ID and flattened table, or <code>null</code> in case there is no flattened version of that
     *         table.
     */
    public Pair<UUID, FlattenedTable> getNewestFlattenedTableVersionAndFlagIt(String origTableName,
            String flattenBy) {
        logger.trace("Flagging and getting newest version of flattened table '{}' by '{}'", origTableName,
                flattenBy);
        return getNewestFlattenedTableVersion(origTableName, flattenBy, //
                (keyPair, newestVersion) -> //
                cache.flagAndGet(keyPair, newestVersion, System.nanoTime() + flagNanoseconds));
    }

    /**
     * Resolves the newest registered version of a table/flatten-by pair using the given resolver.
     * 
     * <p>
     * The resolver is called while holding the monitor of the newest version's UUID object, after re-checking that this
     * UUID is still the newest one. If the newest version changed in the meantime, the lookup is retried with the new ID.
     * 
     * @param origTableName
     *          Name of the original table.
     * @param flattenBy
     *          Field by which the orig table was flattened.
     * @param flattenTableInfoResolver
     *          Fetches the {@link FlattenedTableInfo} of the given key pair and version; may return <code>null</code>.
     * @return Pair of version ID and flattened table, or <code>null</code> if no version is registered or the resolver
     *         returned <code>null</code>.
     */
    private Pair<UUID, FlattenedTable> getNewestFlattenedTableVersion(String origTableName, String flattenBy,
            BiFunction<Pair<String, String>, UUID, FlattenedTableInfo> flattenTableInfoResolver) {
        Pair<String, String> keyPair = new Pair<>(origTableName, flattenBy);

        UUID newestVersion = newestVersionIds.get(keyPair);
        while (newestVersion != null) {
            FlattenedTableInfo info;
            synchronized (newestVersion) {
                UUID newNewestVersion = newestVersionIds.get(keyPair);
                if (!newestVersion.equals(newNewestVersion)) {
                    // newestVersion changed, we have synced on old object -> retry!
                    newestVersion = newNewestVersion;
                    continue;
                }
                info = flattenTableInfoResolver.apply(keyPair, newestVersion);
            }

            return (info != null) ? new Pair<>(newestVersion, info.getFlattenedTable()) : null;
        }

        return null;
    }

    /**
     * Get a specific version of a flattened table and increase its usage count.
     * 
     * @param versionId
     *          The version of the flattened table.
     * @param origTableName
     *          The table name the flattening was based on.
     * @param flattenBy
     *          The field by which the original table was flattened.
     * @return The {@link FlattenedTable} or <code>null</code> if it is not available.
     */
    public FlattenedTable getFlattenedTable(UUID versionId, String origTableName, String flattenBy) {
        Pair<String, String> keyPair = new Pair<>(origTableName, flattenBy);
        FlattenedTableInfo info = cache.get(keyPair, versionId);

        if (info != null) {
            logger.trace("Using version {} of flattened table '{}' by '{}'.", versionId, origTableName, flattenBy);

            // increase usage count in cache and in our cache-evict-safe map.
            // Note that this will also happen if a version of a flattened table is fetched which is about to be removed from
            // the cache (= whose count was/will be removed). This is not that nice, since old versions get another "used"
            // count although they should have been deleted right away, but there is no simple way around this unfortunately.
            // The old versions might get another count, but that is not as bad either, as the count will always be very low
            // and the entry will be evicted soon again (together with its count being removed).
            cache.offer(keyPair, versionId, info);
            // During registration the cache entry is offered before the usage counter is created, so the counter may
            // not exist yet here; create it on demand instead of dereferencing a possible null.
            usageCounts.computeIfAbsent(keyPair, k -> new AtomicLong()).incrementAndGet();
        }

        // Consolidate cache. This will evict all not-any-more-flagged entries. We need to do this, as we might not call
        // #offer on the cache that often (which would execute the same). But if we do not do this here, we might end up
        // leaving unneeded objects in the cache for a longer time.
        if (cacheConsolidateStrategy.consolidateCache())
            cache.consolidate();

        return (info != null) ? info.getFlattenedTable() : null;
    }

    /** for tests */
    /* package */ void setFlagNanoseconds(long flagNanoseconds) {
        this.flagNanoseconds = flagNanoseconds;
    }

    /** for tests; note that this re-creates the cache via {@link #initialize()}. */
    /* package */ void setFlattenedTableCacheSizeMb(int flattenedTableCacheSizeMb) {
        this.flattenedTableCacheSizeMb = flattenedTableCacheSizeMb;
        initialize();
    }

    /** for tests */
    /* package */ CountingCache<Pair<String, String>, UUID, FlattenedTableInfo> getCache() {
        return cache;
    }

    /** for tests */
    /* package */void setCacheConsolidateStrategy(CacheConsolidateStrategy cacheConsolidateStrategy) {
        this.cacheConsolidateStrategy = cacheConsolidateStrategy;
    }

    /**
     * Immutable value stored in the cache: a flattened table together with its version ID.
     * 
     * <p>
     * Declared static so cached entries do not carry a reference to the enclosing manager instance.
     */
    private static class FlattenedTableInfo {
        private final UUID versionId;
        private final FlattenedTable flattenedTable;

        public FlattenedTableInfo(UUID versionId, FlattenedTable flattenedTable) {
            this.versionId = versionId;
            this.flattenedTable = flattenedTable;
        }

        public UUID getVersionId() {
            return versionId;
        }

        public FlattenedTable getFlattenedTable() {
            return flattenedTable;
        }
    }

    /**
     * Decides, on each call to {@link FlattenedTableInstanceManager#getFlattenedTable(UUID, String, String)}, whether
     * the cache should be consolidated.
     */
    @FunctionalInterface
    /* package */ static interface CacheConsolidateStrategy {
        /**
         * @return <code>true</code> if the cache should be consolidated now.
         */
        public boolean consolidateCache();
    }

}