com.qubole.rubix.spi.CacheConfig.java Source code

Java tutorial

Introduction

Here is the source code for com.qubole.rubix.spi.CacheConfig.java

Source

/**
 * Copyright (c) 2016. Qubole Inc
 * Licensed under the Apache License, Version 2.0 (the License);
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package com.qubole.rubix.spi;

import com.google.common.base.Charsets;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

/**
 * Created by stagra on 14/2/16.
 */

/*
 * The conf provided to the CachingFileSystem should have the appropriate configs set if the user wishes to override defaults.
 * Different engines provide different ways to do this:
 * Presto: "hive.config.resources" can point to files storing hadoop configurations
 * Hive: --hiveconf argument can be used
 * Spark: --config spark.hadoop.XYZ can be used
 */

/**
 * Static accessors for the cache settings read from a Hadoop {@link Configuration}, plus
 * helpers that map a remote path onto a location under the local cache directories.
 *
 * <p>The class only exposes static members and cannot be instantiated.
 */
public class CacheConfig {
    public static final String DATA_CACHE_ENABLED = "hadoop.cache.data.enabled";
    public static final String DATA_CACHE_TABLE_WHITELIST = "hadoop.cache.data.table.whitelist";
    public static final String DATA_CACHE_TABLE = "hadoop.cache.data.table";
    // Internal
    // In strict mode, queries will error out if BookKeeper cannot be reached
    public static final String DATA_CACHE_STRICT_MODE = "hadoop.cache.data.strict.mode";
    public static final String DATA_CACHE_LOCATION_WHITELIST = "hadoop.cache.data.location.whitelist"; // only these locations will be cached
    public static final String DATA_CACHE_LOCATION_BLACKLIST = "hadoop.cache.data.location.blacklist"; // these locations will be skipped, takes priority over Whitelist
    public static final String DATA_CACHE_TABLE_MIN_COLS = "hadoop.cache.data.table.columns.min";
    public static final String DATA_CACHE_TABLE_COLS_CHOSEN = "hadoop.cache.data.table.columns.chosen";

    // Configuration keys. The non-private ones are deliberately left non-final because code
    // outside this class may reassign them.
    public static String dataCacheExpirationConf = "hadoop.cache.data.expiration";
    public static String dataCacheExpirationAfterWriteConf = "hadoop.cache.data.expiration.after-write";
    public static String dataCacheFullnessConf = "hadoop.cache.data.fullness.percentage";
    public static String dataCacheDirprefixesConf = "hadoop.cache.data.dirprefix.list";
    public static String blockSizeConf = "hadoop.cache.data.block-size";
    public static String dataCacheBookkeeperPortConf = "hadoop.cache.data.bookkeeper.port";
    public static String dataCacheBookkeeperMaxThreadsConf = "hadoop.cache.data.bookkeeper.max-threads";
    private static final String clientTimeoutConf = "hadoop.cache.data.client.timeout";
    private static final String maxRetriesConf = "hadoop.cache.data.client.num-retries";
    // Suffix appended to a disk path to obtain the cache directory on that disk
    static String fileCacheDirSuffixConf = "/fcache/";
    // Number of numeric suffixes (0 .. maxDisksConf - 1) probed for every directory prefix
    static int maxDisksConf = 5;

    // default values
    private static final int dataCacheExpiry = Integer.MAX_VALUE;
    // Keeping this low to work around the Guava Cache static weighing limitation
    private static final int dataCacheExpiryAfterWrite = 300; //In sec
    private static final int dataCacheFullness = 80; // percent
    private static final String dataCacheDirPrefixes = "/media/ephemeral";
    private static final int blockSize = 1 * 1024 * 1024; // 1MB
    private static final int serverPort = 8899;
    private static final int serverMaxThreads = Integer.MAX_VALUE;

    private CacheConfig() {
    }

    /**
     * @param conf configuration to read from
     * @return value of {@code dataCacheExpirationConf}, or {@code Integer.MAX_VALUE} if unset
     */
    public static int getCacheDataExpiration(Configuration conf) {
        return conf.getInt(dataCacheExpirationConf, dataCacheExpiry);
    }

    /**
     * @param conf configuration to read from
     * @return value of {@code dataCacheExpirationAfterWriteConf}, or 300 (seconds) if unset
     */
    public static int getCacheDataExpirationAfterWrite(Configuration conf) {
        return conf.getInt(dataCacheExpirationAfterWriteConf, dataCacheExpiryAfterWrite);
    }

    /**
     * @param conf configuration to read from
     * @return value of {@code dataCacheFullnessConf}, or 80 (percent) if unset
     */
    public static int getCacheDataFullnessPercentage(Configuration conf) {
        return conf.getInt(dataCacheFullnessConf, dataCacheFullness);
    }

    /**
     * @param conf configuration to read from
     * @return the raw, comma-separated list of cache directory prefixes, or
     *         {@code "/media/ephemeral"} if unset
     */
    public static String getCacheDirPrefixList(Configuration conf) {
        return conf.get(dataCacheDirprefixesConf, dataCacheDirPrefixes);
    }

    /**
     * @param conf configuration to read from
     * @return value of {@code blockSizeConf}, or 1MB (in bytes) if unset
     */
    public static int getBlockSize(Configuration conf) {
        return conf.getInt(blockSizeConf, blockSize);
    }

    /**
     * @param conf configuration to read from
     * @return value of {@code dataCacheBookkeeperPortConf}, or 8899 if unset
     */
    public static int getServerPort(Configuration conf) {
        return conf.getInt(dataCacheBookkeeperPortConf, serverPort);
    }

    /**
     * @param conf configuration to read from
     * @return value of {@code dataCacheBookkeeperMaxThreadsConf}, or {@code Integer.MAX_VALUE}
     *         if unset
     */
    public static int getServerMaxThreads(Configuration conf) {
        return conf.getInt(dataCacheBookkeeperMaxThreadsConf, serverMaxThreads);
    }

    /**
     * @param conf configuration to read from
     * @return the configured client timeout, or 60000 (milliseconds) if unset
     */
    public static int getClientTimeout(Configuration conf) {
        return conf.getInt(clientTimeoutConf, 60000); //ms
    }

    /**
     * @param conf configuration to read from
     * @return the configured number of client retries, or 3 if unset
     */
    public static int getMaxRetries(Configuration conf) {
        return conf.getInt(maxRetriesConf, 3);
    }

    /**
     * @param conf configuration to read the directory prefixes from
     * @return number of entries in {@link #getDiskPathsMap(Configuration)}
     */
    public static int numDisks(Configuration conf) {
        return getDiskPathsMap(conf).size();
    }

    /**
     * Probes the local filesystem for cache disks and returns them keyed by a dense index.
     *
     * <p>For every configured directory prefix and every {@code i} in
     * {@code [0, maxDisksConf)}, the path {@code prefix + i} is checked; each one that exists is
     * added to the map under the next free index (starting at 0), and creation of its
     * {@code fileCacheDirSuffixConf} sub-directory is attempted.
     *
     * @param conf configuration to read the directory prefixes from
     * @return map from disk index to disk path (without the cache directory suffix)
     */
    public static HashMap<Integer, String> getDiskPathsMap(final Configuration conf) {
        // NOTE(review): the memoizing supplier is created anew on every call, so nothing is
        // cached across calls and the filesystem is probed each time. Kept as-is to preserve
        // behavior; confirm whether cross-call caching was intended.
        Supplier<HashMap<Integer, String>> s = Suppliers.memoize(new Supplier<HashMap<Integer, String>>() {
            @Override
            public HashMap<Integer, String> get() {
                HashMap<Integer, String> dirPathMap = new HashMap<>();
                int ndisks = 0;
                List<String> dirPrefixList = getDirPrefixList(conf);
                for (String dirPrefix : dirPrefixList) {
                    for (int i = 0; i < maxDisksConf; ++i) {
                        String diskPath = dirPrefix + i;
                        if (exists(diskPath)) {
                            File dir = new File(diskPath + fileCacheDirSuffixConf);
                            // Best effort: the result is ignored (false also means "already exists")
                            dir.mkdir();
                            dirPathMap.put(ndisks, diskPath);
                            ++ndisks;
                        }
                    }
                }
                return dirPathMap;
            }
        });
        return s.get();
    }

    /**
     * Splits the configured directory prefix list on commas, dropping whitespace around them.
     */
    private static List<String> getDirPrefixList(Configuration conf) {
        String cacheDirPrefixList = getCacheDirPrefixList(conf);
        return Arrays.asList(cacheDirPrefixList.split("\\s*,\\s*"));
    }

    /**
     * @param conf configuration to read the directory prefixes from
     * @param d disk index as used by {@link #getDiskPathsMap(Configuration)}
     * @return path of disk {@code d}, or {@code null} if there is no disk with that index
     */
    public static String getDirPath(Configuration conf, int d) {
        HashMap<Integer, String> dirPrefixMap = getDiskPathsMap(conf);
        return dirPrefixMap.get(d);
    }

    private static boolean exists(String filename) {
        return (new File(filename)).exists();
    }

    /**
     * Returns the local file path for {@code remotePath}: the directory from
     * {@link #getDirectory(String, Configuration)}, a {@code "/"}, and
     * {@link #getName(String)}.
     *
     * @param remotePath remote path of the file
     * @param conf configuration used to locate the cache disks
     * @return local path of the file
     */
    public static String getLocalPath(String remotePath, Configuration conf) {
        String absLocation = getDirectory(remotePath, conf);
        return absLocation + "/" + getName(remotePath);
    }

    /**
     * Returns the local path from {@link #getLocalPath(String, Configuration)} with
     * {@code "_mdfile"} appended.
     *
     * @param remotePath remote path of the file
     * @param conf configuration used to locate the cache disks
     * @return local path of the {@code _mdfile} for {@code remotePath}
     */
    public static String getMDFile(String remotePath, Configuration conf) {
        String absLocation = getDirectory(remotePath, conf);
        return absLocation + "/" + getName(remotePath) + "_mdfile";
    }

    /**
     * Returns, and attempts to create, the local directory for {@code remotePath}.
     *
     * <p>The directory is the cache directory chosen by
     * {@link #getLocalDirFor(String, Configuration)} followed by the parent of
     * {@code remotePath}. If the parent contains a {@code ':'}, everything up to and including
     * the first {@code ':'} and the two characters after it is dropped (presumably the
     * {@code "scheme://"} part of a fully qualified path).
     *
     * @param remotePath remote path of the file
     * @param conf configuration used to locate the cache disks
     * @return local directory path
     */
    public static String getDirectory(String remotePath, Configuration conf) {
        String parentPath = getParent(remotePath);
        int colon = parentPath.indexOf(':');
        String relLocation = colon == -1 ? parentPath : parentPath.substring(colon + 3);
        String absLocation = getLocalDirFor(remotePath, conf) + relLocation;
        File parent = new File(absLocation);
        // Best effort: the result is ignored (false also means "already exists")
        parent.mkdirs();
        return absLocation;
    }

    /**
     * Picks the cache directory for {@code remotePath} by hashing the path (murmur3, 32 bit)
     * into one of {@code 100 * numDisks} buckets and mapping the bucket onto a disk index.
     *
     * @param remotePath remote path of the file
     * @param conf configuration used to locate the cache disks
     * @return path of the chosen disk followed by {@code fileCacheDirSuffixConf}
     * @throws ArithmeticException if no cache disk was found (division by zero)
     */
    public static String getLocalDirFor(String remotePath, Configuration conf) {
        // Read the disk map once so the disk count and the lookup below use the same snapshot
        HashMap<Integer, String> diskPaths = getDiskPathsMap(conf);
        int numDisks = diskPaths.size();
        int numBuckets = 100 * numDisks;
        HashFunction hf = Hashing.murmur3_32();
        HashCode hc = hf.hashString(remotePath, Charsets.UTF_8);
        int hash = hc.asInt();
        // Math.abs(Integer.MIN_VALUE) is still Integer.MIN_VALUE, which would yield a negative
        // bucket and a null directory. Special-case it; every other hash maps exactly as before.
        int nonNegativeHash = hash == Integer.MIN_VALUE ? 0 : Math.abs(hash);
        int bucket = nonNegativeHash % numBuckets;
        int dirNum = (bucket / numDisks) % numDisks;

        return diskPaths.get(dirNum) + CacheConfig.fileCacheDirSuffixConf;
    }

    /**
     * @param remotePath path to take the name from
     * @return the part of {@code remotePath} starting at its last {@code '/'} (the slash is
     *         included), or {@code remotePath} itself if it contains no {@code '/'}
     */
    public static String getName(String remotePath) {
        int lastSlash = remotePath.lastIndexOf('/');
        return lastSlash == -1 ? remotePath : remotePath.substring(lastSlash);
    }

    /**
     * @param remotePath path to take the parent from
     * @return the part of {@code remotePath} before its last {@code '/'}, or the empty string
     *         if it contains no {@code '/'}
     */
    public static String getParent(String remotePath) {
        int lastSlash = remotePath.lastIndexOf('/');
        return lastSlash == -1 ? "" : remotePath.substring(0, lastSlash);
    }

    static int getCacheDataChosenColumns(Configuration c) {
        return c.getInt(DATA_CACHE_TABLE_COLS_CHOSEN, 0);
    }

    /**
     * Internal. In strict mode, queries will error out if BookKeeper cannot be reached.
     *
     * @param c configuration to read from
     * @return value of {@link #DATA_CACHE_STRICT_MODE}, or {@code false} if unset
     */
    public static boolean isStrictMode(Configuration c) {
        return c.getBoolean(DATA_CACHE_STRICT_MODE, false);
    }

    // Helper methods to get information based on configuration

    /**
     * Decides whether the cache should be bypassed for {@code path}.
     *
     * @param path location being read
     * @param conf configuration to read from
     * @return {@code true} if caching is disabled, the location or the table is not allowed to
     *         be cached, or fewer columns than the configured minimum were chosen
     */
    public static boolean skipCache(Path path, Configuration conf) {
        // Checks short-circuit in this order: enabled, location, table, column count
        return !isCacheDataEnabled(conf)
                || !isLocationAllowedToCache(path, conf)
                || !isTableAllowedToCache(conf)
                || !minColumnsSelected(conf);
    }

    /**
     * Returns true if no table is set in the configuration, the table whitelist is empty, or
     * the table fully matches the whitelist regex.
     */
    private static boolean isTableAllowedToCache(Configuration conf) {
        String table = getCacheDataTable(conf);
        if (table == null || table.isEmpty()) {
            // Support not added by engine
            return true;
        }

        String whitelist = getCacheDataTableWhitelist(conf);
        return whitelist.isEmpty() || table.matches(whitelist);
    }

    /**
     * Returns true if the path passes the location whitelist regex and does not match the
     * location blacklist regex; an empty list is not applied.
     */
    private static boolean isLocationAllowedToCache(Path path, Configuration conf) {
        String location = path.toString();

        // Check whitelist first, if location matches whitelist and blacklist both then blacklist it
        String whitelist = getCacheDataLocationWhitelist(conf);
        if (!whitelist.isEmpty() && !location.matches(whitelist)) {
            return false;
        }

        String blacklist = getCacheDataLocationBlacklist(conf);
        return blacklist.isEmpty() || !location.matches(blacklist);
    }

    /**
     * Returns true if the number of chosen columns is at least the configured minimum.
     */
    private static boolean minColumnsSelected(Configuration conf) {
        return getCacheDataMinColumns(conf) <= getCacheDataChosenColumns(conf);
    }

    static boolean isCacheDataEnabled(Configuration c) {
        return c.getBoolean(DATA_CACHE_ENABLED, true);
    }

    static String getCacheDataTableWhitelist(Configuration c) {
        return c.get(DATA_CACHE_TABLE_WHITELIST, ".*");
    }

    static void setCacheDataTable(Configuration configuration, String table) {
        configuration.set(DATA_CACHE_TABLE, table);
    }

    static String getCacheDataTable(Configuration configuration) {
        return configuration.get(DATA_CACHE_TABLE, "");
    }

    static String getCacheDataLocationBlacklist(Configuration configuration) {
        return configuration.get(DATA_CACHE_LOCATION_BLACKLIST, "");
    }

    static String getCacheDataLocationWhitelist(Configuration configuration) {
        return configuration.get(DATA_CACHE_LOCATION_WHITELIST, ".*");
    }

    static int getCacheDataMinColumns(Configuration c) {
        return c.getInt(DATA_CACHE_TABLE_MIN_COLS, 0);
    }

    static void setCacheDataChosenColumns(Configuration c, int chosen) {
        c.setInt(DATA_CACHE_TABLE_COLS_CHOSEN, chosen);
    }
}