com.liveramp.hank.storage.cueball.Cueball.java Source code

Java tutorial

Introduction

Here is the source code for com.liveramp.hank.storage.cueball.Cueball.java

Source

/**
 * Copyright 2011 LiveRamp
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.liveramp.hank.storage.cueball;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;

import com.liveramp.hank.compression.cueball.CueballCompressionCodec;
import com.liveramp.hank.compression.cueball.NoCueballCompressionCodec;
import com.liveramp.hank.config.DataDirectoriesConfigurator;
import com.liveramp.hank.config.ReaderConfigurator;
import com.liveramp.hank.coordinator.Domain;
import com.liveramp.hank.coordinator.DomainVersion;
import com.liveramp.hank.hasher.Hasher;
import com.liveramp.hank.hasher.IdentityHasher;
import com.liveramp.hank.partition_server.DiskPartitionAssignment;
import com.liveramp.hank.storage.Compactor;
import com.liveramp.hank.storage.Deleter;
import com.liveramp.hank.storage.FileOpsUtil;
import com.liveramp.hank.storage.PartitionRemoteFileOps;
import com.liveramp.hank.storage.PartitionRemoteFileOpsFactory;
import com.liveramp.hank.storage.PartitionUpdater;
import com.liveramp.hank.storage.Reader;
import com.liveramp.hank.storage.RemoteDomainCleaner;
import com.liveramp.hank.storage.RemoteDomainVersionDeleter;
import com.liveramp.hank.storage.StorageEngine;
import com.liveramp.hank.storage.StorageEngineFactory;
import com.liveramp.hank.storage.Writer;
import com.liveramp.hank.storage.incremental.IncrementalDomainVersionProperties;
import com.liveramp.hank.storage.incremental.IncrementalStorageEngine;
import com.liveramp.hank.storage.incremental.IncrementalUpdatePlanner;
import com.liveramp.hank.util.FsUtils;

/**
 * Cueball is a storage engine optimized for small, fixed-size values.
 * <p>
 * Partition files are named {@code <version>.base.cueball} or {@code <version>.delta.cueball},
 * where {@code <version>} is the domain version number zero-padded to at least five digits
 * (see {@link #getName(int, boolean)}).
 */
public class Cueball extends IncrementalStorageEngine implements StorageEngine {

    // Captures the five digits immediately preceding ".base.cueball" / ".delta.cueball".
    private static final Pattern BASE_OR_DELTA_PATTERN = Pattern.compile(".*(\\d{5})\\.((base)|(delta))\\.cueball");
    static final String BASE_REGEX = ".*\\d{5}\\.base\\.cueball";
    static final String DELTA_REGEX = ".*\\d{5}\\.delta\\.cueball";

    /**
     * Builds {@link Cueball} instances from a map of string-keyed options.
     */
    public static class Factory implements StorageEngineFactory {

        public static final String REMOTE_DOMAIN_ROOT_KEY = "remote_domain_root";
        public static final String HASH_INDEX_BITS_KEY = "hash_index_bits";
        public static final String VALUE_SIZE_KEY = "value_size";
        public static final String KEY_HASH_SIZE_KEY = "key_hash_size";
        public static final String FILE_OPS_FACTORY_KEY = "file_ops_factory";
        public static final String HASHER_KEY = "hasher";
        public static final String COMPRESSION_CODEC = "compression_codec";
        public static final String NUM_REMOTE_LEAF_VERSIONS_TO_KEEP = "num_remote_leaf_versions_to_keep";

        // Options that must be present and non-null; COMPRESSION_CODEC is optional.
        private static final Set<String> REQUIRED_KEYS = new HashSet<String>(
                Arrays.asList(HASH_INDEX_BITS_KEY, HASHER_KEY, VALUE_SIZE_KEY, KEY_HASH_SIZE_KEY,
                        FILE_OPS_FACTORY_KEY, NUM_REMOTE_LEAF_VERSIONS_TO_KEEP));

        /**
         * Creates a Cueball storage engine for the given domain.
         *
         * @param options engine options; every key in {@code REQUIRED_KEYS} must map to a non-null value
         * @param domain  the domain the engine will serve
         * @return a configured {@link Cueball} instance
         * @throws IOException if a required option is missing, if the hasher or file ops factory
         *                     cannot be instantiated, or if the compression codec class cannot be
         *                     found or is not a {@link CueballCompressionCodec}
         */
        @Override
        public StorageEngine getStorageEngine(Map<String, Object> options, Domain domain) throws IOException {
            for (String requiredKey : REQUIRED_KEYS) {
                if (options == null || options.get(requiredKey) == null) {
                    throw new IOException("Required key '" + requiredKey + "' was not found!");
                }
            }

            // Hasher and remote file ops factory are both instantiated reflectively
            // through their no-argument constructors.
            Hasher hasher;
            PartitionRemoteFileOpsFactory fileOpsFactory;
            try {
                hasher = (Hasher) Class.forName((String) options.get(HASHER_KEY))
                        .getDeclaredConstructor().newInstance();
                fileOpsFactory = (PartitionRemoteFileOpsFactory) Class
                        .forName((String) options.get(FILE_OPS_FACTORY_KEY))
                        .getDeclaredConstructor().newInstance();
            } catch (Exception e) {
                throw new IOException(e);
            }

            // Compression codec (defaults to no compression when the option is absent)
            String compressionCodec = (String) options.get(COMPRESSION_CODEC);
            Class<? extends CueballCompressionCodec> compressionCodecClass = NoCueballCompressionCodec.class;
            if (compressionCodec != null) {
                try {
                    // asSubclass verifies the type now instead of deferring the failure
                    // to the first time a codec instance is requested.
                    compressionCodecClass = Class.forName(compressionCodec)
                            .asSubclass(CueballCompressionCodec.class);
                } catch (ClassNotFoundException e) {
                    throw new IOException("Failed to get CompressionCodec class '" + compressionCodec + "'!", e);
                } catch (ClassCastException e) {
                    throw new IOException(
                            "Class '" + compressionCodec + "' is not a CueballCompressionCodec!", e);
                }
            }

            // Num remote bases to keep
            Integer numRemoteLeafVersionsToKeep = (Integer) options.get(NUM_REMOTE_LEAF_VERSIONS_TO_KEEP);

            return new Cueball((Integer) options.get(KEY_HASH_SIZE_KEY), hasher,
                    (Integer) options.get(VALUE_SIZE_KEY), (Integer) options.get(HASH_INDEX_BITS_KEY),
                    FileOpsUtil.getDomainBuilderRoot(options), FileOpsUtil.getPartitionServerRoot(options),
                    fileOpsFactory, compressionCodecClass, domain, numRemoteLeafVersionsToKeep);
        }

        @Override
        public String getPrettyName() {
            return "Cueball";
        }

        @Override
        public String getDefaultOptions() {
            return "";
        }
    }

    private final Domain domain;

    private final int keyHashSize;
    private final Hasher hasher;
    private final int valueSize;
    private final int hashIndexBits;
    private final String domainBuilderRemoteDomainRoot;
    private final String partitionServerRemoteDomainRoot;
    private final PartitionRemoteFileOpsFactory partitionRemoteFileOpsFactory;
    // Single buffer of keyHashSize bytes reused by every getComparableKey() call.
    private final ByteBuffer keyHashBuffer;
    private final int numRemoteLeafVersionsToKeep;

    private final Class<? extends CueballCompressionCodec> compressionCodecClass;

    /**
     * Creates a Cueball storage engine.
     *
     * @param keyHashSize                     size in bytes of the buffer holding a hashed key
     * @param hasher                          hasher applied to keys
     * @param valueSize                       value size passed to readers, writers and updaters
     * @param hashIndexBits                   number of hash index bits; must not exceed 32
     * @param domainBuilderRemoteDomainRoot   remote root used for {@code RemoteLocation.DOMAIN_BUILDER}
     * @param partitionServerRemoteDomainRoot remote root used for {@code RemoteLocation.PARTITION_SERVER}
     * @param partitionRemoteFileOpsFactory   factory for per-partition remote file operations
     * @param compressionCodecClass           codec class, instantiated on demand via its no-arg constructor
     * @param domain                          the domain this engine serves
     * @param numRemoteLeafVersionsToKeep     value handed to the remote domain cleaner
     * @throws RuntimeException if {@code hashIndexBits} is greater than 32
     */
    public Cueball(int keyHashSize, Hasher hasher, int valueSize, int hashIndexBits,
            String domainBuilderRemoteDomainRoot, String partitionServerRemoteDomainRoot,
            PartitionRemoteFileOpsFactory partitionRemoteFileOpsFactory,
            Class<? extends CueballCompressionCodec> compressionCodecClass, Domain domain,
            int numRemoteLeafVersionsToKeep) {
        this.keyHashSize = keyHashSize;
        this.hasher = hasher;
        this.valueSize = valueSize;
        this.hashIndexBits = hashIndexBits;
        this.domainBuilderRemoteDomainRoot = domainBuilderRemoteDomainRoot;
        this.partitionServerRemoteDomainRoot = partitionServerRemoteDomainRoot;
        this.partitionRemoteFileOpsFactory = partitionRemoteFileOpsFactory;
        this.keyHashBuffer = ByteBuffer.allocate(keyHashSize);
        this.compressionCodecClass = compressionCodecClass;
        this.domain = domain;
        this.numRemoteLeafVersionsToKeep = numRemoteLeafVersionsToKeep;
        // Sanity check
        if (hashIndexBits > 32) {
            throw new RuntimeException("hashIndexBits is much too large (" + hashIndexBits + ")");
        }
    }

    /**
     * Creates a reader over the local directory of the given partition.
     * The configurator's item-count cache capacity is narrowed to {@code int}.
     */
    @Override
    public Reader getReader(ReaderConfigurator configurator, int partitionNumber,
            DiskPartitionAssignment assignment) throws IOException {
        return new CueballReader(getTargetDirectory(assignment, partitionNumber), keyHashSize, hasher, valueSize,
                hashIndexBits, getCompressionCodec(), configurator.getCacheNumBytesCapacity(),
                (int) configurator.getCacheNumItemsCapacity());
    }

    /**
     * Instantiates a fresh compression codec through its no-argument constructor.
     *
     * @throws IOException wrapping any failure to instantiate the codec class
     */
    private CueballCompressionCodec getCompressionCodec() throws IOException {
        try {
            return compressionCodecClass.getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    /**
     * Creates a writer targeting the remote file named after the version number and its
     * base/delta flag.
     */
    @Override
    public Writer getWriter(DomainVersion domainVersion, PartitionRemoteFileOps partitionRemoteFileOps,
            int partitionNumber) throws IOException {
        IncrementalDomainVersionProperties domainVersionProperties = getDomainVersionProperties(domainVersion);
        return new CueballWriter(
                partitionRemoteFileOps.getOutputStream(
                        getName(domainVersion.getVersionNumber(), domainVersionProperties.isBase())),
                keyHashSize, hasher, valueSize, getCompressionCodec(), hashIndexBits);
    }

    /**
     * Returns the version's properties as {@link IncrementalDomainVersionProperties}.
     *
     * @throws IOException if the properties are null or are not incremental properties
     */
    private IncrementalDomainVersionProperties getDomainVersionProperties(DomainVersion domainVersion)
            throws IOException {
        IncrementalDomainVersionProperties result;
        try {
            result = (IncrementalDomainVersionProperties) domainVersion.getProperties();
        } catch (ClassCastException e) {
            // Keep the original exception as the cause so the offending type is visible.
            throw new IOException("Failed to load properties of version " + domainVersion, e);
        }
        if (result == null) {
            throw new IOException("Null properties for version " + domainVersion);
        }
        return result;
    }

    @Override
    public IncrementalUpdatePlanner getUpdatePlanner(Domain domain) {
        return new CueballUpdatePlanner(domain);
    }

    /**
     * Creates an updater that pulls from the partition server remote root into the
     * partition's local directory.
     */
    @Override
    public PartitionUpdater getUpdater(DiskPartitionAssignment assignment, int partitionNumber) throws IOException {
        String localDir = getTargetDirectory(assignment, partitionNumber);
        return new CueballPartitionUpdater(domain,
                getPartitionRemoteFileOps(RemoteLocation.PARTITION_SERVER, partitionNumber), new CueballMerger(),
                keyHashSize, valueSize, hashIndexBits, getCompressionCodec(), localDir);
    }

    /**
     * Not supported by this engine.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Compactor getCompactor(DiskPartitionAssignment configurator, int partitionNumber) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Same as {@link #getWriter} except that keys are written through an {@link IdentityHasher}.
     */
    @Override
    public Writer getCompactorWriter(DomainVersion domainVersion, PartitionRemoteFileOps partitionRemoteFileOps,
            int partitionNumber) throws IOException {
        IncrementalDomainVersionProperties domainVersionProperties = getDomainVersionProperties(domainVersion);
        // Note: We use the identity hasher since keys coming in are already hashed keys
        return new CueballWriter(
                partitionRemoteFileOps.getOutputStream(
                        getName(domainVersion.getVersionNumber(), domainVersionProperties.isBase())),
                keyHashSize, new IdentityHasher(), valueSize, getCompressionCodec(), hashIndexBits);
    }

    /**
     * Hashes the key using this engine's hasher and returns the engine's key hash buffer.
     * <p>
     * NOTE(review): the returned buffer is the single instance allocated in the constructor,
     * so each call reuses it and concurrent calls would share it. Confirm that callers consume
     * the result before the next call and do not invoke this from multiple threads.
     */
    @Override
    public ByteBuffer getComparableKey(ByteBuffer key) {
        hasher.hash(key, keyHashSize, keyHashBuffer.array());
        return keyHashBuffer;
    }

    /**
     * Returns the configured factory; the location argument is not consulted.
     */
    @Override
    public PartitionRemoteFileOpsFactory getPartitionRemoteFileOpsFactory(RemoteLocation location) {
        return partitionRemoteFileOpsFactory;
    }

    @Override
    public PartitionRemoteFileOps getPartitionRemoteFileOps(RemoteLocation location, int partitionNumber)
            throws IOException {
        return partitionRemoteFileOpsFactory.getPartitionRemoteFileOps(getRoot(location), partitionNumber);
    }

    @Override
    public Deleter getDeleter(DiskPartitionAssignment assignment, int partitionNumber) throws IOException {
        String localDir = getTargetDirectory(assignment, partitionNumber);
        return new CueballDeleter(localDir);
    }

    /**
     * Formats the version number zero-padded to five digits. Numbers needing more than five
     * digits are not truncated.
     */
    public static String padVersionNumber(int versionNumber) {
        return String.format("%05d", versionNumber);
    }

    /**
     * Returns the sorted set of base files found in the given directories.
     */
    public static SortedSet<CueballFilePath> getBases(String... dirs) throws IOException {
        return getMatchingFilePaths(BASE_REGEX, dirs);
    }

    /**
     * Returns the sorted set of delta files found in the given directories.
     */
    public static SortedSet<CueballFilePath> getDeltas(String... dirs) throws IOException {
        return getMatchingFilePaths(DELTA_REGEX, dirs);
    }

    // Collects every path under dirs matching the regex into a sorted set of CueballFilePath.
    private static SortedSet<CueballFilePath> getMatchingFilePaths(String regex, String... dirs)
            throws IOException {
        SortedSet<CueballFilePath> result = new TreeSet<CueballFilePath>();
        for (String path : FsUtils.getMatchingPaths(regex, dirs)) {
            result.add(new CueballFilePath(path));
        }
        return result;
    }

    /**
     * Extracts the version number from a base or delta file name or path.
     * <p>
     * Only the five digits immediately before the {@code .base.cueball} / {@code .delta.cueball}
     * suffix are parsed, so a version number wider than five digits would not round-trip.
     *
     * @throws IllegalArgumentException if the name is not a base or delta Cueball file name
     */
    public static int parseVersionNumber(String name) {
        Matcher matcher = BASE_OR_DELTA_PATTERN.matcher(name);
        if (!matcher.matches()) {
            throw new IllegalArgumentException(
                    "string " + name + " isn't a path that parseVersionNumber can parse!");
        }

        return Integer.parseInt(matcher.group(1));
    }

    /**
     * Builds the file name for a version: padded version number, then {@code .base} or
     * {@code .delta}, then {@code .cueball}.
     */
    public static String getName(int versionNumber, boolean base) {
        return padVersionNumber(versionNumber) + "." + (base ? "base" : "delta") + ".cueball";
    }

    public static String getName(DomainVersion domainVersion) throws IOException {
        return getName(domainVersion.getVersionNumber(), IncrementalDomainVersionProperties.isBase(domainVersion));
    }

    @Override
    public RemoteDomainVersionDeleter getRemoteDomainVersionDeleter(RemoteLocation location) throws IOException {
        return new CueballRemoteDomainVersionDeleter(domain, getRoot(location), partitionRemoteFileOpsFactory);
    }

    @Override
    public RemoteDomainCleaner getRemoteDomainCleaner() throws IOException {
        return new CueballRemoteDomainCleaner(domain, numRemoteLeafVersionsToKeep);
    }

    @Override
    public DiskPartitionAssignment getDataDirectoryPerPartition(DataDirectoriesConfigurator configurator,
            Collection<Integer> partitionNumbers) {
        return getDataDirectoryAssignments(configurator, partitionNumbers);
    }

    /**
     * Spreads the partitions over the configured data directories.
     * <p>
     * Directories and partition numbers are both sorted, then partitions are handed out in
     * order so that after visiting the k-th directory {@code ceil(k * partitions / directories)}
     * partitions have been placed. When there are more directories than partitions, some
     * directories receive none. When no data directories are configured the loop never runs
     * and the returned assignment is empty.
     *
     * @param configurator     supplies the data directories
     * @param partitionNumbers partitions to place
     * @return the partition-to-directory assignment
     */
    public static DiskPartitionAssignment getDataDirectoryAssignments(DataDirectoriesConfigurator configurator,
            Collection<Integer> partitionNumbers) {

        ArrayList<String> sortedDataDirectories = new ArrayList<String>(configurator.getDataDirectories());
        Collections.sort(sortedDataDirectories);

        LinkedList<Integer> sortedPartitions = new LinkedList<>(partitionNumbers);
        Collections.sort(sortedPartitions);

        //  TODO we can make this dynamic based on disk size, but not urgent
        double numPartitionsPerDisk = (double) partitionNumbers.size() / sortedDataDirectories.size();

        Multimap<String, Integer> partitionsPerDisk = HashMultimap.create();
        // Count visited directories explicitly. Deriving this from the multimap's key set
        // stalls as soon as one directory receives zero partitions (it never becomes a key),
        // which left the remaining partitions unassigned.
        int numDisksVisited = 0;
        for (String dataDirectory : sortedDataDirectories) {
            numDisksVisited++;

            int numToAssign = (int) Math.ceil(numPartitionsPerDisk * numDisksVisited)
                    - partitionsPerDisk.size();

            for (int i = 0; i < numToAssign && !sortedPartitions.isEmpty(); i++) {
                partitionsPerDisk.put(dataDirectory, sortedPartitions.pop());
            }

        }

        // Invert disk -> partitions into partition -> disk.
        Map<Integer, String> inverse = Maps.newHashMap();
        for (Map.Entry<String, Integer> entry : partitionsPerDisk.entries()) {
            inverse.put(entry.getValue(), entry.getKey());
        }

        return new DiskPartitionAssignment(inverse);
    }

    // Local directory of a partition: <disk>/<domain name>/<partition number>.
    private String getTargetDirectory(DiskPartitionAssignment assignment, int partitionNumber) {
        return assignment.getDisk(partitionNumber) + "/" + domain.getName() + "/" + partitionNumber;
    }

    /**
     * Returns a single-element set holding the local path of the base file for the given
     * version and partition.
     * <p>
     * NOTE(review): the name is always built with the base flag set, regardless of whether
     * the version is a base or a delta. Confirm this is intended.
     */
    @Override
    public Set<String> getFiles(DiskPartitionAssignment assignment, int domainVersionNumber, int partitionNumber)
            throws IOException {
        Set<String> result = new HashSet<String>();
        result.add(getTargetDirectory(assignment, partitionNumber) + "/" + getName(domainVersionNumber, true));
        return result;
    }

    /**
     * Maps a remote location to its configured root.
     *
     * @throws IllegalArgumentException if the location is neither DOMAIN_BUILDER nor PARTITION_SERVER
     */
    private String getRoot(RemoteLocation location) {
        if (location == RemoteLocation.DOMAIN_BUILDER) {
            return domainBuilderRemoteDomainRoot;
        } else if (location == RemoteLocation.PARTITION_SERVER) {
            return partitionServerRemoteDomainRoot;
        } else {
            throw new IllegalArgumentException("Unsupported remote location: " + location);
        }
    }

    @Override
    public String toString() {
        return "Cueball{" + "keyHashSize=" + keyHashSize + ", hasher=" + hasher + ", valueSize=" + valueSize
                + ", hashIndexBits=" + hashIndexBits + ", domainBuilderRemoteDomainRoot='"
                + domainBuilderRemoteDomainRoot + '\'' + ", partitionServerRemoteDomainRoot='"
                + partitionServerRemoteDomainRoot + '\'' + ", partitionRemoteFileOpsFactory="
                + partitionRemoteFileOpsFactory + ", keyHashBuffer=" + keyHashBuffer
                + ", numRemoteLeafVersionsToKeep=" + numRemoteLeafVersionsToKeep + ", compressionCodecClass="
                + compressionCodecClass + '}';
    }
}