com.earnstone.index.ShardedIndex.java Source code

Java tutorial

Introduction

Here is the source code for com.earnstone.index.ShardedIndex.java

Source

/*
 * Eindex: Earnstone Sharded Column Index for Cassandra
 * 
 * Copyright 2011 Corey Hulen, Earnstone Corporation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. 
 */
package com.earnstone.index;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.TreeMap;

import org.apache.commons.lang.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import me.prettyprint.cassandra.serializers.BytesArraySerializer;
import me.prettyprint.hector.api.Cluster;
import me.prettyprint.hector.api.Keyspace;
import me.prettyprint.hector.api.Serializer;
import me.prettyprint.hector.api.beans.ColumnSlice;
import me.prettyprint.hector.api.beans.HColumn;
import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
import me.prettyprint.hector.api.ddl.ColumnType;
import me.prettyprint.hector.api.ddl.ComparatorType;
import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
import me.prettyprint.hector.api.exceptions.HectorException;
import me.prettyprint.hector.api.factory.HFactory;
import me.prettyprint.hector.api.mutation.Mutator;
import me.prettyprint.hector.api.query.ColumnQuery;
import me.prettyprint.hector.api.query.QueryResult;
import me.prettyprint.hector.api.query.SliceQuery;

public abstract class ShardedIndex<T> {

    private static byte[] Delim = { ':' };
    private static byte[] SubDelim = { ':', ':' };
    private static final Logger log = LoggerFactory.getLogger(ShardedIndex.class);

    protected Cluster cluster;
    protected Keyspace keyspace;
    protected String columnFamily;
    protected byte[] baseIndexKey;
    protected byte[] emptyIndexKey;
    protected TreeMap<T, T> shards;

    /**
     * Constructs a sharded index. It is expected that multiple indexes of the
     * same indexType are stored in the same column family and will be
     * differentiated by the index name.
     * 
     * @param indexType
     *            The type of the index to create (this is needed because of
     *            type erasure).
     * @param comparatorType
     *            The Cassandra columnFamily comparator type.
     * @param cluster
     *            The running cluster.
     * @param keyspace
     *            Will verify if the keyspace exists and throws a
     *            IllegalArgumentException if the keyspace doesn't exist.
     * @param columnFamily
     *            Will verify if the column family exists and throws a
     *            IllegalArgumentException if the column family doesn't exist.
     * @param name
     *            the name of the index.
     */
    protected ShardedIndex(Cluster cluster, Keyspace keyspace, String columnFamily, byte[] baseIndexKey) {
        this.cluster = cluster;
        this.keyspace = keyspace;
        this.columnFamily = columnFamily;
        this.baseIndexKey = baseIndexKey;

        if (!getIndexType().equals(Long.class)) {
            String msg = "Only indexType of Long.class is currently supported.";
            log.error(msg);
            throw new IllegalArgumentException(msg);
        }

        KeyspaceDefinition kdef = cluster.describeKeyspace(keyspace.getKeyspaceName());
        ColumnFamilyDefinition cdef = null;

        for (ColumnFamilyDefinition tempDef : kdef.getCfDefs()) {
            if (tempDef.getName().equals(columnFamily)) {
                cdef = tempDef;
                break;
            }
        }

        if (cdef == null) {
            String msg = "Missing column family '" + columnFamily + "' for keyspace '" + keyspace + "'";
            log.error(msg);
            throw new IllegalArgumentException(msg);
        }

        if (!cdef.getColumnType().equals(ColumnType.STANDARD)) {
            String msg = "Expected column family '" + columnFamily + "' to be ColumnType.STANDARD";
            log.error(msg);
            throw new IllegalArgumentException(msg);
        }

        if (!cdef.getComparatorType().equals(getComparatorType())) {
            String msg = "Expected comparator of type '" + getComparatorType().getClassName() + "', but was '"
                    + cdef.getComparatorType().getClassName() + "' for column family '" + columnFamily + "'";
            log.error(msg);
            throw new IllegalArgumentException(msg);
        }

        if (baseIndexKey == null || baseIndexKey.length == 0) {
            String msg = "Sharded index name cannot be null or empty.";
            log.error(msg);
            throw new IllegalArgumentException(msg);
        }

        emptyIndexKey = ArrayUtils.addAll(baseIndexKey, ArrayUtils.addAll(getEmptyValue(), Delim));

        reloadShardsCache();
    }

    public boolean initializeShardBoundries(List<T> startingBoundries) {

        if (shards.size() == 0) {
            TreeMap<T, T> map = new TreeMap<T, T>();

            for (T value : startingBoundries) {
                map.put(value, value);
            }

            saveShardsCache(map);
            shards = map;
            return true;
        } else {
            return false;
        }
    }

    public void reloadShardsCache() {
        ColumnQuery<byte[], T, byte[]> columnQuery = HFactory.createColumnQuery(keyspace,
                BytesArraySerializer.get(), getColumnNameSerializer(), BytesArraySerializer.get());
        columnQuery.setColumnFamily(columnFamily).setKey(baseIndexKey).setName(getShardColumnName());
        QueryResult<HColumn<T, byte[]>> result = columnQuery.execute();

        if (result.get() == null)
            shards = new TreeMap<T, T>();
        else {
            List<T> list = getDataListForBytes(result.get().getValue());
            TreeMap<T, T> map = new TreeMap<T, T>();
            for (T value : list) {
                map.put(value, value);
            }

            shards = map;
        }
    }

    protected void saveShardsCache(TreeMap<T, T> map) {
        byte[] raw = getBytesForDataList(new ArrayList<T>(map.values()));
        Mutator<byte[]> mutator = HFactory.createMutator(keyspace, BytesArraySerializer.get());
        mutator.insert(baseIndexKey, columnFamily, HFactory.createColumn(getShardColumnName(), raw,
                getColumnNameSerializer(), BytesArraySerializer.get()));
    }

    public void splitAndBuildShards() {
        throw new UnsupportedOperationException("Currently you must rely on the prepopulate splits.");
    }

    public void addToIndex(T index, T valueToAdd) throws HectorException {
        byte[] shardKey = getShardKeyForIndex(index);
        byte[] raw = getRawDataForIndex(shardKey, keyspace, index);

        if (raw == null || raw.length == 0) {
            overwriteIndex(shardKey, keyspace, index, getBytesForData(valueToAdd));
        } else if (Arrays.equals(getEmptyValue(), raw)) {
            byte[] subShardKey = getSubShardKeyForIndex(index);
            overwriteIndex(subShardKey, keyspace, valueToAdd, getBytesForData(valueToAdd));
        } else {
            byte[] subShardKey = getSubShardKeyForIndex(index);
            T previousValue = getDataForBytes(raw);
            overwriteIndex(subShardKey, keyspace, previousValue, getBytesForData(previousValue));
            overwriteIndex(subShardKey, keyspace, valueToAdd, getBytesForData(valueToAdd));
            overwriteIndex(shardKey, keyspace, index, getEmptyValue());
        }
    }

    public void removeAllValuesAtIndex(T index) {
        byte[] shardKey = getShardKeyForIndex(index);
        byte[] subShardKey = getSubShardKeyForIndex(index);
        removeIndex(subShardKey, keyspace, null);
        removeIndex(shardKey, keyspace, index);
    }

    public void removeValueAtIndex(T index, T valueToRemove) {
        byte[] shardKey = getShardKeyForIndex(index);
        byte[] raw = getRawDataForIndex(shardKey, keyspace, index);

        if (raw == null || raw.length == 0) {
            return;
        } else if (Arrays.equals(getEmptyValue(), raw)) {
            byte[] subShardKey = getSubShardKeyForIndex(index);
            removeIndex(subShardKey, keyspace, valueToRemove);
        } else {
            removeIndex(shardKey, keyspace, index);
        }
    }

    protected void removeIndex(byte[] shardKey, Keyspace ks, T index) {
        Mutator<byte[]> mutator = HFactory.createMutator(ks, BytesArraySerializer.get());
        mutator.delete(shardKey, columnFamily, index, getColumnNameSerializer());
    }

    public void overwriteIndex(T index, T valueToOverwriteWith) {
        byte[] shardKey = getShardKeyForIndex(index);
        overwriteIndex(shardKey, keyspace, index, getBytesForData(valueToOverwriteWith));
    }

    protected void overwriteIndex(byte[] shardKey, Keyspace ks, T index, byte[] value) {
        Mutator<byte[]> mutator = HFactory.createMutator(ks, BytesArraySerializer.get());
        mutator.insert(shardKey, columnFamily,
                HFactory.createColumn(index, value, getColumnNameSerializer(), BytesArraySerializer.get()));
    }

    public T getValueForIndex(T index) {
        List<T> list = getValuesForIndex(index, 1);

        if (list.size() > 0)
            return list.get(0);
        else
            return null;
    }

    public List<IndexItem<T>> getValueRangesForIndex(T index, boolean reversed, int limit, int subIndexLimit) {
        byte[] shardKey = getShardKeyForIndex(index);
        List<IndexItem<T>> list = getValueRangesForIndex(shardKey, keyspace, index, reversed, limit, subIndexLimit);

        if (list.size() < limit) {
            byte[] nextShardKey = getNextNearestShardKeyForIndex(index, reversed);

            if (nextShardKey != null) {
                List<IndexItem<T>> nextList = getValueRangesForIndex(nextShardKey, keyspace, index, reversed, limit,
                        subIndexLimit);

                int countToAdd = limit - list.size();

                if (countToAdd > nextList.size())
                    countToAdd = nextList.size();

                for (int i = 0; i < countToAdd; i++) {
                    list.add(nextList.get(i));
                }
            }
        }

        return list;
    }

    protected List<IndexItem<T>> getValueRangesForIndex(byte[] shardKey, Keyspace ks, T index, boolean reversed,
            int limit, int subIndexLimit) {

        SliceQuery<byte[], T, byte[]> query = HFactory.createSliceQuery(ks, BytesArraySerializer.get(),
                getColumnNameSerializer(), BytesArraySerializer.get());
        query.setColumnFamily(columnFamily);
        query.setKey(shardKey);
        query.setRange(index, null, reversed, limit);
        QueryResult<ColumnSlice<T, byte[]>> result = query.execute();
        ColumnSlice<T, byte[]> cs = result.get();

        List<IndexItem<T>> list = new ArrayList<IndexItem<T>>();

        if (cs == null)
            return list;

        for (HColumn<T, byte[]> column : cs.getColumns()) {
            IndexItem<T> item = new IndexItem<T>();
            item.setIndex(column.getName());
            list.add(item);

            if (column.getValue() == null || column.getValue().length == 0) {
                item.setValues(new ArrayList<T>());
            } else if (Arrays.equals(getEmptyValue(), column.getValue())) {
                byte[] subShardKey = getSubShardKeyForIndex(item.getIndex());
                item.setValues(getValuesForIndex(subShardKey, ks, subIndexLimit));
            } else {
                item.setValues(new ArrayList<T>());
                item.getValues().add(getDataForBytes(column.getValue()));
            }
        }

        return list;
    }

    public List<T> getValuesForIndex(T index, int limit) {
        byte[] shardKey = getShardKeyForIndex(index);
        byte[] raw = getRawDataForIndex(shardKey, keyspace, index);

        if (raw == null || raw.length == 0) {
            return new ArrayList<T>();
        } else if (Arrays.equals(getEmptyValue(), raw)) {
            byte[] subShardKey = getSubShardKeyForIndex(index);
            return getValuesForIndex(subShardKey, keyspace, limit);
        } else {
            ArrayList<T> list = new ArrayList<T>();
            list.add(getDataForBytes(raw));
            return list;
        }
    }

    protected List<T> getValuesForIndex(byte[] shardKey, Keyspace ks, int limit) {
        ArrayList<T> list = new ArrayList<T>();
        SliceQuery<byte[], T, byte[]> query = HFactory.createSliceQuery(ks, BytesArraySerializer.get(),
                getColumnNameSerializer(), BytesArraySerializer.get());
        query.setColumnFamily(columnFamily);
        query.setKey(shardKey);
        query.setRange(null, null, false, limit);
        QueryResult<ColumnSlice<T, byte[]>> result = query.execute();
        ColumnSlice<T, byte[]> cs = result.get();

        if (cs == null)
            return list;

        for (HColumn<T, byte[]> column : cs.getColumns()) {
            list.add(getDataForBytes(column.getValue()));
        }

        return list;
    }

    protected byte[] getRawDataForIndex(byte[] shardKey, Keyspace ks, T index) {
        ColumnQuery<byte[], T, byte[]> columnQuery = HFactory.createColumnQuery(ks, BytesArraySerializer.get(),
                getColumnNameSerializer(), BytesArraySerializer.get());
        columnQuery.setColumnFamily(columnFamily).setKey(shardKey).setName(index);
        QueryResult<HColumn<T, byte[]>> result = columnQuery.execute();

        if (result.get() == null)
            return null;
        else
            return result.get().getValue();
    }

    protected byte[] getShardKeyForIndex(T index) {

        if (shards.size() == 0) {
            return emptyIndexKey;
        } else {
            T shard = shards.ceilingKey(index);

            if (shard == null)
                return emptyIndexKey;
            else
                return ArrayUtils.addAll(baseIndexKey, ArrayUtils.addAll(Delim, getBytesForData(shard)));
        }
    }

    protected byte[] getNextNearestShardKeyForIndex(T index, boolean reversed) {

        if (shards.size() == 0) {
            return null;
        } else {
            T shard = shards.ceilingKey(index);

            if (reversed) {
                if (shard == null)
                    shard = shards.lastKey();
                else
                    shard = shards.lowerKey(shard);

                if (shard == null)
                    return null;
            } else {
                if (shard == null)
                    return null;
                else
                    shard = shards.higherKey(shard);

                if (shard == null)
                    return emptyIndexKey;
            }

            return ArrayUtils.addAll(baseIndexKey, ArrayUtils.addAll(Delim, getBytesForData(shard)));
        }
    }

    protected byte[] getSubShardKeyForIndex(T index) {
        return ArrayUtils.addAll(baseIndexKey, ArrayUtils.addAll(SubDelim, getBytesForData(index)));
    }

    public Cluster getCluster() {
        return cluster;
    }

    public Keyspace getKeyspace() {
        return keyspace;
    }

    public String getColumnFamily() {
        return columnFamily;
    }

    public byte[] getBaseIndexKey() {
        return baseIndexKey;
    }

    public abstract ComparatorType getComparatorType();

    public abstract Class<T> getIndexType();

    public abstract byte[] getEmptyValue();

    public abstract Serializer<T> getColumnNameSerializer();

    public abstract byte[] getBytesForData(T data);

    public abstract T getDataForBytes(byte[] data);

    public abstract byte[] getBytesForDataList(List<T> data);

    public abstract List<T> getDataListForBytes(byte[] data);

    public abstract T getShardColumnName();

    public abstract T getStatisticsColumnName();
}