org.apache.fluo.recipes.map.CollisionFreeMap.java Source code

Introduction

Here is the source code for org.apache.fluo.recipes.map.CollisionFreeMap.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.fluo.recipes.map;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import com.google.common.collect.Sets;
import com.google.common.hash.Hashing;
import org.apache.fluo.api.client.SnapshotBase;
import org.apache.fluo.api.client.TransactionBase;
import org.apache.fluo.api.config.FluoConfiguration;
import org.apache.fluo.api.config.ObserverConfiguration;
import org.apache.fluo.api.config.ScannerConfiguration;
import org.apache.fluo.api.config.SimpleConfiguration;
import org.apache.fluo.api.data.Bytes;
import org.apache.fluo.api.data.BytesBuilder;
import org.apache.fluo.api.data.Column;
import org.apache.fluo.api.data.RowColumn;
import org.apache.fluo.api.data.RowColumnValue;
import org.apache.fluo.api.data.Span;
import org.apache.fluo.api.iterator.ColumnIterator;
import org.apache.fluo.api.iterator.RowIterator;
import org.apache.fluo.recipes.common.Pirtos;
import org.apache.fluo.recipes.common.RowRange;
import org.apache.fluo.recipes.common.TransientRegistry;
import org.apache.fluo.recipes.impl.BucketUtil;
import org.apache.fluo.recipes.serialization.SimpleSerializer;

/**
 * See the project level documentation for information about this recipe.
 */
public class CollisionFreeMap<K, V> {

    private static final String UPDATE_RANGE_END = ":u:~";

    private static final String DATA_RANGE_END = ":d:~";

    private String mapId;

    private Class<K> keyType;
    private Class<V> valType;
    private SimpleSerializer serializer;
    private Combiner<K, V> combiner;
    UpdateObserver<K, V> updateObserver;
    private long bufferSize;

    static final Column UPDATE_COL = new Column("u", "v");
    static final Column NEXT_COL = new Column("u", "next");

    private int numBuckets = -1;

    @SuppressWarnings("unchecked")
    CollisionFreeMap(Options opts, SimpleSerializer serializer) throws Exception {

        this.mapId = opts.mapId;
        // TODO defer loading classes
        // TODO centralize class loading
        // TODO try to check type params
        this.numBuckets = opts.numBuckets;
        this.keyType = (Class<K>) getClass().getClassLoader().loadClass(opts.keyType);
        this.valType = (Class<V>) getClass().getClassLoader().loadClass(opts.valueType);
        this.combiner = (Combiner<K, V>) getClass().getClassLoader().loadClass(opts.combinerType).newInstance();
        this.serializer = serializer;
        if (opts.updateObserverType != null) {
            this.updateObserver = getClass().getClassLoader().loadClass(opts.updateObserverType)
                    .asSubclass(UpdateObserver.class).newInstance();
        } else {
            this.updateObserver = new NullUpdateObserver<>();
        }
        this.bufferSize = opts.getBufferSize();
    }

    private V deserVal(Bytes val) {
        return serializer.deserialize(val.toArray(), valType);
    }

    private Bytes getKeyFromUpdateRow(Bytes prefix, Bytes row) {
        return row.subSequence(prefix.length(), row.length() - 8);
    }

    void process(TransactionBase tx, Bytes ntfyRow, Column col) throws Exception {

        Bytes nextKey = tx.get(ntfyRow, NEXT_COL);

        ScannerConfiguration sc = new ScannerConfiguration();

        if (nextKey != null) {
            Bytes startRow = Bytes.newBuilder(ntfyRow.length() + nextKey.length()).append(ntfyRow).append(nextKey)
                    .toBytes();
            Span tmpSpan = Span.prefix(ntfyRow);
            Span nextSpan = new Span(new RowColumn(startRow, UPDATE_COL), false, tmpSpan.getEnd(),
                    tmpSpan.isEndInclusive());
            sc.setSpan(nextSpan);
        } else {
            sc.setSpan(Span.prefix(ntfyRow));
        }

        sc.setSpan(Span.prefix(ntfyRow));
        sc.fetchColumn(UPDATE_COL.getFamily(), UPDATE_COL.getQualifier());
        RowIterator iter = tx.get(sc);

        Map<Bytes, List<Bytes>> updates = new HashMap<>();

        long approxMemUsed = 0;

        Bytes partiallyReadKey = null;

        if (iter.hasNext()) {
            Bytes lastKey = null;
            while (iter.hasNext() && approxMemUsed < bufferSize) {
                Entry<Bytes, ColumnIterator> rowCol = iter.next();
                Bytes curRow = rowCol.getKey();

                tx.delete(curRow, UPDATE_COL);

                Bytes serializedKey = getKeyFromUpdateRow(ntfyRow, curRow);
                lastKey = serializedKey;

                List<Bytes> updateList = updates.get(serializedKey);
                if (updateList == null) {
                    updateList = new ArrayList<>();
                    updates.put(serializedKey, updateList);
                }

                Bytes val = rowCol.getValue().next().getValue();
                updateList.add(val);

                approxMemUsed += curRow.length();
                approxMemUsed += val.length();
            }

            if (iter.hasNext()) {
                Entry<Bytes, ColumnIterator> rowCol = iter.next();
                Bytes curRow = rowCol.getKey();

                // check if more updates for last key
                if (getKeyFromUpdateRow(ntfyRow, curRow).equals(lastKey)) {
                    // there are still more updates for this key
                    partiallyReadKey = lastKey;

                    // start next time at the current key
                    tx.set(ntfyRow, NEXT_COL, partiallyReadKey);
                } else {
                    // start next time at the next possible key
                    Bytes nextPossible = Bytes.newBuilder(lastKey.length() + 1).append(lastKey)
                            .append(new byte[] { 0 }).toBytes();
                    tx.set(ntfyRow, NEXT_COL, nextPossible);
                }

                // may not read all data because of mem limit, so notify self
                tx.setWeakNotification(ntfyRow, col);
            } else if (nextKey != null) {
                // clear nextKey
                tx.delete(ntfyRow, NEXT_COL);
            }
        } else if (nextKey != null) {
            tx.delete(ntfyRow, NEXT_COL);
        }

        byte[] dataPrefix = ntfyRow.toArray();
        // TODO this is awful... no sanity check... hard to read
        dataPrefix[Bytes.of(mapId).length() + 1] = 'd';

        BytesBuilder rowBuilder = Bytes.newBuilder();
        rowBuilder.append(dataPrefix);
        int rowPrefixLen = rowBuilder.getLength();

        Set<Bytes> keysToFetch = updates.keySet();
        if (partiallyReadKey != null) {
            final Bytes prk = partiallyReadKey;
            keysToFetch = Sets.filter(keysToFetch, b -> !b.equals(prk));
        }
        Map<Bytes, Map<Column, Bytes>> currentVals = getCurrentValues(tx, rowBuilder, keysToFetch);

        ArrayList<Update<K, V>> updatesToReport = new ArrayList<>(updates.size());

        for (Entry<Bytes, List<Bytes>> entry : updates.entrySet()) {
            rowBuilder.setLength(rowPrefixLen);
            Bytes currentValueRow = rowBuilder.append(entry.getKey()).toBytes();
            Bytes currVal = currentVals.getOrDefault(currentValueRow, Collections.emptyMap()).get(DATA_COLUMN);

            Iterator<V> ui = Iterators.transform(entry.getValue().iterator(), this::deserVal);

            K kd = serializer.deserialize(entry.getKey().toArray(), keyType);

            if (partiallyReadKey != null && partiallyReadKey.equals(entry.getKey())) {
                // not all updates were read for this key, so requeue the combined updates as an update
                Optional<V> nv = combiner.combine(kd, ui);
                if (nv.isPresent()) {
                    update(tx, Collections.singletonMap(kd, nv.get()));
                }
            } else {
                Optional<V> nv = combiner.combine(kd, concat(ui, currVal));
                Bytes newVal = nv.isPresent() ? Bytes.of(serializer.serialize(nv.get())) : null;
                if (newVal != null ^ currVal != null || (currVal != null && !currVal.equals(newVal))) {
                    if (newVal == null) {
                        tx.delete(currentValueRow, DATA_COLUMN);
                    } else {
                        tx.set(currentValueRow, DATA_COLUMN, newVal);
                    }

                    Optional<V> cvd = Optional.ofNullable(currVal).map(this::deserVal);
                    updatesToReport.add(new Update<>(kd, cvd, nv));
                }
            }
        }

        // TODO could clear these as converted to objects to avoid double memory usage
        updates.clear();
        currentVals.clear();

        if (updatesToReport.size() > 0) {
            updateObserver.updatingValues(tx, updatesToReport.iterator());
        }
    }

    private static final Column DATA_COLUMN = new Column("data", "current");

    private Map<Bytes, Map<Column, Bytes>> getCurrentValues(TransactionBase tx, BytesBuilder prefix,
            Set<Bytes> keySet) {

        Set<Bytes> rows = new HashSet<>();

        int prefixLen = prefix.getLength();
        for (Bytes key : keySet) {
            prefix.setLength(prefixLen);
            rows.add(prefix.append(key).toBytes());
        }

        try {
            return tx.get(rows, Collections.singleton(DATA_COLUMN));
        } catch (IllegalArgumentException e) {
            System.out.println(rows.size());
            throw e;
        }
    }

    private Iterator<V> concat(Iterator<V> updates, Bytes currentVal) {
        if (currentVal == null) {
            return updates;
        }

        return Iterators.concat(updates, Iterators.singletonIterator(deserVal(currentVal)));
    }

    /**
     * This method will retrieve the current value for key and any outstanding updates and combine
     * them using the configured {@link Combiner}. The result from the combiner is returned.
     */
    public V get(SnapshotBase tx, K key) {

        byte[] k = serializer.serialize(key);

        int hash = Hashing.murmur3_32().hashBytes(k).asInt();
        String bucketId = BucketUtil.genBucketId(Math.abs(hash % numBuckets), numBuckets);

        BytesBuilder rowBuilder = Bytes.newBuilder();
        rowBuilder.append(mapId).append(":u:").append(bucketId).append(":").append(k);

        ScannerConfiguration sc = new ScannerConfiguration();
        sc.setSpan(Span.prefix(rowBuilder.toBytes()));

        RowIterator iter = tx.get(sc);

        Iterator<V> ui;

        if (iter.hasNext()) {
            ui = Iterators.transform(iter, e -> deserVal(e.getValue().next().getValue()));
        } else {
            ui = Collections.<V>emptyList().iterator();
        }

        rowBuilder.setLength(mapId.length());
        rowBuilder.append(":d:").append(bucketId).append(":").append(k);

        Bytes dataRow = rowBuilder.toBytes();

        Bytes cv = tx.get(dataRow, DATA_COLUMN);

        if (!ui.hasNext()) {
            if (cv == null) {
                return null;
            } else {
                return deserVal(cv);
            }
        }

        return combiner.combine(key, concat(ui, cv)).orElse(null);
    }

    String getId() {
        return mapId;
    }

    /**
     * Queues updates for a collision free map. These updates will be made by an Observer executing
     * another transaction. This method will not collide with other transaction queuing updates for
     * the same keys.
     *
     * @param tx This transaction will be used to make the updates.
     * @param updates The keys in the map should correspond to keys in the collision free map being
     *        updated. The values in the map will be queued for updating.
     */
    public void update(TransactionBase tx, Map<K, V> updates) {
        Preconditions.checkState(numBuckets > 0, "Not initialized");

        Set<String> buckets = new HashSet<>();

        BytesBuilder rowBuilder = Bytes.newBuilder();
        rowBuilder.append(mapId).append(":u:");
        int prefixLength = rowBuilder.getLength();

        byte[] startTs = encSeq(tx.getStartTimestamp());

        for (Entry<K, V> entry : updates.entrySet()) {
            byte[] k = serializer.serialize(entry.getKey());
            int hash = Hashing.murmur3_32().hashBytes(k).asInt();
            String bucketId = BucketUtil.genBucketId(Math.abs(hash % numBuckets), numBuckets);

            // reset to the common row prefix
            rowBuilder.setLength(prefixLength);

            Bytes row = rowBuilder.append(bucketId).append(":").append(k).append(startTs).toBytes();
            Bytes val = Bytes.of(serializer.serialize(entry.getValue()));

            // TODO set if not exists would be comforting here.... but
            // collisions on bucketId+key+uuid should never occur
            tx.set(row, UPDATE_COL, val);

            buckets.add(bucketId);
        }

        for (String bucketId : buckets) {
            rowBuilder.setLength(prefixLength);
            rowBuilder.append(bucketId).append(":");

            Bytes row = rowBuilder.toBytes();

            tx.setWeakNotification(row, new Column("fluoRecipes", "cfm:" + mapId));
        }
    }

    public static <K2, V2> CollisionFreeMap<K2, V2> getInstance(String mapId, SimpleConfiguration appConf) {
        Options opts = new Options(mapId, appConf);
        try {
            return new CollisionFreeMap<>(opts, SimpleSerializer.getInstance(appConf));
        } catch (Exception e) {
            // TODO
            throw new RuntimeException(e);
        }
    }

    /**
     * A @link {@link CollisionFreeMap} stores data in its own data format in the Fluo table. When
     * initializing a Fluo table with something like Map Reduce or Spark, data will need to be written
     * in this format. Thats the purpose of this method, it provide a simple class that can do this
     * conversion.
     *
     */
    public static <K2, V2> Initializer<K2, V2> getInitializer(String mapId, int numBuckets,
            SimpleSerializer serializer) {
        return new Initializer<>(mapId, numBuckets, serializer);
    }

    /**
     * @see CollisionFreeMap#getInitializer(String, int, SimpleSerializer)
     */
    public static class Initializer<K2, V2> implements Serializable {

        private static final long serialVersionUID = 1L;

        private String mapId;

        private SimpleSerializer serializer;

        private int numBuckets = -1;

        private Initializer(String mapId, int numBuckets, SimpleSerializer serializer) {
            this.mapId = mapId;
            this.numBuckets = numBuckets;
            this.serializer = serializer;
        }

        public RowColumnValue convert(K2 key, V2 val) {
            byte[] k = serializer.serialize(key);
            int hash = Hashing.murmur3_32().hashBytes(k).asInt();
            String bucketId = BucketUtil.genBucketId(Math.abs(hash % numBuckets), numBuckets);

            BytesBuilder bb = Bytes.newBuilder();
            Bytes row = bb.append(mapId).append(":d:").append(bucketId).append(":").append(k).toBytes();
            byte[] v = serializer.serialize(val);

            return new RowColumnValue(row, DATA_COLUMN, Bytes.of(v));
        }
    }

    public static class Options {

        static final long DEFAULT_BUFFER_SIZE = 1 << 22;
        static final int DEFAULT_BUCKETS_PER_TABLET = 10;

        int numBuckets;
        Integer bucketsPerTablet = null;

        Long bufferSize;

        String keyType;
        String valueType;
        String combinerType;
        String updateObserverType;
        String mapId;

        private static final String PREFIX = "recipes.cfm.";

        Options(String mapId, SimpleConfiguration appConfig) {
            this.mapId = mapId;

            this.numBuckets = appConfig.getInt(PREFIX + mapId + ".buckets");
            this.combinerType = appConfig.getString(PREFIX + mapId + ".combiner");
            this.keyType = appConfig.getString(PREFIX + mapId + ".key");
            this.valueType = appConfig.getString(PREFIX + mapId + ".val");
            this.updateObserverType = appConfig.getString(PREFIX + mapId + ".updateObserver", null);
            this.bufferSize = appConfig.getLong(PREFIX + mapId + ".bufferSize", DEFAULT_BUFFER_SIZE);
            this.bucketsPerTablet = appConfig.getInt(PREFIX + mapId + ".bucketsPerTablet",
                    DEFAULT_BUCKETS_PER_TABLET);
        }

        public Options(String mapId, String combinerType, String keyType, String valType, int buckets) {
            Preconditions.checkArgument(buckets > 0);
            Preconditions.checkArgument(!mapId.contains(":"), "Map id cannot contain ':'");

            this.mapId = mapId;
            this.numBuckets = buckets;
            this.combinerType = combinerType;
            this.updateObserverType = null;
            this.keyType = keyType;
            this.valueType = valType;
        }

        public Options(String mapId, String combinerType, String updateObserverType, String keyType,
                String valueType, int buckets) {
            Preconditions.checkArgument(buckets > 0);
            Preconditions.checkArgument(!mapId.contains(":"), "Map id cannot contain ':'");

            this.mapId = mapId;
            this.numBuckets = buckets;
            this.combinerType = combinerType;
            this.updateObserverType = updateObserverType;
            this.keyType = keyType;
            this.valueType = valueType;
        }

        /**
         * Sets a limit on the amount of serialized updates to read into memory. Additional memory will
         * be used to actually deserialize and process the updates. This limit does not account for
         * object overhead in java, which can be significant.
         *
         * <p>
         * The way memory read is calculated is by summing the length of serialized key and value byte
         * arrays. Once this sum exceeds the configured memory limit, no more update key values are
         * processed in the current transaction. When not everything is processed, the observer
         * processing updates will notify itself causing another transaction to continue processing
         * later
         */
        public Options setBufferSize(long bufferSize) {
            Preconditions.checkArgument(bufferSize > 0, "Buffer size must be positive");
            this.bufferSize = bufferSize;
            return this;
        }

        long getBufferSize() {
            if (bufferSize == null) {
                return DEFAULT_BUFFER_SIZE;
            }

            return bufferSize;
        }

        /**
         * Sets the number of buckets per tablet to generate. This affects how many split points will be
         * generated when optimizing the Accumulo table.
         *
         */
        public Options setBucketsPerTablet(int bucketsPerTablet) {
            Preconditions.checkArgument(bucketsPerTablet > 0, "bucketsPerTablet is <= 0 : " + bucketsPerTablet);
            this.bucketsPerTablet = bucketsPerTablet;
            return this;
        }

        int getBucketsPerTablet() {
            if (bucketsPerTablet == null) {
                return DEFAULT_BUCKETS_PER_TABLET;
            }

            return bucketsPerTablet;
        }

        public <K, V> Options(String mapId, Class<? extends Combiner<K, V>> combiner, Class<K> keyType,
                Class<V> valueType, int buckets) {
            this(mapId, combiner.getName(), keyType.getName(), valueType.getName(), buckets);
        }

        public <K, V> Options(String mapId, Class<? extends Combiner<K, V>> combiner,
                Class<? extends UpdateObserver<K, V>> updateObserver, Class<K> keyType, Class<V> valueType,
                int buckets) {
            this(mapId, combiner.getName(), updateObserver.getName(), keyType.getName(), valueType.getName(),
                    buckets);
        }

        void save(SimpleConfiguration appConfig) {
            appConfig.setProperty(PREFIX + mapId + ".buckets", numBuckets + "");
            appConfig.setProperty(PREFIX + mapId + ".combiner", combinerType + "");
            appConfig.setProperty(PREFIX + mapId + ".key", keyType);
            appConfig.setProperty(PREFIX + mapId + ".val", valueType);
            if (updateObserverType != null) {
                appConfig.setProperty(PREFIX + mapId + ".updateObserver", updateObserverType + "");
            }
            if (bufferSize != null) {
                appConfig.setProperty(PREFIX + mapId + ".bufferSize", bufferSize);
            }
            if (bucketsPerTablet != null) {
                appConfig.setProperty(PREFIX + mapId + ".bucketsPerTablet", bucketsPerTablet);
            }
        }
    }

    /**
     * This method configures a collision free map for use. It must be called before initializing
     * Fluo.
     */
    public static void configure(FluoConfiguration fluoConfig, Options opts) {
        opts.save(fluoConfig.getAppConfiguration());
        fluoConfig.addObserver(new ObserverConfiguration(CollisionFreeMapObserver.class.getName())
                .setParameters(ImmutableMap.of("mapId", opts.mapId)));

        Bytes dataRangeEnd = Bytes.of(opts.mapId + DATA_RANGE_END);
        Bytes updateRangeEnd = Bytes.of(opts.mapId + UPDATE_RANGE_END);

        new TransientRegistry(fluoConfig.getAppConfiguration()).addTransientRange("cfm." + opts.mapId,
                new RowRange(dataRangeEnd, updateRangeEnd));
    }

    /**
     * Return suggested Fluo table optimizations for all previously configured collision free maps.
     *
     * @param appConfig Must pass in the application configuration obtained from
     *        {@code FluoClient.getAppConfiguration()} or
     *        {@code FluoConfiguration.getAppConfiguration()}
     */
    public static Pirtos getTableOptimizations(SimpleConfiguration appConfig) {
        HashSet<String> mapIds = new HashSet<>();
        appConfig.getKeys(Options.PREFIX.substring(0, Options.PREFIX.length() - 1))
                .forEachRemaining(k -> mapIds.add(k.substring(Options.PREFIX.length()).split("\\.", 2)[0]));

        Pirtos pirtos = new Pirtos();
        mapIds.forEach(mid -> pirtos.merge(getTableOptimizations(mid, appConfig)));

        return pirtos;
    }

    /**
     * Return suggested Fluo table optimizations for the specified collisiong free map.
     *
     * @param appConfig Must pass in the application configuration obtained from
     *        {@code FluoClient.getAppConfiguration()} or
     *        {@code FluoConfiguration.getAppConfiguration()}
     */
    public static Pirtos getTableOptimizations(String mapId, SimpleConfiguration appConfig) {
        Options opts = new Options(mapId, appConfig);

        BytesBuilder rowBuilder = Bytes.newBuilder();
        rowBuilder.append(mapId);

        List<Bytes> dataSplits = new ArrayList<>();
        for (int i = opts.getBucketsPerTablet(); i < opts.numBuckets; i += opts.getBucketsPerTablet()) {
            String bucketId = BucketUtil.genBucketId(i, opts.numBuckets);
            rowBuilder.setLength(mapId.length());
            dataSplits.add(rowBuilder.append(":d:").append(bucketId).toBytes());
        }
        Collections.sort(dataSplits);

        List<Bytes> updateSplits = new ArrayList<>();
        for (int i = opts.getBucketsPerTablet(); i < opts.numBuckets; i += opts.getBucketsPerTablet()) {
            String bucketId = BucketUtil.genBucketId(i, opts.numBuckets);
            rowBuilder.setLength(mapId.length());
            updateSplits.add(rowBuilder.append(":u:").append(bucketId).toBytes());
        }
        Collections.sort(updateSplits);

        Bytes dataRangeEnd = Bytes.of(opts.mapId + DATA_RANGE_END);
        Bytes updateRangeEnd = Bytes.of(opts.mapId + UPDATE_RANGE_END);

        List<Bytes> splits = new ArrayList<>();
        splits.add(dataRangeEnd);
        splits.add(updateRangeEnd);
        splits.addAll(dataSplits);
        splits.addAll(updateSplits);

        Pirtos pirtos = new Pirtos();
        pirtos.setSplits(splits);

        pirtos.setTabletGroupingRegex(Pattern.quote(mapId + ":") + "[du]:");

        return pirtos;
    }

    private static byte[] encSeq(long l) {
        byte[] ret = new byte[8];
        ret[0] = (byte) (l >>> 56);
        ret[1] = (byte) (l >>> 48);
        ret[2] = (byte) (l >>> 40);
        ret[3] = (byte) (l >>> 32);
        ret[4] = (byte) (l >>> 24);
        ret[5] = (byte) (l >>> 16);
        ret[6] = (byte) (l >>> 8);
        ret[7] = (byte) (l >>> 0);
        return ret;
    }
}