com.addthis.hydra.data.util.KeyTopper.java Source code

Java tutorial

Introduction

Here is the source code for com.addthis.hydra.data.util.KeyTopper.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.data.util;

import javax.annotation.Nonnull;

import java.io.UnsupportedEncodingException;

import java.nio.charset.StandardCharsets;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

import com.addthis.basis.util.Varint;

import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.codables.BytesCodable;
import com.addthis.codec.codables.Codable;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.buffer.Unpooled;

/**
 * Class that helps maintain a top N list for any String Map.
 */
public final class KeyTopper implements Codable, BytesCodable {

    private static final byte[] EMPTY = new byte[0];

    public KeyTopper() {
    }

    @FieldConfig(codable = true, required = true)
    private HashMap<String, Long> map;
    /**
     * Minimum value in the data structure. Not serialized
     * to byte representation. Regenerated as needed.
     */
    @FieldConfig(codable = true)
    private long minVal;
    /**
     * Minimum key in the data structure. Not serialized
     * to byte representation. Regenerated as needed.
     */
    @FieldConfig(codable = true)
    private String minKey;
    @FieldConfig(codable = true)
    private boolean lossy;

    /**
     * Error estimates are only supported in the BytesCodable
     * serialization format. They are not supported
     * in the older serialization format to preserve
     * serialization compatibility.
     */
    @FieldConfig(codable = false)
    private HashMap<String, Long> errors;

    @Override
    public String toString() {
        return "topper(min:" + minKey + "=" + minVal + "->" + map.toString() + ",lossy:" + lossy + ")";
    }

    public KeyTopper init() {
        map = new HashMap<>();
        return this;
    }

    public KeyTopper setLossy(boolean isLossy) {
        lossy = isLossy;
        return this;
    }

    public KeyTopper enableErrors(boolean enable) {
        if (enable) {
            errors = new HashMap<>();
        } else {
            errors = null;
        }
        return this;
    }

    public boolean hasErrors() {
        return errors != null;
    }

    public boolean isLossy() {
        return lossy;
    }

    public int size() {
        return map.size();
    }

    public Long get(@Nonnull String key) {
        return map.get(key);
    }

    /**
     * Retrieve an upper bound on the error
     * associated with an estimate or null
     * if errors have not been enabled.
     *
     * @param key input key
     * @return error estimate or null
     */
    public Long getError(@Nonnull String key) {
        if (errors == null) {
            return null;
        }
        Long error = errors.get(key);
        if (error != null) {
            return error;
        } else {
            return 0L;
        }
    }

    private static final Comparator<Map.Entry<String, Long>> ENTRIES_COMPARATOR = (arg0, arg1) -> Long
            .compare(arg1.getValue(), arg0.getValue());

    /**
     * returns the list sorted by greatest to least count.
     */
    @SuppressWarnings("unchecked")
    public Map.Entry<String, Long>[] getSortedEntries() {
        Map.Entry<String, Long>[] e = new Map.Entry[map.size()];
        e = map.entrySet().toArray(e);
        Arrays.sort(e, ENTRIES_COMPARATOR);
        return e;
    }

    /**
     * Recreate the minimum key and minimum value if the map
     * contains one or more elements and current minimum key is null
     * or the {@code force} parameter is true. Use {@code force}
     * when the minimum key has been evicted from the data structure
     * or the count associated with the minimum key has been updated.
     *
     * Postcondition: Either the top N is empty or the minimum key
     * is a non-null value.
     *
     * @param force if true then always recreate minimum key and value
     */
    private void recreateMinimum(boolean force) {
        if (map.size() > 0 && (minKey == null || force)) {
            minVal = Long.MAX_VALUE;
            for (Map.Entry<String, Long> e : this.map.entrySet()) {
                if (e.getValue() < minVal) {
                    minKey = e.getKey();
                    minVal = e.getValue();
                }
            }
        }
        assert ((minKey != null) ^ (map.size() == 0));
    }

    /**
     * Adds 'ID' the top N if: 1) there are more empty slots or 2) count >
     * smallest top count in the list
     *
     * @param id
     * @return element dropped from top or null if accepted into top with no
     *         drops
     */
    public String increment(@Nonnull String id, int maxsize) {
        return increment(id, 1, maxsize);
    }

    /**
     * Adds 'ID' the top N if: 1) there are more empty slots or 2) count >
     * smallest top count in the list
     * This one increments weight
     *
     * @param id
     * @param weight
     * @return element dropped from top or null if accepted into top with no
     *         drops
     */
    public String increment(@Nonnull String id, int weight, int maxsize) {
        Long count = map.get(id);
        if (count == null) {
            if (lossy && map.size() >= maxsize) {
                recreateMinimum(false);
                count = minVal;
            } else {
                count = 0L;
            }
        }
        return update(id, count + weight, maxsize);
    }

    /**
     * Increments the count for 'ID' in the top map if 'ID' already exists in
     * the map. This method is used if you want to increment a lossy top without
     * removing an element. Used when there is a two stage update for new data
     * elements
     *
     * @param id the id to increment if it already exists in the map
     * @return whether the element was in the map
     */
    public boolean incrementExisting(@Nonnull String id) {
        Long value = map.get(id);
        if (value != null) {
            map.put(id, value + 1L);
            if (id.equals(minKey)) {
                recreateMinimum(true);
            }
            return true;
        }
        return false;
    }

    /**
     * Adds 'id' the top N if: (1) there are more empty slots or
     * (2) value > minimum value in the top N.
     *
     * @param id       key to insert or update
     * @param value    count to associate with the key
     * @return element dropped from top or null if accepted into top with no
     *         drops. returns the offered key if it was rejected for update
     *         or inclusion in the top.
     */
    public String update(@Nonnull String id, long value, int maxsize) {
        Preconditions.checkArgument(value >= 0, "Argument was %s but expected nonnegative", value);
        Preconditions.checkArgument(maxsize > 0, "Argument was %s but expected positive integer", maxsize);
        /** There is guaranteed capacity to update or insert value */
        if (map.size() < maxsize) {
            map.put(id, value);
            /** new minimum key has been identified */
            if (value < minVal) {
                minKey = id;
                minVal = value;
                /** recalculate min if the minimum key was updated */
            } else if (id.equals(minKey)) {
                recreateMinimum(true);
            }
            return null;
        }
        /** compute minimum key and value if they are missing */
        recreateMinimum(false);
        /** insert or update key. Evict if necessary */
        if (value >= minVal) {
            String result = null;
            /** only remove if topN is full and we're not updating an existing entry */
            boolean remove = !map.containsKey(id) && (minKey != null);
            if (remove) {
                map.remove(minKey);
                if (hasErrors()) {
                    errors.remove(minKey);
                    errors.put(id, minVal);
                }
                result = minKey;
            }
            /** update or add entry */
            map.put(id, value);
            /** recalculate min *only* if the min entry was removed or updated */
            if (remove || id.equals(minKey)) {
                recreateMinimum(true);
            }
            return result;
        }
        /** not eligible for top */
        else {
            return id;
        }
    }

    /**
     * Encode the data structure into a serialized representation.
     * Encode the number of elements followed by each (key, value)
     * pair. If the error estimation is used then encode the special
     * byte value 0 (since we will never encode 0 as the size
     * of a non-empty map) at the head of the byte array.
     * @param version
     * @return
     */
    @Override
    public byte[] bytesEncode(long version) {
        if (map.size() == 0) {
            return EMPTY;
        }
        byte[] retBytes = null;
        ByteBuf byteBuf = PooledByteBufAllocator.DEFAULT.buffer();
        try {
            if (hasErrors()) {
                byteBuf.writeByte(0);
            }
            Varint.writeUnsignedVarInt(map.size(), byteBuf);
            for (Map.Entry<String, Long> mapEntry : map.entrySet()) {
                String key = mapEntry.getKey();
                if (key == null) {
                    throw new NullPointerException("KeyTopper decoded null key");
                }
                byte[] keyBytes = key.getBytes("UTF-8");
                Varint.writeUnsignedVarInt(keyBytes.length, byteBuf);
                byteBuf.writeBytes(keyBytes);
                Varint.writeUnsignedVarLong(mapEntry.getValue(), byteBuf);
                if (hasErrors()) {
                    Long error = errors.get(key);
                    if (error != null) {
                        Varint.writeUnsignedVarLong(error, byteBuf);
                    } else {
                        Varint.writeUnsignedVarLong(0, byteBuf);
                    }
                }
            }
            retBytes = new byte[byteBuf.readableBytes()];
            byteBuf.readBytes(retBytes);
        } catch (UnsupportedEncodingException e) {
            throw Throwables.propagate(e);
        } finally {
            byteBuf.release();
        }
        return retBytes;
    }

    @Override
    public void bytesDecode(byte[] b, long version) {
        map = new HashMap<>();
        errors = null;
        if (b.length == 0) {
            return;
        }
        ByteBuf byteBuf = Unpooled.wrappedBuffer(b);
        try {
            byte marker = byteBuf.getByte(byteBuf.readerIndex());
            if (marker == 0) {
                errors = new HashMap<>();
                // Consume the sentinel byte value
                byteBuf.readByte();
            }
            int mapSize = Varint.readUnsignedVarInt(byteBuf);
            try {
                if (mapSize > 0) {
                    for (int i = 0; i < mapSize; i++) {
                        int keyLength = Varint.readUnsignedVarInt(byteBuf);
                        byte[] keybytes = new byte[keyLength];
                        byteBuf.readBytes(keybytes);
                        String k = new String(keybytes, "UTF-8");
                        long value = Varint.readUnsignedVarLong(byteBuf);
                        map.put(k, value);
                        if (hasErrors()) {
                            long error = Varint.readUnsignedVarLong(byteBuf);
                            if (error != 0) {
                                errors.put(k, error);
                            }
                        }
                    }
                }
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } finally {
            byteBuf.release();
        }
    }

    public long getMinVal() {
        return minVal;
    }

    public String getMinKey() {
        return minKey;
    }
}