org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.timeline;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.commons.collections.map.LRUMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.timeline.*;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents.EventsOfOneEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse.TimelinePutError;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
import org.apache.hadoop.yarn.server.records.Version;
import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager.CheckAcl;
import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils.KeyBuilder;
import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils.KeyParser;
import org.apache.hadoop.yarn.server.utils.LeveldbIterator;
import org.fusesource.leveldbjni.JniDBFactory;
import org.iq80.leveldb.*;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import static org.apache.hadoop.yarn.server.timeline.GenericObjectMapper.readReverseOrderedLong;
import static org.apache.hadoop.yarn.server.timeline.GenericObjectMapper.writeReverseOrderedLong;
import static org.apache.hadoop.yarn.server.timeline.TimelineDataManager.DEFAULT_DOMAIN_ID;
import static org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils.prefixMatches;
import static org.fusesource.leveldbjni.JniDBFactory.bytes;

/**
 * <p>An implementation of an application timeline store backed by leveldb.</p>
 *
 * <p>There are three sections of the db, the start time section,
 * the entity section, and the indexed entity section.</p>
 *
 * <p>The start time section is used to retrieve the unique start time for
 * a given entity. Its values each contain a start time while its keys are of
 * the form:</p>
 * <pre>
 *   START_TIME_LOOKUP_PREFIX + entity type + entity id</pre>
 *
 * <p>The entity section is ordered by entity type, then entity start time
 * descending, then entity ID. There are six sub-sections of the entity
 * section: events, primary filters, other info, related entities, the
 * domain ID, and invisible reverse related entities. The event entries have
 * event info serialized into their values. The other info entries have
 * values corresponding to the values of the other info name/value map for
 * the entry (note the names are contained in the key). The domain ID entry
 * has the domain ID as its value, and the entry without any column suffix
 * holds the insert time of the entity. All other entries have empty values.
 * The key structure is as follows:</p>
 * <pre>
 *   ENTITY_ENTRY_PREFIX + entity type + revstarttime + entity id
 *
 *   ENTITY_ENTRY_PREFIX + entity type + revstarttime + entity id +
 *     EVENTS_COLUMN + reveventtimestamp + eventtype
 *
 *   ENTITY_ENTRY_PREFIX + entity type + revstarttime + entity id +
 *     PRIMARY_FILTERS_COLUMN + name + value
 *
 *   ENTITY_ENTRY_PREFIX + entity type + revstarttime + entity id +
 *     OTHER_INFO_COLUMN + name
 *
 *   ENTITY_ENTRY_PREFIX + entity type + revstarttime + entity id +
 *     RELATED_ENTITIES_COLUMN + relatedentity type + relatedentity id
 *
 *   ENTITY_ENTRY_PREFIX + entity type + revstarttime + entity id +
 *     DOMAIN_ID_COLUMN
 *
 *   ENTITY_ENTRY_PREFIX + entity type + revstarttime + entity id +
 *     INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN + relatedentity type +
 *     relatedentity id</pre>
 *
 * <p>The indexed entity section contains a primary filter name and primary
 * filter value as the prefix. Within a given name/value, entire entity
 * entries are stored in the same format as described in the entity section
 * above (below, "key" represents any one of the possible entity entry keys
 * described above).</p>
 * <pre>
 *   INDEXED_ENTRY_PREFIX + primaryfilter name + primaryfilter value +
 *     key</pre>
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class LeveldbTimelineStore extends AbstractService implements TimelineStore {
    private static final Log LOG = LogFactory.getLog(LeveldbTimelineStore.class);

    /** Name of the leveldb directory created under the configured timeline store path. */
    @Private
    @VisibleForTesting
    static final String FILENAME = "leveldb-timeline-store.ldb";

    // Top-level key prefixes selecting the start time, entity and indexed
    // entity sections of the db (see the class documentation).
    private static final byte[] START_TIME_LOOKUP_PREFIX = "k".getBytes(Charset.forName("UTF-8"));
    private static final byte[] ENTITY_ENTRY_PREFIX = "e".getBytes(Charset.forName("UTF-8"));
    private static final byte[] INDEXED_ENTRY_PREFIX = "i".getBytes(Charset.forName("UTF-8"));

    // Single-byte column markers that follow the entity id inside an entity
    // entry key and identify which sub-section the key belongs to.
    private static final byte[] EVENTS_COLUMN = "e".getBytes(Charset.forName("UTF-8"));
    private static final byte[] PRIMARY_FILTERS_COLUMN = "f".getBytes(Charset.forName("UTF-8"));
    private static final byte[] OTHER_INFO_COLUMN = "i".getBytes(Charset.forName("UTF-8"));
    private static final byte[] RELATED_ENTITIES_COLUMN = "r".getBytes(Charset.forName("UTF-8"));
    private static final byte[] INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN = "z".getBytes(Charset.forName("UTF-8"));
    private static final byte[] DOMAIN_ID_COLUMN = "d".getBytes(Charset.forName("UTF-8"));

    // Key prefixes and column markers for domain data.
    // NOTE(review): their use is outside this part of the file; presumably
    // they key domain entries and owner-to-domain lookups -- confirm against
    // the domain read/write methods.
    private static final byte[] DOMAIN_ENTRY_PREFIX = "d".getBytes(Charset.forName("UTF-8"));
    private static final byte[] OWNER_LOOKUP_PREFIX = "o".getBytes(Charset.forName("UTF-8"));
    private static final byte[] DESCRIPTION_COLUMN = "d".getBytes(Charset.forName("UTF-8"));
    private static final byte[] OWNER_COLUMN = "o".getBytes(Charset.forName("UTF-8"));
    private static final byte[] READER_COLUMN = "r".getBytes(Charset.forName("UTF-8"));
    private static final byte[] WRITER_COLUMN = "w".getBytes(Charset.forName("UTF-8"));
    private static final byte[] TIMESTAMP_COLUMN = "t".getBytes(Charset.forName("UTF-8"));

    // Shared zero-length array; presumably the value written for entries that
    // carry all of their information in the key -- verify against the writers.
    private static final byte[] EMPTY_BYTES = new byte[0];

    // NOTE(review): presumably the db key under which the store's schema
    // version is persisted and the version this code expects; the code that
    // uses them (checkVersion) is not visible here.
    private static final String TIMELINE_STORE_VERSION_KEY = "timeline-store-version";

    private static final Version CURRENT_VERSION_INFO = Version.newInstance(1, 0);

    /**
     * Permission set on the leveldb directory when this store creates it
     * (octal 0700).
     */
    @Private
    @VisibleForTesting
    static final FsPermission LEVELDB_DIR_UMASK = FsPermission.createImmutable((short) 0700);

    // Both caches are created in serviceInit as synchronized LRU maps sized
    // from the configuration.
    private Map<EntityIdentifier, StartAndInsertTime> startTimeWriteCache;
    private Map<EntityIdentifier, Long> startTimeReadCache;

    /**
     * Per-entity locks are obtained when writing.
     */
    private final LockMap<EntityIdentifier> writeLocks = new LockMap<EntityIdentifier>();

    // NOTE(review): not used in the visible part of the file; presumably
    // coordinates the deletion of old entities with concurrent access.
    private final ReentrantReadWriteLock deleteLock = new ReentrantReadWriteLock();

    // Handle to the leveldb database; opened in serviceInit and closed in
    // serviceStop.
    private DB db;

    // Background thread discarding old entities; stays null when the TTL
    // feature is disabled in the configuration.
    private Thread deletionThread;

    /**
     * Creates the store, registering it as a service named after this class.
     * The database itself is not opened until {@link #serviceInit}.
     */
    public LeveldbTimelineStore() {
        super(LeveldbTimelineStore.class.getName());
    }

    /**
     * Validates the timeline store configuration, opens (creating it if
     * necessary) the leveldb database under the configured path, checks the
     * stored version, sets up the start time caches and, when TTL is enabled,
     * starts the background deletion thread.
     *
     * @param conf the service configuration
     * @throws IllegalArgumentException if one of the validated properties is
     *         out of range
     * @throws IOException if the leveldb directory cannot be created
     * @throws Exception on any other initialization failure
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void serviceInit(Configuration conf) throws Exception {
        Preconditions.checkArgument(
                conf.getLong(YarnConfiguration.TIMELINE_SERVICE_TTL_MS,
                        YarnConfiguration.DEFAULT_TIMELINE_SERVICE_TTL_MS) > 0,
                "%s property value should be greater than zero", YarnConfiguration.TIMELINE_SERVICE_TTL_MS);
        Preconditions.checkArgument(
                conf.getLong(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_TTL_INTERVAL_MS,
                        YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_TTL_INTERVAL_MS) > 0,
                "%s property value should be greater than zero",
                YarnConfiguration.TIMELINE_SERVICE_LEVELDB_TTL_INTERVAL_MS);
        Preconditions.checkArgument(
                conf.getLong(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE,
                        YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE) >= 0,
                "%s property value should be greater than or equal to zero",
                YarnConfiguration.TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE);
        // message previously started with a stray space, unlike its siblings
        Preconditions.checkArgument(
                conf.getLong(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_START_TIME_READ_CACHE_SIZE,
                        YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_START_TIME_READ_CACHE_SIZE) > 0,
                "%s property value should be greater than zero",
                YarnConfiguration.TIMELINE_SERVICE_LEVELDB_START_TIME_READ_CACHE_SIZE);
        Preconditions.checkArgument(
                conf.getLong(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_START_TIME_WRITE_CACHE_SIZE,
                        YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_START_TIME_WRITE_CACHE_SIZE) > 0,
                "%s property value should be greater than zero",
                YarnConfiguration.TIMELINE_SERVICE_LEVELDB_START_TIME_WRITE_CACHE_SIZE);

        Options options = new Options();
        options.createIfMissing(true);
        options.cacheSize(conf.getLong(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE,
                YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE));
        JniDBFactory factory = new JniDBFactory();
        Path dbPath = new Path(conf.get(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_PATH), FILENAME);
        FileSystem localFS = null;
        try {
            localFS = FileSystem.getLocal(conf);
            // only a directory created here gets its permission restricted;
            // an already existing directory is left untouched
            if (!localFS.exists(dbPath)) {
                if (!localFS.mkdirs(dbPath)) {
                    throw new IOException("Couldn't create directory for leveldb " + "timeline store " + dbPath);
                }
                localFS.setPermission(dbPath, LEVELDB_DIR_UMASK);
            }
        } finally {
            IOUtils.cleanup(LOG, localFS);
        }
        LOG.info("Using leveldb path " + dbPath);
        db = factory.open(new File(dbPath.toString()), options);
        checkVersion();
        startTimeWriteCache = Collections.synchronizedMap(new LRUMap(getStartTimeWriteCacheSize(conf)));
        startTimeReadCache = Collections.synchronizedMap(new LRUMap(getStartTimeReadCacheSize(conf)));

        // TTL based deletion is on unless explicitly disabled
        if (conf.getBoolean(YarnConfiguration.TIMELINE_SERVICE_TTL_ENABLE, true)) {
            deletionThread = new EntityDeletionThread(conf);
            deletionThread.start();
        }

        super.serviceInit(conf);
    }

    /**
     * Stops the deletion thread (if one was started), waits for it to finish
     * its current action and closes the database.
     *
     * <p>If the calling thread is interrupted while waiting, the database is
     * closed right away and the interrupt status of the calling thread is
     * restored so that callers further up the stack can still observe it.</p>
     *
     * @throws Exception if the parent service fails to stop
     */
    @Override
    protected void serviceStop() throws Exception {
        if (deletionThread != null) {
            deletionThread.interrupt();
            LOG.info("Waiting for deletion thread to complete its current action");
            try {
                deletionThread.join();
            } catch (InterruptedException e) {
                LOG.warn("Interrupted while waiting for deletion thread to complete," + " closing db now", e);
                // do not swallow the interrupt: re-assert it for our callers
                Thread.currentThread().interrupt();
            }
        }
        IOUtils.cleanup(LOG, db);
        super.serviceStop();
    }

    /**
     * Immutable pair holding the start time of an entity together with the
     * time at which the entity was inserted.
     */
    private static class StartAndInsertTime {
        final long startTime;
        final long insertTime;

        public StartAndInsertTime(long start, long insert) {
            startTime = start;
            insertTime = insert;
        }
    }

    private class EntityDeletionThread extends Thread {
        private final long ttl;
        private final long ttlInterval;

        public EntityDeletionThread(Configuration conf) {
            ttl = conf.getLong(YarnConfiguration.TIMELINE_SERVICE_TTL_MS,
                    YarnConfiguration.DEFAULT_TIMELINE_SERVICE_TTL_MS);
            ttlInterval = conf.getLong(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_TTL_INTERVAL_MS,
                    YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_TTL_INTERVAL_MS);
            LOG.info("Starting deletion thread with ttl " + ttl + " and cycle " + "interval " + ttlInterval);
        }

        @Override
        public void run() {
            while (true) {
                long timestamp = System.currentTimeMillis() - ttl;
                try {
                    discardOldEntities(timestamp);
                    Thread.sleep(ttlInterval);
                } catch (IOException e) {
                    LOG.error(e);
                } catch (InterruptedException e) {
                    LOG.info("Deletion thread received interrupt, exiting");
                    break;
                }
            }
        }
    }

    /**
     * A map from keys to reference-counted reentrant locks. A lock exists in
     * the map only while at least one caller has obtained it via
     * {@link #getLock} and not yet handed it back via {@link #returnLock}.
     */
    private static class LockMap<K> {
        /** Reentrant lock that remembers its key and how often it is checked out. */
        private static class CountingReentrantLock<K> extends ReentrantLock {
            private static final long serialVersionUID = 1L;
            private int count;
            private K key;

            CountingReentrantLock(K key) {
                super();
                this.key = key;
                this.count = 0;
            }
        }

        private Map<K, CountingReentrantLock<K>> locks = new HashMap<K, CountingReentrantLock<K>>();

        /**
         * Looks up the lock for the key, creating it on first use, and
         * increments its reference count. The lock itself is not acquired.
         */
        synchronized CountingReentrantLock<K> getLock(K key) {
            CountingReentrantLock<K> entry = locks.get(key);
            if (entry == null) {
                entry = new CountingReentrantLock<K>(key);
                locks.put(key, entry);
            }
            entry.count += 1;
            return entry;
        }

        /**
         * Decrements the reference count of the lock and forgets the lock
         * once nobody references it any more.
         *
         * @throws IllegalStateException if the lock has no outstanding
         *         references
         */
        synchronized void returnLock(CountingReentrantLock<K> lock) {
            if (lock.count == 0) {
                throw new IllegalStateException("Returned lock more times than it was retrieved");
            }
            lock.count -= 1;
            if (lock.count != 0) {
                return;
            }
            locks.remove(lock.key);
        }
    }

    /**
     * Reads a single entity.
     *
     * @param entityId the id of the entity
     * @param entityType the type of the entity
     * @param fields the fields to populate; {@code null} selects all fields
     * @return the entity, or {@code null} if no start time is known for it
     * @throws IOException if the database lookup fails
     */
    @Override
    public TimelineEntity getEntity(String entityId, String entityType, EnumSet<Field> fields) throws IOException {
        final Long startTime = getStartTimeLong(entityId, entityType);
        if (startTime == null) {
            return null;
        }
        final EnumSet<Field> requested = (fields != null) ? fields : EnumSet.allOf(Field.class);
        // all keys of the entity share this prefix
        final byte[] entityPrefix = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType)
                .add(writeReverseOrderedLong(startTime)).add(entityId).getBytesForLookup();

        LeveldbIterator it = null;
        try {
            it = new LeveldbIterator(db);
            it.seek(entityPrefix);
            return getEntity(entityId, entityType, startTime, requested, it, entityPrefix, entityPrefix.length);
        } catch (DBException dbe) {
            throw new IOException(dbe);
        } finally {
            IOUtils.cleanup(LOG, it);
        }
    }

    /**
     * Builds an entity from consecutive db entries. The iterator must be
     * positioned on the first key of the entity; on return it is positioned
     * on the first key that no longer starts with the given prefix (or at the
     * end of the db). Fields that were not requested are set to null on the
     * returned entity. An entity is always returned, even when no entry
     * matched the prefix.
     *
     * @param entityId the id to set on the returned entity
     * @param entityType the type to set on the returned entity
     * @param startTime the start time to set on the returned entity
     * @param fields the fields to populate
     * @param iterator the db iterator to read from
     * @param prefix a key whose first {@code prefixlen} bytes identify the
     *        entity
     * @param prefixlen the number of significant bytes in {@code prefix}
     * @return the populated entity
     * @throws IOException if a stored value cannot be parsed
     */
    private static TimelineEntity getEntity(String entityId, String entityType, Long startTime,
            EnumSet<Field> fields, LeveldbIterator iterator, byte[] prefix, int prefixlen) throws IOException {
        TimelineEntity entity = new TimelineEntity();

        // EVENTS takes precedence over LAST_EVENT_ONLY when both are requested
        final boolean events = fields.contains(Field.EVENTS);
        final boolean lastEvent = !events && fields.contains(Field.LAST_EVENT_ONLY);
        final boolean relatedEntities = fields.contains(Field.RELATED_ENTITIES);
        final boolean primaryFilters = fields.contains(Field.PRIMARY_FILTERS);
        final boolean otherInfo = fields.contains(Field.OTHER_INFO);

        // blank out everything the caller did not ask for
        if (!events && !lastEvent) {
            entity.setEvents(null);
        }
        if (!relatedEntities) {
            entity.setRelatedEntities(null);
        }
        if (!primaryFilters) {
            entity.setPrimaryFilters(null);
        }
        if (!otherInfo) {
            entity.setOtherInfo(null);
        }

        // walk over the entries of the entity; the byte following the prefix
        // tells which kind of information an entry holds
        for (; iterator.hasNext(); iterator.next()) {
            final byte[] key = iterator.peekNext().getKey();
            if (!prefixMatches(prefix, prefixlen, key)) {
                break;
            }
            if (key.length == prefixlen) {
                // the bare entity entry carries no column
                continue;
            }
            final byte column = key[prefixlen];
            if (column == PRIMARY_FILTERS_COLUMN[0]) {
                if (primaryFilters) {
                    addPrimaryFilter(entity, key, prefixlen + PRIMARY_FILTERS_COLUMN.length);
                }
            } else if (column == OTHER_INFO_COLUMN[0]) {
                if (otherInfo) {
                    String name = parseRemainingKey(key, prefixlen + OTHER_INFO_COLUMN.length);
                    entity.addOtherInfo(name, GenericObjectMapper.read(iterator.peekNext().getValue()));
                }
            } else if (column == RELATED_ENTITIES_COLUMN[0]) {
                if (relatedEntities) {
                    addRelatedEntity(entity, key, prefixlen + RELATED_ENTITIES_COLUMN.length);
                }
            } else if (column == EVENTS_COLUMN[0]) {
                // with LAST_EVENT_ONLY only the first event encountered is kept
                if (events || (lastEvent && entity.getEvents().size() == 0)) {
                    TimelineEvent event = getEntityEvent(null, key, prefixlen + EVENTS_COLUMN.length,
                            iterator.peekNext().getValue());
                    if (event != null) {
                        entity.addEvent(event);
                    }
                }
            } else if (column == DOMAIN_ID_COLUMN[0]) {
                // the domain id is read regardless of the requested fields
                entity.setDomainId(new String(iterator.peekNext().getValue(), Charset.forName("UTF-8")));
            } else if (column != INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN[0]) {
                LOG.warn(String.format("Found unexpected column for entity %s of type %s (0x%02x)",
                        entityId, entityType, column));
            }
        }

        entity.setEntityId(entityId);
        entity.setEntityType(entityType);
        entity.setStartTime(startTime);

        return entity;
    }

    /**
     * Retrieves the events of the given entities.
     *
     * @param entityType the type shared by all requested entities
     * @param entityIds the ids of the entities; {@code null} or empty yields
     *        an empty result
     * @param limit the maximum number of events per entity; {@code null}
     *        selects {@code DEFAULT_LIMIT}
     * @param windowStart if not {@code null}, iteration over an entity's
     *        events stops once the keys sort past this timestamp
     * @param windowEnd the timestamp at which iteration over an entity's
     *        events starts (events are stored newest first); {@code null}
     *        selects {@code Long.MAX_VALUE}
     * @param eventType the event types passed on to the event parser as a
     *        filter
     * @return the events found, one element per entity with a known start
     *         time; entities without a start time are skipped
     * @throws IOException if the database lookup fails
     */
    @Override
    public TimelineEvents getEntityTimelines(String entityType, SortedSet<String> entityIds, Long limit,
            Long windowStart, Long windowEnd, Set<String> eventType) throws IOException {
        TimelineEvents events = new TimelineEvents();
        if (entityIds == null || entityIds.isEmpty()) {
            return events;
        }
        // resolve the defaults once instead of on every loop iteration
        if (windowEnd == null) {
            windowEnd = Long.MAX_VALUE;
        }
        if (limit == null) {
            limit = DEFAULT_LIMIT;
        }
        // create a lexicographically-ordered map from start time to entities
        Map<byte[], List<EntityIdentifier>> startTimeMap = new TreeMap<byte[], List<EntityIdentifier>>(
                new Comparator<byte[]>() {
                    @Override
                    public int compare(byte[] o1, byte[] o2) {
                        return WritableComparator.compareBytes(o1, 0, o1.length, o2, 0, o2.length);
                    }
                });
        try {
            // look up start times for the specified entities
            // skip entities with no start time
            for (String entityId : entityIds) {
                byte[] startTime = getStartTime(entityId, entityType);
                if (startTime != null) {
                    List<EntityIdentifier> entities = startTimeMap.get(startTime);
                    if (entities == null) {
                        entities = new ArrayList<EntityIdentifier>();
                        startTimeMap.put(startTime, entities);
                    }
                    entities.add(new EntityIdentifier(entityId, entityType));
                }
            }
            for (Entry<byte[], List<EntityIdentifier>> entry : startTimeMap.entrySet()) {
                // look up the events matching the given parameters (limit,
                // start time, end time, event types) for entities whose start times
                // were found and add the entities to the return list
                byte[] revStartTime = entry.getKey();
                for (EntityIdentifier entityIdentifier : entry.getValue()) {
                    EventsOfOneEntity entity = new EventsOfOneEntity();
                    entity.setEntityId(entityIdentifier.getId());
                    entity.setEntityType(entityType);
                    events.addEvent(entity);
                    KeyBuilder kb = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType)
                            .add(revStartTime).add(entityIdentifier.getId()).add(EVENTS_COLUMN);
                    byte[] prefix = kb.getBytesForLookup();
                    kb.add(writeReverseOrderedLong(windowEnd));
                    byte[] first = kb.getBytesForLookup();
                    byte[] last = null;
                    if (windowStart != null) {
                        last = KeyBuilder.newInstance().add(prefix).add(writeReverseOrderedLong(windowStart))
                                .getBytesForLookup();
                    }
                    // one iterator per entity, closed before moving on to the
                    // next entity so that none of them is leaked
                    LeveldbIterator iterator = null;
                    try {
                        iterator = new LeveldbIterator(db);
                        for (iterator.seek(first); entity.getEvents().size() < limit && iterator.hasNext(); iterator
                                .next()) {
                            byte[] key = iterator.peekNext().getKey();
                            if (!prefixMatches(prefix, prefix.length, key) || (last != null
                                    && WritableComparator.compareBytes(key, 0, key.length, last, 0,
                                            last.length) > 0)) {
                                break;
                            }
                            TimelineEvent event = getEntityEvent(eventType, key, prefix.length,
                                    iterator.peekNext().getValue());
                            if (event != null) {
                                entity.addEvent(event);
                            }
                        }
                    } finally {
                        IOUtils.cleanup(LOG, iterator);
                    }
                }
            }
        } catch (DBException e) {
            throw new IOException(e);
        }
        return events;
    }

    /**
     * Retrieves entities of the given type. Without a primary filter the
     * entity section of the db is scanned; with a primary filter the scan is
     * done in the indexed entity section for that filter name and value.
     * All remaining arguments are handed to {@code getEntityByTime} unchanged.
     *
     * @throws IOException if the database lookup fails
     */
    @Override
    public TimelineEntities getEntities(String entityType, Long limit, Long windowStart, Long windowEnd,
            String fromId, Long fromTs, NameValuePair primaryFilter, Collection<NameValuePair> secondaryFilters,
            EnumSet<Field> fields, CheckAcl checkAcl) throws IOException {
        final byte[] base;
        if (primaryFilter != null) {
            // lookup prefix is INDEXED_ENTRY_PREFIX + primaryFilterName +
            // primaryFilterValue + ENTITY_ENTRY_PREFIX
            base = KeyBuilder.newInstance().add(INDEXED_ENTRY_PREFIX).add(primaryFilter.getName())
                    .add(GenericObjectMapper.write(primaryFilter.getValue()), true).add(ENTITY_ENTRY_PREFIX)
                    .getBytesForLookup();
        } else {
            // lookup prefix is just ENTITY_ENTRY_PREFIX
            base = ENTITY_ENTRY_PREFIX;
        }
        return getEntityByTime(base, entityType, limit, windowStart, windowEnd, fromId, fromTs,
                secondaryFilters, fields, checkAcl);
    }

    /**
     * Retrieves a list of entities satisfying given parameters.
     *
     * <p>The {@code fields} set passed in by the caller is never modified;
     * when additional fields have to be loaded to evaluate secondary filters
     * a private copy is used.</p>
     *
     * @param base A byte array prefix for the lookup
     * @param entityType The type of the entity
     * @param limit A limit on the number of entities to return; null selects
     *        the default limit
     * @param starttime The earliest entity start time to retrieve (exclusive)
     * @param endtime The latest entity start time to retrieve (inclusive)
     * @param fromId Retrieve entities starting with this entity
     * @param fromTs Ignore entities with insert timestamp later than this ts
     * @param secondaryFilters Filter pairs that the entities should match
     * @param fields The set of fields to retrieve; null selects all fields
     * @param checkAcl Access check applied to every matching entity; entities
     *        failing the check are left out. Null disables the check
     * @return A list of entities
     * @throws IOException if the database lookup fails
     */
    private TimelineEntities getEntityByTime(byte[] base, String entityType, Long limit, Long starttime,
            Long endtime, String fromId, Long fromTs, Collection<NameValuePair> secondaryFilters,
            EnumSet<Field> fields, CheckAcl checkAcl) throws IOException {
        // Even if other info and primary filter fields are not included, we
        // still need to load them to match secondary filters when they are
        // non-empty
        if (fields == null) {
            fields = EnumSet.allOf(Field.class);
        }
        boolean addPrimaryFilters = false;
        boolean addOtherInfo = false;
        if (secondaryFilters != null && secondaryFilters.size() > 0) {
            addPrimaryFilters = !fields.contains(Field.PRIMARY_FILTERS);
            addOtherInfo = !fields.contains(Field.OTHER_INFO);
            if (addPrimaryFilters || addOtherInfo) {
                // extend a copy rather than the set owned by the caller
                fields = EnumSet.copyOf(fields);
                fields.add(Field.PRIMARY_FILTERS);
                fields.add(Field.OTHER_INFO);
            }
        }

        LeveldbIterator iterator = null;
        try {
            KeyBuilder kb = KeyBuilder.newInstance().add(base).add(entityType);
            // only db keys matching the prefix (base + entity type) will be parsed
            byte[] prefix = kb.getBytesForLookup();
            if (endtime == null) {
                // if end time is null, place no restriction on end time
                endtime = Long.MAX_VALUE;
            }
            // construct a first key that will be seeked to using end time or fromId
            byte[] first = null;
            if (fromId != null) {
                Long fromIdStartTime = getStartTimeLong(fromId, entityType);
                if (fromIdStartTime == null) {
                    // no start time for provided id, so return empty entities
                    return new TimelineEntities();
                }
                if (fromIdStartTime <= endtime) {
                    // if provided id's start time falls before the end of the window,
                    // use it to construct the seek key
                    first = kb.add(writeReverseOrderedLong(fromIdStartTime)).add(fromId).getBytesForLookup();
                }
            }
            // if seek key wasn't constructed using fromId, construct it using end ts
            if (first == null) {
                first = kb.add(writeReverseOrderedLong(endtime)).getBytesForLookup();
            }
            byte[] last = null;
            if (starttime != null) {
                // if start time is not null, set a last key that will not be
                // iterated past
                last = KeyBuilder.newInstance().add(base).add(entityType).add(writeReverseOrderedLong(starttime))
                        .getBytesForLookup();
            }
            if (limit == null) {
                // if limit is not specified, use the default
                limit = DEFAULT_LIMIT;
            }

            TimelineEntities entities = new TimelineEntities();
            iterator = new LeveldbIterator(db);
            iterator.seek(first);
            // iterate until one of the following conditions is met: limit is
            // reached, there are no more keys, the key prefix no longer matches,
            // or a start time has been specified and reached/exceeded
            while (entities.getEntities().size() < limit && iterator.hasNext()) {
                byte[] key = iterator.peekNext().getKey();
                if (!prefixMatches(prefix, prefix.length, key) || (last != null
                        && WritableComparator.compareBytes(key, 0, key.length, last, 0, last.length) > 0)) {
                    break;
                }
                // read the start time and entity id from the current key
                KeyParser kp = new KeyParser(key, prefix.length);
                Long startTime = kp.getNextLong();
                String entityId = kp.getNextString();

                if (fromTs != null) {
                    long insertTime = readReverseOrderedLong(iterator.peekNext().getValue(), 0);
                    if (insertTime > fromTs) {
                        // skip every key of this entity; only peek after
                        // hasNext() has confirmed that there is a next key,
                        // otherwise skipping the last entity in the db fails
                        final int entityKeyLength = kp.getOffset();
                        while (iterator.hasNext()
                                && prefixMatches(key, entityKeyLength, iterator.peekNext().getKey())) {
                            iterator.next();
                        }
                        continue;
                    }
                }

                // parse the entity that owns this key, iterating over all keys for
                // the entity
                TimelineEntity entity = getEntity(entityId, entityType, startTime, fields, iterator, key,
                        kp.getOffset());
                // determine if the retrieved entity matches the provided secondary
                // filters, and if so add it to the list of entities to return
                boolean filterPassed = true;
                if (secondaryFilters != null) {
                    for (NameValuePair filter : secondaryFilters) {
                        // a filter matches on the other info value of that
                        // name, or else on one of the primary filter values
                        Object v = entity.getOtherInfo().get(filter.getName());
                        if (v == null) {
                            Set<Object> vs = entity.getPrimaryFilters().get(filter.getName());
                            if (vs == null || !vs.contains(filter.getValue())) {
                                filterPassed = false;
                                break;
                            }
                        } else if (!v.equals(filter.getValue())) {
                            filterPassed = false;
                            break;
                        }
                    }
                }
                if (filterPassed) {
                    if (entity.getDomainId() == null) {
                        entity.setDomainId(DEFAULT_DOMAIN_ID);
                    }
                    if (checkAcl == null || checkAcl.check(entity)) {
                        // Remove primary filter and other info if they are added for
                        // matching secondary filters
                        if (addPrimaryFilters) {
                            entity.setPrimaryFilters(null);
                        }
                        if (addOtherInfo) {
                            entity.setOtherInfo(null);
                        }
                        entities.addEntity(entity);
                    }
                }
            }
            return entities;
        } catch (DBException e) {
            throw new IOException(e);
        } finally {
            IOUtils.cleanup(LOG, iterator);
        }
    }

    /**
     * Handle error and set it in response.
     */
    private static void handleError(TimelineEntity entity, TimelinePutResponse response, final int errorCode) {
        // Build an error record identifying the entity and the failure code.
        final TimelinePutError putError = new TimelinePutError();
        putError.setErrorCode(errorCode);
        putError.setEntityType(entity.getEntityType());
        putError.setEntityId(entity.getEntityId());
        // Attach the record to the response handed in by the caller.
        response.addError(putError);
    }

    /**
     * Put a single entity.  If there is an error, add a TimelinePutError to the
     * given response.
     */
    private void put(TimelineEntity entity, TimelinePutResponse response, boolean allowEmptyDomainId) {
        // entity:             the entity to store
        // response:           collects a TimelinePutError for every failure
        // allowEmptyDomainId: when false, an entity without a domain id is
        //                     rejected with NO_DOMAIN and its batch is not written
        //
        // All work on this entity happens while holding its per-entity write lock.
        LockMap.CountingReentrantLock<EntityIdentifier> lock = writeLocks
                .getLock(new EntityIdentifier(entity.getEntityId(), entity.getEntityType()));
        lock.lock();
        WriteBatch writeBatch = null;
        // related entities that are not known to the store yet; they are created
        // after the batch of this entity has been handled
        List<EntityIdentifier> relatedEntitiesWithoutStartTimes = new ArrayList<EntityIdentifier>();
        byte[] revStartTime = null;
        Map<String, Set<Object>> primaryFilters = null;
        try {
            writeBatch = db.createWriteBatch();
            List<TimelineEvent> events = entity.getEvents();
            // look up the start time for the entity
            StartAndInsertTime startAndInsertTime = getAndSetStartTime(entity.getEntityId(), entity.getEntityType(),
                    entity.getStartTime(), events);
            if (startAndInsertTime == null) {
                // if no start time is found, add an error and return
                handleError(entity, response, TimelinePutError.NO_START_TIME);
                return;
            }
            revStartTime = writeReverseOrderedLong(startAndInsertTime.startTime);

            primaryFilters = entity.getPrimaryFilters();

            // write entity marker
            byte[] markerKey = createEntityMarkerKey(entity.getEntityId(), entity.getEntityType(), revStartTime);
            byte[] markerValue = writeReverseOrderedLong(startAndInsertTime.insertTime);
            writeBatch.put(markerKey, markerValue);
            writePrimaryFilterEntries(writeBatch, primaryFilters, markerKey, markerValue);

            // write event entries
            if (events != null && !events.isEmpty()) {
                for (TimelineEvent event : events) {
                    byte[] revts = writeReverseOrderedLong(event.getTimestamp());
                    byte[] key = createEntityEventKey(entity.getEntityId(), entity.getEntityType(), revStartTime,
                            revts, event.getEventType());
                    byte[] value = GenericObjectMapper.write(event.getEventInfo());
                    writeBatch.put(key, value);
                    writePrimaryFilterEntries(writeBatch, primaryFilters, key, value);
                }
            }

            // write related entity entries
            Map<String, Set<String>> relatedEntities = entity.getRelatedEntities();
            if (relatedEntities != null && !relatedEntities.isEmpty()) {
                for (Entry<String, Set<String>> relatedEntityList : relatedEntities.entrySet()) {
                    String relatedEntityType = relatedEntityList.getKey();
                    for (String relatedEntityId : relatedEntityList.getValue()) {
                        // invisible "reverse" entries (entity -> related entity)
                        byte[] key = createReverseRelatedEntityKey(entity.getEntityId(), entity.getEntityType(),
                                revStartTime, relatedEntityId, relatedEntityType);
                        writeBatch.put(key, EMPTY_BYTES);
                        // look up start time of related entity
                        byte[] relatedEntityStartTime = getStartTime(relatedEntityId, relatedEntityType);
                        // delay writing the related entity if no start time is found
                        if (relatedEntityStartTime == null) {
                            relatedEntitiesWithoutStartTimes
                                    .add(new EntityIdentifier(relatedEntityId, relatedEntityType));
                            continue;
                        } else {
                            // This is the existing entity
                            byte[] domainIdBytes = db.get(
                                    createDomainIdKey(relatedEntityId, relatedEntityType, relatedEntityStartTime));
                            // The timeline data created by the server before 2.6 won't have
                            // the domain field. We assume this timeline data is in the
                            // default timeline domain.
                            String domainId = null;
                            if (domainIdBytes == null) {
                                domainId = TimelineDataManager.DEFAULT_DOMAIN_ID;
                            } else {
                                domainId = new String(domainIdBytes, Charset.forName("UTF-8"));
                            }
                            if (!domainId.equals(entity.getDomainId())) {
                                // in this case the entity will be put, but the relation will be
                                // ignored
                                handleError(entity, response, TimelinePutError.FORBIDDEN_RELATION);
                                continue;
                            }
                        }
                        // write "forward" entry (related entity -> entity)
                        key = createRelatedEntityKey(relatedEntityId, relatedEntityType, relatedEntityStartTime,
                                entity.getEntityId(), entity.getEntityType());
                        writeBatch.put(key, EMPTY_BYTES);
                    }
                }
            }

            // write primary filter entries
            if (primaryFilters != null && !primaryFilters.isEmpty()) {
                for (Entry<String, Set<Object>> primaryFilter : primaryFilters.entrySet()) {
                    for (Object primaryFilterValue : primaryFilter.getValue()) {
                        byte[] key = createPrimaryFilterKey(entity.getEntityId(), entity.getEntityType(),
                                revStartTime, primaryFilter.getKey(), primaryFilterValue);
                        writeBatch.put(key, EMPTY_BYTES);
                        writePrimaryFilterEntries(writeBatch, primaryFilters, key, EMPTY_BYTES);
                    }
                }
            }

            // write other info entries
            Map<String, Object> otherInfo = entity.getOtherInfo();
            if (otherInfo != null && !otherInfo.isEmpty()) {
                for (Entry<String, Object> i : otherInfo.entrySet()) {
                    byte[] key = createOtherInfoKey(entity.getEntityId(), entity.getEntityType(), revStartTime,
                            i.getKey());
                    byte[] value = GenericObjectMapper.write(i.getValue());
                    writeBatch.put(key, value);
                    writePrimaryFilterEntries(writeBatch, primaryFilters, key, value);
                }
            }

            // write domain id entry
            byte[] key = createDomainIdKey(entity.getEntityId(), entity.getEntityType(), revStartTime);
            if (entity.getDomainId() == null || entity.getDomainId().length() == 0) {
                if (!allowEmptyDomainId) {
                    // the batch is abandoned: nothing collected above is written
                    handleError(entity, response, TimelinePutError.NO_DOMAIN);
                    return;
                }
            } else {
                writeBatch.put(key, entity.getDomainId().getBytes(Charset.forName("UTF-8")));
                writePrimaryFilterEntries(writeBatch, primaryFilters, key,
                        entity.getDomainId().getBytes(Charset.forName("UTF-8")));
            }
            db.write(writeBatch);
        } catch (DBException de) {
            LOG.error("Error putting entity " + entity.getEntityId() + " of type " + entity.getEntityType(), de);
            handleError(entity, response, TimelinePutError.IO_EXCEPTION);
        } catch (IOException e) {
            LOG.error("Error putting entity " + entity.getEntityId() + " of type " + entity.getEntityType(), e);
            handleError(entity, response, TimelinePutError.IO_EXCEPTION);
        } finally {
            lock.unlock();
            writeLocks.returnLock(lock);
            IOUtils.cleanup(LOG, writeBatch);
        }

        // Create the related entities that were unknown above. Each one is
        // written under its own write lock and inherits this entity's start time
        // as the suggested start time.
        for (EntityIdentifier relatedEntity : relatedEntitiesWithoutStartTimes) {
            lock = writeLocks.getLock(relatedEntity);
            lock.lock();
            try {
                StartAndInsertTime relatedEntityStartAndInsertTime = getAndSetStartTime(relatedEntity.getId(),
                        relatedEntity.getType(), readReverseOrderedLong(revStartTime, 0), null);
                if (relatedEntityStartAndInsertTime == null) {
                    throw new IOException("Error setting start time for related entity");
                }
                byte[] relatedEntityStartTime = writeReverseOrderedLong(relatedEntityStartAndInsertTime.startTime);
                // This is the new entity, the domain should be the same. The entity
                // being put may have no domain id when allowEmptyDomainId is true;
                // in that case no domain entry is written for the related entity
                // (just as none was written for the entity itself) instead of
                // failing with a NullPointerException on getBytes().
                String entityDomainId = entity.getDomainId();
                if (entityDomainId != null) {
                    byte[] key = createDomainIdKey(relatedEntity.getId(), relatedEntity.getType(),
                            relatedEntityStartTime);
                    db.put(key, entityDomainId.getBytes(Charset.forName("UTF-8")));
                }
                db.put(createRelatedEntityKey(relatedEntity.getId(), relatedEntity.getType(),
                        relatedEntityStartTime, entity.getEntityId(), entity.getEntityType()), EMPTY_BYTES);
                db.put(createEntityMarkerKey(relatedEntity.getId(), relatedEntity.getType(),
                        relatedEntityStartTime),
                        writeReverseOrderedLong(relatedEntityStartAndInsertTime.insertTime));
            } catch (DBException de) {
                LOG.error("Error putting related entity " + relatedEntity.getId() + " of type "
                        + relatedEntity.getType() + " for entity " + entity.getEntityId() + " of type "
                        + entity.getEntityType(), de);
                handleError(entity, response, TimelinePutError.IO_EXCEPTION);
            } catch (IOException e) {
                LOG.error("Error putting related entity " + relatedEntity.getId() + " of type "
                        + relatedEntity.getType() + " for entity " + entity.getEntityId() + " of type "
                        + entity.getEntityType(), e);
                handleError(entity, response, TimelinePutError.IO_EXCEPTION);
            } finally {
                lock.unlock();
                writeLocks.returnLock(lock);
            }
        }
    }

    /**
     * For a given key / value pair that has been written to the db,
     * write additional entries to the db for each primary filter.
     */
    private static void writePrimaryFilterEntries(WriteBatch writeBatch, Map<String, Set<Object>> primaryFilters,
            byte[] key, byte[] value) throws IOException {
        // Without primary filters there is nothing to index.
        if (primaryFilters == null || primaryFilters.isEmpty()) {
            return;
        }
        for (Entry<String, Set<Object>> filter : primaryFilters.entrySet()) {
            final String filterName = filter.getKey();
            for (Object filterValue : filter.getValue()) {
                // one extra copy of the key/value pair per (filter name, filter value)
                final byte[] indexedKey = addPrimaryFilterToKey(filterName, filterValue, key);
                writeBatch.put(indexedKey, value);
            }
        }
    }

    /**
     * Stores each of the given entities, one at a time, while holding the read
     * side of {@code deleteLock}. Entities without a domain id are rejected.
     *
     * @param entities the entities to store
     * @return a response holding one error per failure recorded while storing
     */
    @Override
    public TimelinePutResponse put(TimelineEntities entities) {
        // Acquire the lock before entering the try block so that the finally
        // clause can only ever release a lock that is actually held.
        deleteLock.readLock().lock();
        try {
            TimelinePutResponse response = new TimelinePutResponse();
            for (TimelineEntity entity : entities.getEntities()) {
                put(entity, response, false);
            }
            return response;
        } finally {
            deleteLock.readLock().unlock();
        }
    }

    /**
     * Stores each of the given entities like {@link #put(TimelineEntities)},
     * except that entities without a domain id are accepted.
     *
     * @param entities the entities to store
     * @return a response holding one error per failure recorded while storing
     */
    @Private
    @VisibleForTesting
    public TimelinePutResponse putWithNoDomainId(TimelineEntities entities) {
        // Acquire the lock before entering the try block so that the finally
        // clause can only ever release a lock that is actually held.
        deleteLock.readLock().lock();
        try {
            TimelinePutResponse response = new TimelinePutResponse();
            for (TimelineEntity entity : entities.getEntities()) {
                put(entity, response, true);
            }
            return response;
        } finally {
            deleteLock.readLock().unlock();
        }
    }

    /**
     * Get the unique start time for a given entity as a byte array that sorts
     * the timestamps in reverse order (see {@link
     * GenericObjectMapper#writeReverseOrderedLong(long)}).
     *
     * @param entityId The id of the entity
     * @param entityType The type of the entity
     * @return A byte array, null if not found
     * @throws IOException
     */
    private byte[] getStartTime(String entityId, String entityType) throws IOException {
        final Long startTime = getStartTimeLong(entityId, entityType);
        if (startTime == null) {
            // no start time known for this entity
            return null;
        }
        // encode so that later timestamps sort first
        return writeReverseOrderedLong(startTime);
    }

    /**
     * Get the unique start time for a given entity as a Long.
     *
     * @param entityId The id of the entity
     * @param entityType The type of the entity
     * @return A Long, null if not found
     * @throws IOException
     */
    private Long getStartTimeLong(String entityId, String entityType) throws IOException {
        EntityIdentifier entity = new EntityIdentifier(entityId, entityType);
        try {
            // Use a single get() rather than containsKey() followed by get(): the
            // cache only ever stores non-null values, and one lookup cannot be
            // fooled by an entry disappearing between the two calls
            // (NOTE(review): the cache is presumably a bounded, evicting map).
            Long cached = startTimeReadCache.get(entity);
            if (cached != null) {
                // found the start time in the cache
                return cached;
            }
            // try to look up the start time in the db
            byte[] b = createStartTimeLookupKey(entity.getId(), entity.getType());
            byte[] v = db.get(b);
            if (v == null) {
                // did not find the start time in the db
                return null;
            }
            // found the start time in the db; remember it for later reads
            Long l = readReverseOrderedLong(v, 0);
            startTimeReadCache.put(entity, l);
            return l;
        } catch (DBException e) {
            throw new IOException(e);
        }
    }

    /**
     * Get the unique start time of a given entity, together with the time at
     * which it was inserted, as a StartAndInsertTime. If the start time
     * doesn't exist, set it based on the information provided. Should only be
     * called when a lock has been obtained on the entity.
     *
     * @param entityId The id of the entity
     * @param entityType The type of the entity
     * @param startTime The start time of the entity, or null
     * @param events A list of events for the entity, or null
     * @return A StartAndInsertTime
     * @throws IOException
     */
    private StartAndInsertTime getAndSetStartTime(String entityId, String entityType, Long startTime,
            List<TimelineEvent> events) throws IOException {
        EntityIdentifier entity = new EntityIdentifier(entityId, entityType);
        // Always use the start time from the cache if it exists, whether or not
        // a start time was provided. A single get() replaces the former
        // containsKey()/get() pair; the cache never stores null values.
        StartAndInsertTime cached = startTimeWriteCache.get(entity);
        if (cached != null) {
            return cached;
        }
        if (startTime == null && events != null && !events.isEmpty()) {
            // start time is not provided: prepare one from the earliest event in
            // case it is needed. An empty event list offers no candidate, so the
            // start time stays null rather than becoming Long.MAX_VALUE.
            long min = Long.MAX_VALUE;
            for (TimelineEvent e : events) {
                min = Math.min(min, e.getTimestamp());
            }
            startTime = min;
        }
        // use the start time stored in the db if there is one; otherwise store
        // the candidate (returns null when neither exists)
        return checkStartTimeInDb(entity, startTime);
    }

    /**
     * Checks db for start time and returns it if it exists.  If it doesn't
     * exist, writes the suggested start time (if it is not null).  This is
     * only called when the start time is not found in the cache,
     * so it adds it back into the cache if it is found. Should only be called
     * when a lock has been obtained on the entity.
     */
    private StartAndInsertTime checkStartTimeInDb(EntityIdentifier entity, Long suggestedStartTime)
            throws IOException {
        // key under which the start time of this entity is stored
        final byte[] lookupKey = createStartTimeLookupKey(entity.getId(), entity.getType());
        StartAndInsertTime result = null;
        try {
            final byte[] stored = db.get(lookupKey);
            if (stored != null) {
                // the db already has a start time: it wins over the suggestion
                result = new StartAndInsertTime(readReverseOrderedLong(stored, 0),
                        readReverseOrderedLong(stored, 8));
            } else if (suggestedStartTime == null) {
                // nothing stored and nothing to store
                return null;
            } else {
                result = new StartAndInsertTime(suggestedStartTime, System.currentTimeMillis());

                // persist start time (bytes 0-7) and insert time (bytes 8-15)
                // with a synchronous write
                final byte[] encoded = new byte[16];
                writeReverseOrderedLong(suggestedStartTime, encoded, 0);
                writeReverseOrderedLong(result.insertTime, encoded, 8);
                final WriteOptions syncWrite = new WriteOptions();
                syncWrite.sync(true);
                db.put(lookupKey, encoded, syncWrite);
            }
        } catch (DBException e) {
            throw new IOException(e);
        }
        // refresh both caches with the value that is now in the db
        startTimeWriteCache.put(entity, result);
        startTimeReadCache.put(entity, result.startTime);
        return result;
    }

    /**
     * Creates a key for looking up the start time of a given entity,
     * of the form START_TIME_LOOKUP_PREFIX + entity type + entity id.
     */
    private static byte[] createStartTimeLookupKey(String entityId, String entityType) throws IOException {
        // layout: START_TIME_LOOKUP_PREFIX, entity type, entity id
        KeyBuilder lookupKey = KeyBuilder.newInstance().add(START_TIME_LOOKUP_PREFIX);
        lookupKey = lookupKey.add(entityType).add(entityId);
        return lookupKey.getBytes();
    }

    /**
     * Creates an entity marker, serializing ENTITY_ENTRY_PREFIX + entity type +
     * revstarttime + entity id.
     */
    private static byte[] createEntityMarkerKey(String entityId, String entityType, byte[] revStartTime)
            throws IOException {
        // layout: ENTITY_ENTRY_PREFIX, entity type, reverse start time, entity id
        KeyBuilder markerKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
        markerKey = markerKey.add(revStartTime).add(entityId);
        // serialized in the "lookup" form, unlike the column keys built on top of it
        return markerKey.getBytesForLookup();
    }

    /**
     * Creates an index entry for the given key of the form
     * INDEXED_ENTRY_PREFIX + primaryfiltername + primaryfiltervalue + key.
     */
    private static byte[] addPrimaryFilterToKey(String primaryFilterName, Object primaryFilterValue, byte[] key)
            throws IOException {
        // serialized form of the filter value, embedded in the index key
        final byte[] valueBytes = GenericObjectMapper.write(primaryFilterValue);
        // layout: INDEXED_ENTRY_PREFIX, filter name, filter value, original key
        KeyBuilder indexKey = KeyBuilder.newInstance().add(INDEXED_ENTRY_PREFIX).add(primaryFilterName);
        indexKey = indexKey.add(valueBytes, true).add(key);
        return indexKey.getBytes();
    }

    /**
     * Creates an event key, serializing ENTITY_ENTRY_PREFIX + entity type +
     * revstarttime + entity id + EVENTS_COLUMN + reveventtimestamp + event type.
     */
    private static byte[] createEntityEventKey(String entityId, String entityType, byte[] revStartTime,
            byte[] revEventTimestamp, String eventType) throws IOException {
        // entity part: ENTITY_ENTRY_PREFIX, entity type, reverse start time, entity id
        KeyBuilder eventKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
        eventKey = eventKey.add(revStartTime).add(entityId);
        // event part: EVENTS_COLUMN, reverse event timestamp, event type
        eventKey = eventKey.add(EVENTS_COLUMN).add(revEventTimestamp).add(eventType);
        return eventKey.getBytes();
    }

    /**
     * Creates an event object from the given key, offset, and value.  If the
     * event type is not contained in the specified set of event types,
     * returns null.
     */
    private static TimelineEvent getEntityEvent(Set<String> eventTypes, byte[] key, int offset, byte[] value)
            throws IOException {
        // the key holds the event timestamp followed by the event type
        final KeyParser parser = new KeyParser(key, offset);
        final long timestamp = parser.getNextLong();
        final String eventType = parser.getNextString();

        // a null set of event types accepts every event
        if (eventTypes != null && !eventTypes.contains(eventType)) {
            return null;
        }

        final TimelineEvent event = new TimelineEvent();
        event.setTimestamp(timestamp);
        event.setEventType(eventType);

        // the value holds the serialized event info, which must be a map or null
        final Object info = GenericObjectMapper.read(value);
        if (info != null && !(info instanceof Map)) {
            throw new IOException("Couldn't deserialize event info map");
        }
        @SuppressWarnings("unchecked")
        final Map<String, Object> infoMap = (Map<String, Object>) info;
        event.setEventInfo(infoMap);
        return event;
    }

    /**
     * Creates a primary filter key, serializing ENTITY_ENTRY_PREFIX +
     * entity type + revstarttime + entity id + PRIMARY_FILTERS_COLUMN + name +
     * value.
     */
    private static byte[] createPrimaryFilterKey(String entityId, String entityType, byte[] revStartTime,
            String name, Object value) throws IOException {
        // serialized form of the filter value, appended last
        final byte[] valueBytes = GenericObjectMapper.write(value);
        // entity part: ENTITY_ENTRY_PREFIX, entity type, reverse start time, entity id
        KeyBuilder filterKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
        filterKey = filterKey.add(revStartTime).add(entityId);
        // filter part: PRIMARY_FILTERS_COLUMN, filter name, filter value
        filterKey = filterKey.add(PRIMARY_FILTERS_COLUMN).add(name).add(valueBytes);
        return filterKey.getBytes();
    }

    /**
     * Parses the primary filter from the given key at the given offset and
     * adds it to the given entity.
     */
    private static void addPrimaryFilter(TimelineEntity entity, byte[] key, int offset) throws IOException {
        final KeyParser parser = new KeyParser(key, offset);
        // the filter name comes first ...
        final String filterName = parser.getNextString();
        // ... and the serialized filter value starts where the name ended
        final Object filterValue = GenericObjectMapper.read(key, parser.getOffset());
        entity.addPrimaryFilter(filterName, filterValue);
    }

    /**
     * Creates an other info key, serializing ENTITY_ENTRY_PREFIX + entity type +
     * revstarttime + entity id + OTHER_INFO_COLUMN + name.
     */
    private static byte[] createOtherInfoKey(String entityId, String entityType, byte[] revStartTime, String name)
            throws IOException {
        // entity part: ENTITY_ENTRY_PREFIX, entity type, reverse start time, entity id
        KeyBuilder infoKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
        infoKey = infoKey.add(revStartTime).add(entityId);
        // column part: OTHER_INFO_COLUMN, info name
        infoKey = infoKey.add(OTHER_INFO_COLUMN).add(name);
        return infoKey.getBytes();
    }

    /**
     * Creates a string representation of the byte array from the given offset
     * to the end of the array (for parsing other info keys).
     */
    private static String parseRemainingKey(byte[] b, int offset) {
        // everything from offset to the end of the array is decoded as UTF-8
        final int remaining = b.length - offset;
        final Charset utf8 = Charset.forName("UTF-8");
        return new String(b, offset, remaining, utf8);
    }

    /**
     * Creates a related entity key, serializing ENTITY_ENTRY_PREFIX +
     * entity type + revstarttime + entity id + RELATED_ENTITIES_COLUMN +
     * relatedentity type + relatedentity id.
     */
    private static byte[] createRelatedEntityKey(String entityId, String entityType, byte[] revStartTime,
            String relatedEntityId, String relatedEntityType) throws IOException {
        // entity part: ENTITY_ENTRY_PREFIX, entity type, reverse start time, entity id
        KeyBuilder relatedKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
        relatedKey = relatedKey.add(revStartTime).add(entityId);
        // column part: RELATED_ENTITIES_COLUMN, related entity type, related entity id
        relatedKey = relatedKey.add(RELATED_ENTITIES_COLUMN).add(relatedEntityType).add(relatedEntityId);
        return relatedKey.getBytes();
    }

    /**
     * Parses the related entity from the given key at the given offset and
     * adds it to the given entity.
     */
    private static void addRelatedEntity(TimelineEntity entity, byte[] key, int offset) throws IOException {
        final KeyParser parser = new KeyParser(key, offset);
        // the key stores the related entity's type first, then its id
        final String relatedType = parser.getNextString();
        final String relatedId = parser.getNextString();
        entity.addRelatedEntity(relatedType, relatedId);
    }

    /**
     * Creates a reverse related entity key, serializing ENTITY_ENTRY_PREFIX +
     * entity type + revstarttime + entity id +
     * INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN +
     * relatedentity type + relatedentity id.
     */
    private static byte[] createReverseRelatedEntityKey(String entityId, String entityType, byte[] revStartTime,
            String relatedEntityId, String relatedEntityType) throws IOException {
        // entity part: ENTITY_ENTRY_PREFIX, entity type, reverse start time, entity id
        KeyBuilder reverseKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
        reverseKey = reverseKey.add(revStartTime).add(entityId);
        // column part: INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN, related entity
        // type, related entity id
        reverseKey = reverseKey.add(INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN).add(relatedEntityType)
                .add(relatedEntityId);
        return reverseKey.getBytes();
    }

    /**
     * Creates a domain id key, serializing ENTITY_ENTRY_PREFIX +
     * entity type + revstarttime + entity id + DOMAIN_ID_COLUMN.
     */
    private static byte[] createDomainIdKey(String entityId, String entityType, byte[] revStartTime)
            throws IOException {
        // entity part: ENTITY_ENTRY_PREFIX, entity type, reverse start time, entity id
        KeyBuilder domainKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
        domainKey = domainKey.add(revStartTime).add(entityId);
        // column part: DOMAIN_ID_COLUMN
        domainKey = domainKey.add(DOMAIN_ID_COLUMN);
        return domainKey.getBytes();
    }

    /**
     * Clears the cache to test reloading start times from leveldb (only for
     * testing).
     */
    @VisibleForTesting
    void clearStartTimeCache() {
        // drop every cached start time, from the read cache as well as the
        // write cache
        startTimeReadCache.clear();
        startTimeWriteCache.clear();
    }

    /**
     * Reads the configured size of the start time read cache.
     *
     * @param conf the configuration to read from
     * @return the configured size, or the YarnConfiguration default when the
     *         property is not set
     */
    @VisibleForTesting
    static int getStartTimeReadCacheSize(Configuration conf) {
        final String property = YarnConfiguration.TIMELINE_SERVICE_LEVELDB_START_TIME_READ_CACHE_SIZE;
        final int fallback = YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_START_TIME_READ_CACHE_SIZE;
        return conf.getInt(property, fallback);
    }

    /**
     * Reads the configured size of the start time write cache.
     *
     * @param conf the configuration to read from
     * @return the configured size, or the YarnConfiguration default when the
     *         property is not set
     */
    @VisibleForTesting
    static int getStartTimeWriteCacheSize(Configuration conf) {
        final String property = YarnConfiguration.TIMELINE_SERVICE_LEVELDB_START_TIME_WRITE_CACHE_SIZE;
        final int fallback = YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_START_TIME_WRITE_CACHE_SIZE;
        return conf.getInt(property, fallback);
    }

    /**
     * Collects the entity types that have entity entries in the db, in the
     * order in which the db iterator encounters them.
     *
     * @return the entity types found
     * @throws IOException if the db fails or a lookup key has an unexpected
     *         final byte
     */
    @VisibleForTesting
    List<String> getEntityTypes() throws IOException {
        final List<String> types = new ArrayList<String>();
        LeveldbIterator dbIterator = null;
        try {
            dbIterator = getDbIterator(false);
            dbIterator.seek(ENTITY_ENTRY_PREFIX);
            while (dbIterator.hasNext()) {
                final byte[] currentKey = dbIterator.peekNext().getKey();
                if (currentKey[0] != ENTITY_ENTRY_PREFIX[0]) {
                    // this key is no longer an entity entry
                    break;
                }
                // the entity type directly follows the prefix
                final String type = new KeyParser(currentKey, ENTITY_ENTRY_PREFIX.length).getNextString();
                types.add(type);
                // Jump to the next type: take the lookup key of the current type,
                // turn its final 0x0 byte into 0x1 and seek to the result
                // (presumably this lands just past all keys of the current type).
                final byte[] seekKey = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(type)
                        .getBytesForLookup();
                final int last = seekKey.length - 1;
                if (seekKey[last] != 0x0) {
                    throw new IOException("Found unexpected end byte in lookup key");
                }
                seekKey[last] = 0x1;
                dbIterator.seek(seekKey);
            }
            return types;
        } catch (DBException e) {
            throw new IOException(e);
        } finally {
            IOUtils.cleanup(LOG, dbIterator);
        }
    }

    /**
     * Finds all keys in the db that have a given prefix and deletes them on
     * the given write batch.
     */
    private void deleteKeysWithPrefix(WriteBatch writeBatch, byte[] prefix, LeveldbIterator iterator) {
        // start at the first key that is not smaller than the prefix
        iterator.seek(prefix);
        while (iterator.hasNext()) {
            final byte[] currentKey = iterator.peekNext().getKey();
            if (!prefixMatches(prefix, prefix.length, currentKey)) {
                // first key without the prefix: nothing further to delete
                return;
            }
            writeBatch.delete(currentKey);
            iterator.next();
        }
    }

    /**
     * Deletes the entity at the iterator's position, provided it is of the
     * given type, together with its start time lookup entry, its primary
     * filter index entries and the entries that link it to related entities.
     * All deletions are collected in one write batch that is written
     * synchronously.
     *
     * @param entityType the type of the entity to delete
     * @param reverseTimestamp reverse-ordered timestamp the iterator is seeked
     *        to when {@code seeked} is false
     * @param iterator iterator over the entity entries; it is left positioned
     *        on the first key that does not belong to the deleted entity
     * @param pfIterator separate iterator used to find the primary filter
     *        index entries
     * @param seeked whether {@code iterator} is already positioned; when false
     *        it is seeked to the given type and timestamp first
     * @return true if an entity was deleted, false if the iterator is exhausted
     *         or its next key does not belong to the given entity type
     * @throws IOException if the db reports an error
     */
    @VisibleForTesting
    boolean deleteNextEntity(String entityType, byte[] reverseTimestamp, LeveldbIterator iterator,
            LeveldbIterator pfIterator, boolean seeked) throws IOException {
        WriteBatch writeBatch = null;
        try {
            // typePrefix covers every entity of this type; appending the
            // timestamp gives the position to start from
            KeyBuilder kb = KeyBuilder.newInstance().add(ENTITY_ENTRY_PREFIX).add(entityType);
            byte[] typePrefix = kb.getBytesForLookup();
            kb.add(reverseTimestamp);
            if (!seeked) {
                iterator.seek(kb.getBytesForLookup());
            }
            if (!iterator.hasNext()) {
                return false;
            }
            byte[] entityKey = iterator.peekNext().getKey();
            if (!prefixMatches(typePrefix, typePrefix.length, entityKey)) {
                // the next key belongs to something other than this entity type
                return false;
            }

            // read the start time and entity id from the current key; the 8
            // bytes after the type prefix are the reverse start time, and the
            // first prefixlen bytes of the key identify the entity
            KeyParser kp = new KeyParser(entityKey, typePrefix.length + 8);
            String entityId = kp.getNextString();
            int prefixlen = kp.getOffset();
            byte[] deletePrefix = new byte[prefixlen];
            System.arraycopy(entityKey, 0, deletePrefix, 0, prefixlen);

            writeBatch = db.createWriteBatch();

            if (LOG.isDebugEnabled()) {
                LOG.debug("Deleting entity type:" + entityType + " id:" + entityId);
            }
            // remove start time from cache and db
            writeBatch.delete(createStartTimeLookupKey(entityId, entityType));
            EntityIdentifier entityIdentifier = new EntityIdentifier(entityId, entityType);
            startTimeReadCache.remove(entityIdentifier);
            startTimeWriteCache.remove(entityIdentifier);

            // delete current entity: walk all keys sharing the entity prefix
            for (; iterator.hasNext(); iterator.next()) {
                byte[] key = iterator.peekNext().getKey();
                if (!prefixMatches(entityKey, prefixlen, key)) {
                    break;
                }
                writeBatch.delete(key);

                if (key.length == prefixlen) {
                    // the key is exactly the entity prefix, no column follows
                    continue;
                }
                // the byte after the entity prefix tells which column the key is in
                if (key[prefixlen] == PRIMARY_FILTERS_COLUMN[0]) {
                    // primary filter: also delete every index entry of this
                    // entity under that filter name and value
                    kp = new KeyParser(key, prefixlen + PRIMARY_FILTERS_COLUMN.length);
                    String name = kp.getNextString();
                    Object value = GenericObjectMapper.read(key, kp.getOffset());
                    deleteKeysWithPrefix(writeBatch, addPrimaryFilterToKey(name, value, deletePrefix), pfIterator);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Deleting entity type:" + entityType + " id:" + entityId
                                + " primary filter entry " + name + " " + value);
                    }
                } else if (key[prefixlen] == RELATED_ENTITIES_COLUMN[0]) {
                    // related entity entry: delete the matching invisible
                    // reverse entry stored under the other entity
                    kp = new KeyParser(key, prefixlen + RELATED_ENTITIES_COLUMN.length);
                    String type = kp.getNextString();
                    String id = kp.getNextString();
                    byte[] relatedEntityStartTime = getStartTime(id, type);
                    if (relatedEntityStartTime == null) {
                        // the other entity's key cannot be built without its start time
                        LOG.warn("Found no start time for " + "related entity " + id + " of type " + type
                                + " while " + "deleting " + entityId + " of type " + entityType);
                        continue;
                    }
                    writeBatch.delete(
                            createReverseRelatedEntityKey(id, type, relatedEntityStartTime, entityId, entityType));
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Deleting entity type:" + entityType + " id:" + entityId
                                + " from invisible reverse related entity " + "entry of type:" + type + " id:"
                                + id);
                    }
                } else if (key[prefixlen] == INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN[0]) {
                    // invisible reverse entry: delete the matching related
                    // entity entry stored under the other entity
                    kp = new KeyParser(key, prefixlen + INVISIBLE_REVERSE_RELATED_ENTITIES_COLUMN.length);
                    String type = kp.getNextString();
                    String id = kp.getNextString();
                    byte[] relatedEntityStartTime = getStartTime(id, type);
                    if (relatedEntityStartTime == null) {
                        // the other entity's key cannot be built without its start time
                        LOG.warn("Found no start time for reverse " + "related entity " + id + " of type " + type
                                + " while " + "deleting " + entityId + " of type " + entityType);
                        continue;
                    }
                    writeBatch
                            .delete(createRelatedEntityKey(id, type, relatedEntityStartTime, entityId, entityType));
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Deleting entity type:" + entityType + " id:" + entityId
                                + " from related entity entry of type:" + type + " id:" + id);
                    }
                }
            }
            // apply all collected deletions with a synchronous write
            WriteOptions writeOptions = new WriteOptions();
            writeOptions.sync(true);
            db.write(writeBatch, writeOptions);
            return true;
        } catch (DBException e) {
            throw new IOException(e);
        } finally {
            IOUtils.cleanup(LOG, writeBatch);
        }
    }

    /**
     * Discards entities with start timestamp less than or equal to the given
     * timestamp.
     *
     * <p>Entity types are processed one at a time. While a type is being
     * processed the write side of {@code deleteLock} is held; it is released
     * before the next type is started. An {@link IOException} raised while
     * processing one type is logged and processing continues with the next
     * type.
     *
     * @param timestamp the cutoff timestamp
     * @throws IOException if raised outside the per-type processing (for
     *         example while listing the entity types)
     * @throws InterruptedException if the deletion thread is found to be
     *         interrupted between deletions
     */
    @VisibleForTesting
    void discardOldEntities(long timestamp) throws IOException, InterruptedException {
        byte[] reverseTimestamp = writeReverseOrderedLong(timestamp);
        long totalCount = 0;
        long t1 = System.currentTimeMillis();
        try {
            List<String> entityTypes = getEntityTypes();
            for (String entityType : entityTypes) {
                LeveldbIterator iterator = null;
                LeveldbIterator pfIterator = null;
                long typeCount = 0;
                // Acquire the lock before entering the try block so that the
                // finally clause only ever releases a lock that is really held.
                deleteLock.writeLock().lock();
                try {
                    iterator = getDbIterator(false);
                    pfIterator = getDbIterator(false);

                    if (deletionThread != null && deletionThread.isInterrupted()) {
                        throw new InterruptedException();
                    }
                    // The first call for a type has to position the iterator;
                    // subsequent calls continue from where the previous one stopped.
                    boolean seeked = false;
                    while (deleteNextEntity(entityType, reverseTimestamp, iterator, pfIterator, seeked)) {
                        typeCount++;
                        totalCount++;
                        seeked = true;
                        if (deletionThread != null && deletionThread.isInterrupted()) {
                            throw new InterruptedException();
                        }
                    }
                } catch (IOException e) {
                    LOG.error("Got IOException while deleting entities for type " + entityType
                            + ", continuing to next type", e);
                } finally {
                    IOUtils.cleanup(LOG, iterator, pfIterator);
                    deleteLock.writeLock().unlock();
                    if (typeCount > 0) {
                        LOG.info("Deleted " + typeCount + " entities of type " + entityType);
                    }
                }
            }
        } finally {
            long t2 = System.currentTimeMillis();
            LOG.info("Discarded " + totalCount + " entities for timestamp " + timestamp + " and earlier in "
                    + (t2 - t1) / 1000.0 + " seconds");
        }
    }

    /**
     * Opens a new iterator over the underlying database.
     *
     * @param fillCache value handed to {@code ReadOptions.fillCache}
     * @return a freshly created {@link LeveldbIterator}
     */
    @VisibleForTesting
    LeveldbIterator getDbIterator(boolean fillCache) {
        return new LeveldbIterator(db, new ReadOptions().fillCache(fillCache));
    }

    /**
     * Reads the version record stored under {@code TIMELINE_STORE_VERSION_KEY}.
     *
     * @return the stored version, or {@link #getCurrentVersion()} when no
     *         value (or an empty value) is stored
     * @throws IOException if the database read fails or the stored bytes
     *         cannot be parsed
     */
    Version loadVersion() throws IOException {
        try {
            byte[] stored = db.get(bytes(TIMELINE_STORE_VERSION_KEY));
            boolean hasStoredVersion = stored != null && stored.length > 0;
            if (hasStoredVersion) {
                return new VersionPBImpl(VersionProto.parseFrom(stored));
            }
            // Nothing was stored previously: treat the store as being at the
            // current version.
            return getCurrentVersion();
        } catch (DBException e) {
            throw new IOException(e);
        }
    }

    /**
     * Persists the given version by delegating to {@code dbStoreVersion}.
     * Only used for test.
     *
     * @param state the version to store
     * @throws IOException if {@code dbStoreVersion} fails
     */
    @VisibleForTesting
    void storeVersion(Version state) throws IOException {
        dbStoreVersion(state);
    }

    /**
     * Serializes the given version and writes it under
     * {@code TIMELINE_STORE_VERSION_KEY}.
     *
     * @param state the version to store; cast to {@code VersionPBImpl}
     * @throws IOException wrapping any {@code DBException} raised by the write
     */
    private void dbStoreVersion(Version state) throws IOException {
        byte[] serialized = ((VersionPBImpl) state).getProto().toByteArray();
        try {
            db.put(bytes(TIMELINE_STORE_VERSION_KEY), serialized);
        } catch (DBException e) {
            throw new IOException(e);
        }
    }

    /**
     * Returns the version this implementation considers current.
     *
     * @return {@code CURRENT_VERSION_INFO}
     */
    Version getCurrentVersion() {
        return CURRENT_VERSION_INFO;
    }

    /**
     * Compares the stored timeline store version with the current one.
     *
     * 1) Versioning timeline store: major.minor, e.g. 1.0, 1.1, 1.2...1.25, 2.0 etc.
     * 2) Any incompatible change of TS-store is a major upgrade, and any
     *    compatible change of TS-store is a minor upgrade.
     * 3) Within a minor upgrade, say 1.1 to 1.2:
     *    overwrite the version info and proceed as normal.
     * 4) Within a major upgrade, say 1.2 to 2.0:
     *    throw exception and indicate user to use a separate upgrade tool to
     *    upgrade timeline store or remove incompatible old state.
     *
     * @throws IOException if the stored version is incompatible with the
     *         current version, or if loading/storing the version fails
     */
    private void checkVersion() throws IOException {
        Version loadedVersion = loadVersion();
        // Resolve the current version once so that the value that is compared,
        // logged and persisted is guaranteed to be the same one.
        Version currentVersion = getCurrentVersion();
        LOG.info("Loaded timeline store version info " + loadedVersion);
        if (loadedVersion.equals(currentVersion)) {
            return;
        }
        if (loadedVersion.isCompatibleTo(currentVersion)) {
            LOG.info("Storing timeline store version info " + currentVersion);
            dbStoreVersion(currentVersion);
        } else {
            String incompatibleMessage = "Incompatible version for timeline store: expecting version "
                    + currentVersion + ", but loading version " + loadedVersion;
            LOG.fatal(incompatibleMessage);
            throw new IOException(incompatibleMessage);
        }
    }

    //TODO: make data retention work with the domain data as well
    /**
     * Stores a timeline domain.
     *
     * <p>Every column (description, owner, readers, writers, timestamps) is
     * written twice in one write batch: once under the domain entry key and
     * once under the owner lookup key. A {@code null} description, and
     * {@code null} or empty readers/writers, are stored as {@code EMPTY_BYTES}.
     *
     * <p>The timestamp column holds two reverse-ordered longs. When no
     * timestamp row exists yet for the domain both are set to the current
     * time; otherwise only the second one (offset 8) is overwritten with the
     * current time and the first one is kept as stored.
     *
     * @param domain the domain to store; must have a non-empty id and owner
     * @throws IllegalArgumentException if the domain has no id or no owner
     * @throws IOException wrapping any {@code DBException} raised by the store
     */
    @Override
    public void put(TimelineDomain domain) throws IOException {
        // Validate first so that no write batch is allocated for a request
        // that is going to be rejected anyway.
        if (domain.getId() == null || domain.getId().length() == 0) {
            throw new IllegalArgumentException("Domain doesn't have an ID");
        }
        if (domain.getOwner() == null || domain.getOwner().length() == 0) {
            throw new IllegalArgumentException("Domain doesn't have an owner.");
        }

        final Charset utf8 = Charset.forName("UTF-8");
        WriteBatch writeBatch = null;
        try {
            writeBatch = db.createWriteBatch();

            // Write description
            byte[] domainEntryKey = createDomainEntryKey(domain.getId(), DESCRIPTION_COLUMN);
            byte[] ownerLookupEntryKey = createOwnerLookupKey(domain.getOwner(), domain.getId(),
                    DESCRIPTION_COLUMN);
            if (domain.getDescription() != null) {
                writeBatch.put(domainEntryKey, domain.getDescription().getBytes(utf8));
                writeBatch.put(ownerLookupEntryKey, domain.getDescription().getBytes(utf8));
            } else {
                writeBatch.put(domainEntryKey, EMPTY_BYTES);
                writeBatch.put(ownerLookupEntryKey, EMPTY_BYTES);
            }

            // Write owner
            domainEntryKey = createDomainEntryKey(domain.getId(), OWNER_COLUMN);
            ownerLookupEntryKey = createOwnerLookupKey(domain.getOwner(), domain.getId(), OWNER_COLUMN);
            // Null check for owner is done before
            writeBatch.put(domainEntryKey, domain.getOwner().getBytes(utf8));
            writeBatch.put(ownerLookupEntryKey, domain.getOwner().getBytes(utf8));

            // Write readers
            domainEntryKey = createDomainEntryKey(domain.getId(), READER_COLUMN);
            ownerLookupEntryKey = createOwnerLookupKey(domain.getOwner(), domain.getId(), READER_COLUMN);
            if (domain.getReaders() != null && domain.getReaders().length() > 0) {
                writeBatch.put(domainEntryKey, domain.getReaders().getBytes(utf8));
                writeBatch.put(ownerLookupEntryKey, domain.getReaders().getBytes(utf8));
            } else {
                writeBatch.put(domainEntryKey, EMPTY_BYTES);
                writeBatch.put(ownerLookupEntryKey, EMPTY_BYTES);
            }

            // Write writers
            domainEntryKey = createDomainEntryKey(domain.getId(), WRITER_COLUMN);
            ownerLookupEntryKey = createOwnerLookupKey(domain.getOwner(), domain.getId(), WRITER_COLUMN);
            if (domain.getWriters() != null && domain.getWriters().length() > 0) {
                writeBatch.put(domainEntryKey, domain.getWriters().getBytes(utf8));
                writeBatch.put(ownerLookupEntryKey, domain.getWriters().getBytes(utf8));
            } else {
                writeBatch.put(domainEntryKey, EMPTY_BYTES);
                writeBatch.put(ownerLookupEntryKey, EMPTY_BYTES);
            }

            // Write creation time and modification time
            // We put both timestamps together because they are always retrieved
            // together, and store them in the same way as we did for the entity's
            // start time and insert time.
            domainEntryKey = createDomainEntryKey(domain.getId(), TIMESTAMP_COLUMN);
            ownerLookupEntryKey = createOwnerLookupKey(domain.getOwner(), domain.getId(), TIMESTAMP_COLUMN);
            long currentTimestamp = System.currentTimeMillis();
            byte[] timestamps = db.get(domainEntryKey);
            if (timestamps == null) {
                // First write for this domain: the creation time is "now" too.
                timestamps = new byte[16];
                writeReverseOrderedLong(currentTimestamp, timestamps, 0);
            }
            // The modification time is refreshed on every put.
            writeReverseOrderedLong(currentTimestamp, timestamps, 8);
            writeBatch.put(domainEntryKey, timestamps);
            writeBatch.put(ownerLookupEntryKey, timestamps);
            db.write(writeBatch);
        } catch (DBException e) {
            throw new IOException(e);
        } finally {
            IOUtils.cleanup(LOG, writeBatch);
        }
    }

    /**
     * Builds the key of one column of a domain entry. The key has the form
     * DOMAIN_ENTRY_PREFIX + domain id + column name.
     *
     * @param domainId the id of the domain
     * @param columnName the column name suffix
     * @return the encoded key bytes
     */
    private static byte[] createDomainEntryKey(String domainId, byte[] columnName) throws IOException {
        KeyBuilder domainEntry = KeyBuilder.newInstance()
                .add(DOMAIN_ENTRY_PREFIX)
                .add(domainId)
                .add(columnName);
        return domainEntry.getBytes();
    }

    /**
     * Builds the key of one column of an owner lookup entry. The key has the
     * form OWNER_LOOKUP_PREFIX + owner + domain id + column name.
     *
     * @param owner the owner of the domain
     * @param domainId the id of the domain
     * @param columnName the column name suffix
     * @return the encoded key bytes
     */
    private static byte[] createOwnerLookupKey(String owner, String domainId, byte[] columnName)
            throws IOException {
        KeyBuilder ownerLookup = KeyBuilder.newInstance()
                .add(OWNER_LOOKUP_PREFIX)
                .add(owner)
                .add(domainId)
                .add(columnName);
        return ownerLookup.getBytes();
    }

    /**
     * Retrieves the domain with the given id by scanning the rows whose key
     * starts with DOMAIN_ENTRY_PREFIX + domain id.
     *
     * @param domainId the id of the domain to look up
     * @return the domain, or {@code null} if no matching row exists
     * @throws IOException wrapping any {@code DBException} raised by the store
     */
    @Override
    public TimelineDomain getDomain(String domainId) throws IOException {
        LeveldbIterator domainRows = null;
        try {
            KeyBuilder lookup = KeyBuilder.newInstance().add(DOMAIN_ENTRY_PREFIX).add(domainId);
            byte[] domainPrefix = lookup.getBytesForLookup();
            domainRows = new LeveldbIterator(db);
            domainRows.seek(domainPrefix);
            return getTimelineDomain(domainRows, domainId, domainPrefix);
        } catch (DBException e) {
            throw new IOException(e);
        } finally {
            IOUtils.cleanup(LOG, domainRows);
        }
    }

    /**
     * Retrieves all domains found under the owner lookup prefix of the given
     * owner.
     *
     * <p>The result is sorted by created time in descending order; domains
     * with the same created time are ordered by modified time, also
     * descending.
     *
     * @param owner the owner whose domains are requested
     * @return the matching domains wrapped in a {@link TimelineDomains}
     * @throws IOException wrapping any {@code DBException} raised by the store
     */
    @Override
    public TimelineDomains getDomains(String owner) throws IOException {
        LeveldbIterator iterator = null;
        try {
            byte[] ownerPrefix = KeyBuilder.newInstance().add(OWNER_LOOKUP_PREFIX).add(owner)
                    .getBytesForLookup();
            List<TimelineDomain> found = new ArrayList<TimelineDomain>();
            iterator = new LeveldbIterator(db);
            iterator.seek(ownerPrefix);
            while (iterator.hasNext()) {
                byte[] key = iterator.peekNext().getKey();
                if (!prefixMatches(ownerPrefix, ownerPrefix.length, key)) {
                    break;
                }
                // The domain id follows the owner prefix in the key; the rows of
                // that single domain are consumed by getTimelineDomain, which
                // advances the shared iterator.
                String domainId = new KeyParser(key, ownerPrefix.length).getNextString();
                byte[] domainPrefix = KeyBuilder.newInstance().add(OWNER_LOOKUP_PREFIX).add(owner)
                        .add(domainId).getBytesForLookup();
                TimelineDomain parsed = getTimelineDomain(iterator, domainId, domainPrefix);
                if (parsed != null) {
                    found.add(parsed);
                }
            }
            // Sort the domains to return
            Collections.sort(found, new Comparator<TimelineDomain>() {
                @Override
                public int compare(TimelineDomain domain1, TimelineDomain domain2) {
                    int byCreatedTime = domain2.getCreatedTime().compareTo(domain1.getCreatedTime());
                    if (byCreatedTime != 0) {
                        return byCreatedTime;
                    }
                    return domain2.getModifiedTime().compareTo(domain1.getModifiedTime());
                }
            });
            TimelineDomains result = new TimelineDomains();
            result.addDomains(found);
            return result;
        } catch (DBException e) {
            throw new IOException(e);
        } finally {
            IOUtils.cleanup(LOG, iterator);
        }
    }

    /**
     * Builds a {@link TimelineDomain} from the consecutive rows whose key
     * starts with the given prefix, starting at the iterator's current
     * position. The iterator is left positioned on the first row that does
     * not match the prefix (or at the end).
     *
     * <p>The byte that follows the prefix in the key selects the column. Rows
     * with a {@code null} or empty value are skipped without being applied to
     * the domain, but still count as a matching row.
     *
     * @param iterator the iterator to read from; advanced by this method
     * @param domainId the id set on the returned domain
     * @param prefix the key prefix shared by all rows of the domain
     * @return the populated domain, or {@code null} if no row matched
     */
    private static TimelineDomain getTimelineDomain(LeveldbIterator iterator, String domainId, byte[] prefix)
            throws IOException {
        final Charset utf8 = Charset.forName("UTF-8");
        TimelineDomain domain = new TimelineDomain();
        domain.setId(domainId);
        boolean sawRow = false;
        while (iterator.hasNext()) {
            byte[] key = iterator.peekNext().getKey();
            if (!prefixMatches(prefix, prefix.length, key)) {
                break;
            }
            sawRow = true;
            byte[] value = iterator.peekNext().getValue();
            if (value != null && value.length > 0) {
                byte column = key[prefix.length];
                if (column == DESCRIPTION_COLUMN[0]) {
                    domain.setDescription(new String(value, utf8));
                } else if (column == OWNER_COLUMN[0]) {
                    domain.setOwner(new String(value, utf8));
                } else if (column == READER_COLUMN[0]) {
                    domain.setReaders(new String(value, utf8));
                } else if (column == WRITER_COLUMN[0]) {
                    domain.setWriters(new String(value, utf8));
                } else if (column == TIMESTAMP_COLUMN[0]) {
                    // Created time is stored at offset 0, modified time at offset 8.
                    domain.setCreatedTime(readReverseOrderedLong(value, 0));
                    domain.setModifiedTime(readReverseOrderedLong(value, 8));
                } else {
                    LOG.error("Unrecognized domain column: " + column);
                }
            }
            iterator.next();
        }
        return sawRow ? domain : null;
    }
}