com.github.jillesvangurp.persistentcachingmap.PersistentCachingMap.java Source code

Java tutorial

Introduction

Here is the source code for com.github.jillesvangurp.persistentcachingmap.PersistentCachingMap.java.

Source

/**
 * Copyright (c) 2013, Jilles van Gurp
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.github.jillesvangurp.persistentcachingmap;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.lang.StringUtils;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.jillesvangurp.iterables.Iterables;
import com.jillesvangurp.iterables.LineIterable;
import com.jillesvangurp.iterables.ProcessingIterable;
import com.jillesvangurp.iterables.Processor;

/**
 * <p>
 * Persistent caching map for working with large map like data structures that do not fit into memory. Like any map,
 * this map uses buckets to store things. Each bucket is persisted in a file and has an id that is a long. To persist
 * and calculate the bucket ids from the key, a {@link PersistentCachingMapCodec} instance is used. An instance of this
 * codec must be provided at construction time.
 * </p>
 *
 * <p>
 * This is not a proper database and it does not make guarantees with respect to the traditional ACID properties. In
 * particular, changes are not written right away, which means you risk data loss in the case of e.g. a crash.
 * </p>
 *
 * <p>
 * Some of the map operations throw UnsupportedOperations because the operation would be too expensive. So, don't use
 * anything that returns a set, size, and containsValue.
 * </p>
 *
 * <p>
 * To support iterating all entries, this class implements Iterable.
 * </p>
 *
 * @param <Key>
 * @param <Value>
 */
public class PersistentCachingMap<Key, Value>
        implements Iterable<Map.Entry<Key, Value>>, Map<Key, Value>, Closeable {
    // Translates keys/values to/from their serialized string form and derives bucket ids from keys.
    private final PersistentCachingMapCodec<Key, Value> codec;
    // Size-bounded cache of loaded buckets; the removal listener writes changed buckets on eviction.
    private final LoadingCache<Long, Bucket> cache;
    // Ids of all buckets known to exist; persisted via writeBucketIds()/readBucketIds().
    private final Set<Long> bucketIds = new ConcurrentSkipListSet<>();
    // Per-bucket-id lock so the same bucket file is never read and written concurrently.
    private final IdLock idLock = new IdLock();

    // Root directory under which bucket files and bucketIds.gz are stored.
    private final String dataDir;

    /**
     * Creates the map and restores the set of known bucket ids from a previous run.
     *
     * @param dataDir directory in which bucket files and the bucket id index are stored
     * @param codec codec that (de)serializes keys/values and derives a bucket id from a key
     * @param cacheSize maximum number of buckets kept in memory before eviction kicks in
     */
    public PersistentCachingMap(String dataDir, PersistentCachingMapCodec<Key, Value> codec, long cacheSize) {
        this.codec = codec;
        this.dataDir = dataDir;
        // Loads a bucket's content from disk on first access; guarded by the per-id lock so a
        // concurrent eviction write of the same bucket cannot interleave with this read.
        CacheLoader<Long, Bucket> loader = new CacheLoader<Long, Bucket>() {
            @Override
            public Bucket load(Long id) throws Exception {
                Bucket bucket = new Bucket(id);
                idLock.acquire(id);
                try {
                    bucket.read();
                } finally {
                    idLock.release(id);
                }
                return bucket;
            }
        };
        cache = CacheBuilder.newBuilder().maximumSize(cacheSize)
                .removalListener(new RemovalListener<Long, Bucket>() {
                    @Override
                    public void onRemoval(RemovalNotification<Long, Bucket> notification) {
                        Long bucketId = notification.getKey();
                        // make sure changed buckets are written on eviction
                        // NOTE(review): with purely size-based eviction getValue() should be
                        // non-null, but Guava documents it can be null for some configurations —
                        // confirm if weak/soft values are ever enabled.
                        idLock.acquire(bucketId);
                        try {
                            notification.getValue().write();
                        } finally {
                            idLock.release(bucketId);
                        }
                    }
                }).build(loader);
        // restore bucket ids persisted by a previous run, if any
        readBucketIds();
    }

    /**
     * Looks up the value for a key by loading the key's bucket (via the cache) and reading
     * from it. The returned value is a serialize/deserialize clone so callers cannot
     * accidentally mutate the stored object.
     *
     * @return the mapped value, or null when there is no mapping
     * @throws IllegalStateException when loading the bucket fails
     */
    @SuppressWarnings("unchecked")
    // Map.get is unchecked
    @Override
    public Value get(Object key) {
        try {
            long bucketId = codec.bucketId((Key) key);
            if (bucketIds.contains(bucketId)) {
                Value value = cache.get(bucketId).get((Key) key);
                // return a clone, this ensures people don't accidentally modify objects in the map.
                if (value != null) {
                    return codec.deserializeValue(codec.serializeValue(value));
                }
            }
            return null;
        } catch (ExecutionException e) {
            // Fix: the original called e.printStackTrace() and returned null, silently hiding
            // bucket load failures; propagate like put/remove/clear do.
            throw new IllegalStateException(e);
        }
    }

    /**
     * Associates the value with the key in the key's bucket, registering the bucket id if it
     * is new.
     *
     * @return the previous value associated with the key, or null if there was none, as
     *         required by the {@link Map#put} contract (the original returned the new value,
     *         which violates the contract)
     * @throws IllegalStateException when loading the bucket fails
     */
    @Override
    public Value put(Key key, Value value) {
        try {
            long bucketId = codec.bucketId(key);
            synchronized (this) {
                // Set.add is already a no-op when the id is present; no contains() check needed
                bucketIds.add(bucketId);
                Bucket bucket = cache.get(bucketId);
                Value previous = bucket.get(key);
                bucket.put(key, value);
                return previous;
            }
        } catch (ExecutionException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Removes the mapping for a key from its bucket, if the bucket exists.
     *
     * @return the previously mapped value, or null when there was no mapping
     * @throws IllegalStateException when loading the bucket fails
     */
    @SuppressWarnings("unchecked")
    // Map.remove is unchecked
    @Override
    public Value remove(Object key) {
        try {
            long bucketId = codec.bucketId((Key) key);
            synchronized (this) {
                // Fix: the original *added* the bucket id here, so removing a key whose bucket
                // never existed registered a brand-new empty bucket (and, because remove marks
                // the bucket changed, later persisted a phantom empty bucket file). There is
                // nothing to remove from a bucket that does not exist.
                if (!bucketIds.contains(bucketId)) {
                    return null;
                }
                Bucket bucket = cache.get(bucketId);
                return bucket.remove((Key) key);
            }
        } catch (ExecutionException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Clears the content of every known bucket; the set of known bucket ids itself is left
     * untouched.
     *
     * @throws IllegalStateException when loading a bucket fails
     */
    @Override
    public void clear() {
        Iterator<Long> ids = bucketIds.iterator();
        while (ids.hasNext()) {
            Long bucketId = ids.next();
            try {
                cache.get(bucketId).clear();
            } catch (ExecutionException e) {
                throw new IllegalStateException(e);
            }
        }
    }

    /**
     * Persists the current set of bucket ids to dataDir/bucketIds.gz, one id per line, so
     * readBucketIds() can restore it on the next startup.
     *
     * Fix: when the set is empty, any stale index left over from an earlier non-empty state
     * is now deleted — previously it was left behind and would resurrect ids of buckets that
     * no longer exist on the next start.
     */
    void writeBucketIds() {
        File path = new File(dataDir, "bucketIds.gz");
        if (bucketIds.isEmpty()) {
            if (path.exists() && !path.delete()) {
                throw new IllegalStateException("could not delete " + path);
            }
            return;
        }
        File dir = path.getParentFile();
        if (!dir.exists()) {
            if (!dir.mkdirs()) {
                throw new IllegalStateException("could not create directory " + dir);
            }
        }
        try (BufferedWriter out = gzipFileWriter(path)) {
            for (Long id : bucketIds) {
                out.write(id + "\n");
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Restores the set of known bucket ids from dataDir/bucketIds.gz, if that file exists;
     * expects one id per line as written by writeBucketIds().
     */
    void readBucketIds() {
        File bucketIdFile = new File(dataDir, "bucketIds.gz");
        if (!bucketIdFile.exists()) {
            return;
        }
        try (LineIterable iterable = new LineIterable(gzipFileReader(bucketIdFile))) {
            Iterator<String> lines = iterable.iterator();
            while (lines.hasNext()) {
                bucketIds.add(Long.valueOf(lines.next().trim()));
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Forces all cached buckets out of the cache — which makes the removal listener write
     * every changed bucket to disk — and then persists the bucket id index.
     */
    public void flush() {
        cache.invalidateAll();
        writeBucketIds();
    }

    /**
     * Maps a bucket id to its file location: the id is left-padded with zeros to ten digits
     * and the first two three-digit groups become nested directories, e.g. id 42 maps to
     * dataDir/000/000/0000000042.
     *
     * NOTE(review): assumes bucket ids are non-negative; a negative id would pad to something
     * like "00000-1234" — confirm the codec's bucketId contract.
     */
    File bucketPath(long id) {
        String padded = StringUtils.leftPad("" + id, 10, '0');
        String level1 = padded.substring(0, 3);
        String level2 = padded.substring(3, 6);
        return new File(dataDir, level1 + "/" + level2 + "/" + padded);
    }

    /** Flushes all pending changes to disk; the map remains usable afterwards. */
    @Override
    public void close() throws IOException {
        flush();
    }

    /**
     * Not supported: counting all entries would require loading every bucket, and Map.size
     * returns an int which could not represent the result anyway.
     */
    @Override
    public int size() {
        throw new UnsupportedOperationException(
                "This would be expensive and Map.size returns an int instead of a long.");
    }

    /**
     * @return true when no buckets are known, i.e. the map holds no entries.
     */
    @Override
    public boolean isEmpty() {
        // Fix: size() on a ConcurrentSkipListSet traverses all elements (O(n));
        // isEmpty() is constant time.
        return bucketIds.isEmpty();
    }

    /**
     * @return true when a value is currently mapped to the given key; note this loads the
     *         key's bucket and clones the value, so it costs as much as get().
     */
    @Override
    public boolean containsKey(Object key) {
        Object mapped = get(key);
        return mapped != null;
    }

    /** Not supported: checking values would require scanning every bucket. */
    @Override
    public boolean containsValue(Object value) {
        throw new UnsupportedOperationException("This would be expensive");
    }

    /** Copies every entry of the given map into this map, one put at a time. */
    @Override
    public void putAll(Map<? extends Key, ? extends Value> m) {
        for (Map.Entry<? extends Key, ? extends Value> entry : m.entrySet()) {
            put(entry.getKey(), entry.getValue());
        }
    }

    /** Not supported: materializing all keys would require loading every bucket. */
    @Override
    public Set<Key> keySet() {
        throw new UnsupportedOperationException("This would be expensive");
    }

    /** Not supported: materializing all values would require loading every bucket. */
    @Override
    public Collection<Value> values() {
        throw new UnsupportedOperationException("This would be expensive");
    }

    /** Not supported: use {@link #iterator()} to stream over all entries instead. */
    @Override
    public Set<java.util.Map.Entry<Key, Value>> entrySet() {
        throw new UnsupportedOperationException("This would be expensive");
    }

    /**
     * Iterates over all entries in the map by lazily turning each known bucket id into that
     * bucket's entry iterable (loading the bucket through the cache) and concatenating the
     * per-bucket iterables.
     */
    @Override
    public Iterator<java.util.Map.Entry<Key, Value>> iterator() {
        // converts a bucket id into the iterable of that bucket's entries
        Processor<Long, Iterable<Entry<Key, Value>>> processor = new Processor<Long, Iterable<Entry<Key, Value>>>() {

            @Override
            public Iterable<java.util.Map.Entry<Key, Value>> process(Long bucketId) {
                try {
                    return cache.get(bucketId);
                } catch (ExecutionException e) {
                    throw new IllegalStateException(e);
                }
            }
        };
        ProcessingIterable<Long, Iterable<Map.Entry<Key, Value>>> iterables = new ProcessingIterable<Long, Iterable<Map.Entry<Key, Value>>>(
                bucketIds.iterator(), processor);
        // flatten the iterable of per-bucket iterables into one entry iterator
        return Iterables.compose(iterables).iterator();
    }

    /**
     * @return a buffered writer that gzip-compresses everything written to the file, encoded
     *         as UTF-8
     */
    private static BufferedWriter gzipFileWriter(File file) throws IOException {
        // StandardCharsets.UTF_8 instead of Charset.forName("utf-8"): no lookup, no typo risk.
        // (Dead commented-out bzip2 variants of these helpers were removed.)
        return new BufferedWriter(
                new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)), StandardCharsets.UTF_8),
                64 * 1024);
    }

    /**
     * @return a buffered reader that gzip-decompresses the file's content, decoded as UTF-8
     */
    private static BufferedReader gzipFileReader(File file) throws IOException {
        return new BufferedReader(
                new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), StandardCharsets.UTF_8));
    }

    /**
     * Simple per-id mutual exclusion: acquire(id) blocks until no other thread holds that id.
     * Not reentrant — a thread re-acquiring an id it already holds will block forever (same as
     * the original).
     *
     * Fix: the original acquire() busy-spun in a while(true) loop, repeatedly taking and
     * releasing the lock at full speed until the id became free, burning a CPU core while
     * waiting. Waiters now park on a Condition that release() signals. Also, lock() is taken
     * before the try block — the original's lock-inside-try would call unlock() on an unheld
     * lock if lock() itself failed.
     */
    class IdLock {
        private final Set<Long> ids = new ConcurrentSkipListSet<>();
        Lock bucketLock = new ReentrantLock();
        private final Condition idReleased = bucketLock.newCondition();

        /** Blocks until the id has been claimed exclusively for the calling thread. */
        public void acquire(Long id) {
            bucketLock.lock();
            try {
                // Set.add returns false while another thread still holds the id
                while (!ids.add(id)) {
                    // uninterruptibly: preserves the original signature (no InterruptedException)
                    idReleased.awaitUninterruptibly();
                }
            } finally {
                bucketLock.unlock();
            }
        }

        /** Releases the id and wakes any threads waiting to acquire it. */
        public void release(Long id) {
            bucketLock.lock();
            try {
                ids.remove(id);
                idReleased.signalAll();
            } finally {
                bucketLock.unlock();
            }
        }
    }

    /**
     * A single persistent bucket: an in-memory map of entries backed by one gzip file on disk
     * (see bucketPath). A bucket tracks whether it was modified so only changed buckets are
     * written back on eviction/flush.
     */
    public class Bucket implements Iterable<Map.Entry<Key, Value>> {
        long id;
        // set whenever the in-memory content diverges from what is on disk
        AtomicBoolean changed = new AtomicBoolean(false);

        Map<Key, Value> map = new ConcurrentHashMap<>();

        public Bucket(long id) {
            this.id = id;
        }

        /** Empties the bucket and marks it changed so the clearing is persisted. */
        public void clear() {
            if (!map.isEmpty()) {
                map.clear();
                // Fix: the original never set the changed flag here, so write() skipped a
                // cleared bucket and its old content was resurrected from disk on the next read.
                changed.set(true);
            }
        }

        public Value get(Key key) {
            return map.get(key);
        }

        public void put(Key key, Value value) {
            map.put(key, value);
            changed.set(true);
        }

        public Value remove(Key key) {
            changed.set(true);
            return map.remove(key);
        }

        /**
         * Persists the bucket if it changed: an empty bucket's file is deleted (if present)
         * and its id unregistered; a non-empty bucket is serialized one "key;value" line per
         * entry. Callers must hold the idLock for this bucket's id.
         */
        public void write() {
            if (changed.get()) {
                File path = bucketPath(id);
                if (map.isEmpty()) {
                    // Fix: the original only handled "empty AND file exists"; an empty bucket
                    // with no file fell through to the else branch and wrote a phantom empty
                    // gzip file. An empty bucket now never leaves a file behind.
                    if (path.exists() && !path.delete()) {
                        throw new IllegalStateException("could not delete " + path);
                    }
                    bucketIds.remove(id);
                } else {
                    File dir = path.getParentFile();
                    if (!dir.exists()) {
                        if (!dir.mkdirs()) {
                            throw new IllegalStateException("could not create directory " + dir);
                        }
                    }
                    try (BufferedWriter out = gzipFileWriter(path)) {
                        for (Entry<Key, Value> e : map.entrySet()) {
                            out.write("" + codec.serializeKey(e.getKey()) + ";" + codec.serializeValue(e.getValue())
                                    + "\n");
                        }
                    } catch (IOException e) {
                        throw new IllegalStateException("could not write file " + path, e);
                    }
                }
                // content and disk are in sync again
                changed.set(false);
            }
        }

        /**
         * Loads the bucket's entries from its file, if it exists; expects one "key;value"
         * line per entry as produced by write().
         * NOTE(review): a line without a ';' would throw StringIndexOutOfBoundsException —
         * presumably write() guarantees the format, but corrupt files will fail loudly.
         */
        public void read() {
            File path = bucketPath(id);
            if (path.exists()) {
                try (LineIterable iterable = new LineIterable(gzipFileReader(path))) {
                    for (String line : iterable) {
                        int idx = line.indexOf(';');
                        Key key = codec.deserializeKey(line.substring(0, idx));
                        map.put(key, codec.deserializeValue(line.substring(idx + 1)));
                    }
                } catch (IOException e) {
                    throw new IllegalStateException("could not read file " + path, e);
                }
            }
        }

        /** @return the number of entries currently in this bucket. */
        public long size() {
            return map.size();
        }

        /**
         * Iterates the bucket's entries; values are exposed as serialize/deserialize clones so
         * callers cannot mutate stored objects, and setValue is unsupported.
         *
         * Fix: the original constructed this cloning iterable but then DISCARDED it and
         * returned the raw map.entrySet().iterator(), leaking mutable internal entries.
         */
        @Override
        public Iterator<Entry<Key, Value>> iterator() {
            ProcessingIterable<Entry<Key, Value>, Entry<Key, Value>> cloningEntries = new ProcessingIterable<>(
                    map.entrySet().iterator(),
                    new Processor<Entry<Key, Value>, Entry<Key, Value>>() {

                        @Override
                        public Entry<Key, Value> process(final Entry<Key, Value> input) {

                            return new Entry<Key, Value>() {

                                @Override
                                public Key getKey() {
                                    return input.getKey();
                                }

                                @Override
                                public Value getValue() {
                                    // return a clone
                                    return codec.deserializeValue(codec.serializeValue(input.getValue()));
                                }

                                @Override
                                public Value setValue(Value value) {
                                    throw new UnsupportedOperationException("immutable");
                                }
                            };
                        }
                    });

            return cloningEntries.iterator();
        }
    }
}