v7db.files.mongodb.MongoContentStorage.java Source code

Java tutorial

Introduction

Here is the source code for v7db.files.mongodb.MongoContentStorage.java

Source

/**
 * Copyright (c) 2012, Thilo Planz. All rights reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package v7db.files.mongodb;

import static v7db.files.mongodb.QueryUtils._ID;

import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.bson.BSONObject;

import v7db.files.Compression;
import v7db.files.MapUtils;
import v7db.files.ZipFile;
import v7db.files.spi.Content;
import v7db.files.spi.ContentConcatenation;
import v7db.files.spi.ContentPointer;
import v7db.files.spi.ContentSHA;
import v7db.files.spi.ContentStorage;
import v7db.files.spi.GzippedContent;
import v7db.files.spi.InlineContent;
import v7db.files.spi.OffsetAndLength;
import v7db.files.spi.StorageScheme;
import v7db.files.spi.StoredContent;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.mongodb.gridfs.GridFS;

/**
 * ContentStorage implementation that uses MongoDB documents.
 * 
 * <ul>
 * <li>The <code>_id</code> field is the content SHA-1 digest (20 bytes of
 * binary data)
 * <li>"Small" content (that does not need to be chunked) is stored in the
 * binary field <code>in</code>.
 * <li>If the data can be compressed using gzip, it will be stored in compressed
 * form as <code>zin</code>. This mode is indicated by setting the value
 * <code>gz</code> for the <code>store</code> field. The uncompressed length is
 * given in the <code>length</code> field.
 * <li>"Large" content is stored as the concatenation of chunks stored
 * out-of-band (in other documents). For very large documents this can also
 * become nested.
 * <li>Other types of "out-of-band" storage schemes are possible and can be
 * provided by extension code.
 * </ul>
 * 
 * @see <a href="https://github.com/thiloplanz/v7files/wiki/StorageFormat">StorageFormat (v7files wiki)</a>
 * 
 * 
 */

public class MongoContentStorage implements ContentStorage {

    /** Length in bytes of a complete SHA-1 digest, i.e. of a full <code>_id</code> value. */
    private static final int SHA_LENGTH = 20;

    /**
     * Upper bound for the payload of a single content document. Input streams
     * are cut into pieces of this size, and a gzipped payload larger than this
     * is discarded in favour of the raw bytes.
     */
    private static final int CHUNK_SIZE = GridFS.DEFAULT_CHUNKSIZE;

    /** Name of the collection used when only a {@link DB} is supplied. */
    public final static String DEFAULT_CONTENT_COLLECTION_NAME = "v7files.content";

    private final DBCollection contentCollection;

    /** Storage schemes by the value of the <code>store</code> field that selects them. */
    private final Map<String, StorageScheme> storageSchemes = new HashMap<String, StorageScheme>();

    /**
     * Creates a storage backed by the collection
     * {@link #DEFAULT_CONTENT_COLLECTION_NAME} of the given database.
     */
    public MongoContentStorage(DB db) {
        this(db.getCollection(DEFAULT_CONTENT_COLLECTION_NAME));
    }

    /**
     * Creates a storage backed by the given collection and registers the
     * built-in storage schemes <code>gz</code>, <code>cat</code> and
     * <code>zip</code>.
     */
    public MongoContentStorage(DBCollection contentCollection) {
        this.contentCollection = contentCollection;
        storageSchemes.put("gz", new GzippedContent());
        storageSchemes.put("cat", new ContentConcatenation());
        storageSchemes.put("zip", new ZipFile.ContentFromZipFile());
    }

    /**
     * Looks up the content document whose <code>_id</code> is the given digest.
     * 
     * @return the content, or <code>null</code> if there is no such document
     */
    public Content getContent(byte[] sha) throws IOException {
        return getContent(contentCollection.findOne(sha));
    }

    /**
     * Resolves a (possibly abbreviated) digest to a pointer carrying the full
     * digest and the content length.
     * 
     * @param shaPrefix
     *            the first bytes of the digest, at most 20
     * @return the pointer, or <code>null</code> if nothing matches
     * @throws IllegalArgumentException
     *             if the prefix is longer than 20 bytes or matches more than
     *             one document
     */
    public ContentSHA findContentPointerByPrefix(byte[] shaPrefix) throws IOException {
        DBObject file = findUniqueByPrefix(shaPrefix);
        if (file == null) {
            return null;
        }
        // The digest has to be read BEFORE the document is turned into a
        // Content: getContent(BSONObject) removes the "_id" field from the
        // document it is given, so reading it afterwards would yield null
        // (assuming QueryUtils._ID is "_id" -- TODO confirm).
        byte[] sha = (byte[]) file.get(_ID);
        Content content = getContent(file);
        return ContentSHA.forDigestAndLength(sha, content.getLength());
    }

    /**
     * Resolves a (possibly abbreviated) digest to the content it identifies.
     * 
     * @param shaPrefix
     *            the first bytes of the digest, at most 20
     * @return the content, or <code>null</code> if nothing matches
     * @throws IllegalArgumentException
     *             if the prefix is longer than 20 bytes or matches more than
     *             one document
     */
    public Content findContentByPrefix(byte[] shaPrefix) throws IOException {
        return getContent(findUniqueByPrefix(shaPrefix));
    }

    /**
     * Finds the single content document whose <code>_id</code> starts with the
     * given bytes.
     * 
     * A full-length digest is looked up directly. A shorter prefix is turned
     * into the range [prefix padded with 0x00, prefix padded with 0xFF] and at
     * most two documents are fetched, which is enough to tell "unique" from
     * "ambiguous".
     * 
     * @return the matching document, or <code>null</code> if there is none
     * @throws IllegalArgumentException
     *             if the prefix is longer than 20 bytes or not unique
     */
    private DBObject findUniqueByPrefix(byte[] shaPrefix) {
        if (shaPrefix.length == SHA_LENGTH) {
            return contentCollection.findOne(shaPrefix);
        }
        if (shaPrefix.length > SHA_LENGTH) {
            throw new IllegalArgumentException(
                    "SHA prefix must be at most " + SHA_LENGTH + " bytes, got " + shaPrefix.length);
        }

        byte[] lower = Arrays.copyOf(shaPrefix, SHA_LENGTH); // 0-padded
        byte[] higher = Arrays.copyOf(shaPrefix, SHA_LENGTH); // FF-padded
        Arrays.fill(higher, shaPrefix.length, SHA_LENGTH, (byte) 0xFF);

        List<DBObject> matches = contentCollection
                .find(QueryUtils.between(_ID, lower, higher), new BasicDBObject()).limit(2).toArray();
        if (matches.isEmpty()) {
            return null;
        }
        if (matches.size() > 1) {
            throw new IllegalArgumentException(Hex.encodeHexString(shaPrefix) + " is not a unique SHA prefix");
        }
        return matches.get(0);
    }

    /**
     * Resolves a content pointer to its content.
     * 
     * <ul>
     * <li>{@link InlineContent} is returned as is.
     * <li>{@link ContentSHA} is loaded by its digest.
     * <li>{@link StoredContent} is loaded by its base digest; if the pointer's
     * length differs from the length of the base content, the result is
     * limited to the first <code>getLength()</code> bytes of the base.
     * </ul>
     * 
     * @return the content, or <code>null</code> if the pointer is
     *         <code>null</code>
     * @throws IllegalArgumentException
     *             if the referenced digest is not stored here, or the pointer
     *             is of an unsupported type
     */
    public Content getContent(ContentPointer pointer) throws IOException {
        if (pointer == null) {
            return null;
        }
        if (pointer instanceof InlineContent) {
            return (Content) pointer;
        }
        if (pointer instanceof ContentSHA) {
            return getRequiredContent(((ContentSHA) pointer).getSHA());
        }
        if (pointer instanceof StoredContent) {
            StoredContent stored = (StoredContent) pointer;
            Content base = getRequiredContent(stored.getBaseSHA());
            if (stored.getLength() != base.getLength()) {
                return new OffsetAndLength(base, 0, stored.getLength());
            }
            return base;
        }
        throw new IllegalArgumentException(pointer.getClass().toString());
    }

    /**
     * Like {@link #getContent(byte[])}, but a missing document is an error.
     * 
     * @throws IllegalArgumentException
     *             if no document with that digest exists
     */
    private Content getRequiredContent(byte[] sha) throws IOException {
        Content base = getContent(sha);
        if (base == null) {
            throw new IllegalArgumentException("base SHA not found: " + Hex.encodeHexString(sha));
        }
        return base;
    }

    /**
     * Turns a content document into a Content.
     * 
     * Documents without a <code>store</code> field (or with the value
     * <code>raw</code>) are deserialized as inline content, everything else is
     * delegated to the registered storage scheme.
     * 
     * Note that this REMOVES the <code>_id</code> field from the document that
     * is passed in; callers that still need the digest must read it first.
     * 
     * @return the content, or <code>null</code> if <code>data</code> is
     *         <code>null</code>
     * @throws UnsupportedOperationException
     *             if no storage scheme is registered for the <code>store</code>
     *             value
     */
    @SuppressWarnings("unchecked")
    private Content getContent(BSONObject data) throws IOException {
        if (data == null) {
            return null;
        }
        data.removeField("_id");
        String store = BSONUtils.getString(data, "store");
        if (store == null || "raw".equals(store)) {
            return InlineContent.deserialize(data.toMap());
        }
        StorageScheme scheme = storageSchemes.get(store);
        if (scheme == null) {
            throw new UnsupportedOperationException(store);
        }
        return scheme.getContent(this, data.toMap());
    }

    /**
     * read into the buffer, continuing until the stream is finished or the
     * buffer is full.
     * 
     * @return the number of bytes read, which could be 0 (not -1)
     * @throws IOException
     */
    private static int readFully(InputStream data, byte[] buffer) throws IOException {
        int read = data.read(buffer);
        if (read == -1) {
            return 0;
        }
        while (read < buffer.length) {
            int added = data.read(buffer, read, buffer.length - read);
            if (added == -1) {
                return read;
            }
            read += added;
        }
        return read;
    }

    /**
     * Stores the complete contents of the stream and closes it (also on
     * failure).
     * 
     * The stream is consumed in pieces of at most {@link #CHUNK_SIZE} bytes,
     * each of which is stored as its own document. If there is more than one
     * piece, an additional <code>cat</code> document is written under the
     * digest of the whole stream, listing the serialized chunk pointers in its
     * <code>base</code> field. An empty stream is stored as a single empty
     * chunk.
     * 
     * @return a pointer with the digest and length of the whole content
     */
    public ContentSHA storeContent(InputStream data) throws IOException {
        try {
            // "SHA" is the JDK's name for SHA-1
            MessageDigest completeSHA = MessageDigest.getInstance("SHA");
            long completeLength = 0;
            byte[] chunk = new byte[CHUNK_SIZE];
            int read;
            List<ContentSHA> chunks = new ArrayList<ContentSHA>();

            while (0 < (read = readFully(data, chunk))) {
                completeSHA.update(chunk, 0, read);
                completeLength += read;
                chunks.add(storeContentChunk(chunk, 0, read));
            }
            if (chunks.isEmpty()) {
                return storeContentChunk(ArrayUtils.EMPTY_BYTE_ARRAY, 0, 0);
            }
            if (chunks.size() == 1) {
                // a single chunk IS the whole content, its pointer is the result
                return chunks.get(0);
            }

            List<Map<String, Object>> bases = new ArrayList<Map<String, Object>>(chunks.size());
            for (ContentSHA c : chunks) {
                bases.add(c.serialize());
            }
            ContentSHA result = ContentSHA.forDigestAndLength(completeSHA.digest(), completeLength);
            long existing = contentCollection.count(new BasicDBObject(_ID, result.getSHA()));
            if (existing == 0) {
                contentCollection.insert(
                        new BasicDBObject(_ID, result.getSHA()).append("store", "cat").append("base", bases),
                        WriteConcern.SAFE);
            }
            return result;
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        } finally {
            IOUtils.closeQuietly(data);
        }
    }

    /**
     * Stores <code>length</code> bytes of <code>bytes</code>, starting at
     * <code>offset</code>, as one content document, unless a document with the
     * same digest already exists.
     * 
     * If {@link Compression#gzip} yields a result that is not larger than
     * {@link #CHUNK_SIZE}, that result is stored as <code>zin</code> with
     * <code>store: gz</code>; otherwise the raw bytes are stored as
     * <code>in</code>.
     * 
     * NOTE(review): no <code>length</code> field is written for gzipped
     * chunks here -- confirm that readers of <code>gz</code> documents do not
     * require it.
     * 
     * @return the pointer (digest and length) of the chunk
     */
    private ContentSHA storeContentChunk(byte[] bytes, final int offset, final int length) throws IOException {
        ContentSHA pointer = ContentSHA.calculate(bytes, offset, length);
        byte[] sha = pointer.getSHA();

        if (contentCollection.count(new BasicDBObject(_ID, sha)) != 0) {
            return pointer; // already stored
        }

        byte[] gzipped = Compression.gzip(bytes, offset, length);
        if (gzipped != null && gzipped.length <= CHUNK_SIZE) {
            contentCollection.insert(new BasicDBObject(_ID, sha).append("zin", gzipped).append("store", "gz"),
                    WriteConcern.SAFE);
        } else {
            // only copy when the requested range is not the whole array
            byte[] raw = bytes;
            if (offset > 0 || bytes.length != length) {
                raw = ArrayUtils.subarray(bytes, offset, offset + length);
            }
            contentCollection.insert(new BasicDBObject(_ID, sha).append("in", raw), WriteConcern.SAFE);
        }
        return pointer;
    }

    /**
     * Stores a storage-scheme representation (a map with a <code>store</code>
     * field naming a registered scheme, and a <code>length</code> field) as a
     * content document.
     * 
     * The document's <code>_id</code> is the SHA-1 digest of the content the
     * scheme produces for this representation. Nothing is written if a
     * document with that digest already exists.
     * 
     * @return a pointer with the calculated digest and the given length
     * @throws UnsupportedOperationException
     *             if the <code>store</code> value does not name a registered
     *             scheme
     */
    public ContentPointer storeContent(Map<String, Object> storageScheme) throws IOException {
        StorageScheme scheme = storageSchemes.get(storageScheme.get("store"));
        if (scheme == null) {
            throw new UnsupportedOperationException(storageScheme.toString());
        }

        DBObject document = new BasicDBObject();
        for (Map.Entry<String, Object> e : storageScheme.entrySet()) {
            document.put(e.getKey(), e.getValue());
        }
        long length = BSONUtils.getRequiredLong(document, "length");

        // DigestUtils.sha only reads the stream, so it has to be closed here
        byte[] sha;
        InputStream content = scheme.getContent(this, storageScheme).getInputStream();
        try {
            sha = DigestUtils.sha(content);
        } finally {
            IOUtils.closeQuietly(content);
        }

        long existing = contentCollection.count(new BasicDBObject(_ID, sha));
        if (existing == 0) {
            document.put(_ID, sha);
            contentCollection.insert(document, WriteConcern.SAFE);
        }
        return new StoredContent(sha, length);
    }

    /**
     * Supported formats: 1) Serialized ContentPointers, e.g.
     * 
     * <pre>
     * { in: [bytes] }
     * </pre>
     * 
     * and
     * 
     * <pre>
     * { sha: &lt;sha&gt;, length: 123 }
     * </pre>
     * 
     * 2) Internal StorageScheme representations (must have
     * <code>{store: something}</code>)
     * 
     * @return the content, or <code>null</code> if <code>data</code> is
     *         <code>null</code>
     * @throws UnsupportedOperationException
     *             if the map is in none of the supported formats, or names an
     *             unregistered storage scheme
     */
    public Content getContent(Map<String, Object> data) throws IOException {
        if (data == null) {
            return null;
        }
        String store = MapUtils.getString(data, "store");
        if (store == null || "raw".equals(store)) {
            if (data.containsKey("in")) {
                return InlineContent.deserialize(data);
            }
            if (data.containsKey("sha")) {
                return new StoredContent((byte[]) data.get("sha"), MapUtils.getRequiredLong(data, "length"))
                        .loadOrLazyLoad(this, 8 * 1024);
            }
            throw new UnsupportedOperationException(data.toString());
        }

        StorageScheme scheme = storageSchemes.get(store);
        if (scheme == null) {
            throw new UnsupportedOperationException(store);
        }
        return scheme.getContent(this, data);
    }
}