io.mandrel.document.impl.MongoDocumentStore.java Source code

Java tutorial

Introduction

Here is the source code for io.mandrel.document.impl.MongoDocumentStore.java

Source

/*
 * Licensed to Mandrel under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Mandrel licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package io.mandrel.document.impl;

import io.mandrel.common.service.TaskContext;
import io.mandrel.data.content.DataExtractor;
import io.mandrel.document.Document;
import io.mandrel.document.NavigableDocumentStore;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map.Entry;
import java.util.function.Function;
import java.util.stream.Collectors;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.Accessors;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientOptions;
import com.mongodb.MongoClientURI;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.ReplaceOneModel;
import com.mongodb.client.model.UpdateOptions;

/**
 * {@link NavigableDocumentStore} that persists documents in a MongoDB
 * collection.
 * <p>
 * Each document is stored as one BSON document whose {@code _id} is the
 * document id and whose remaining fields are the document's own entries.
 */
@Data
@EqualsAndHashCode(callSuper = false)
@Accessors(chain = true, fluent = true)
public class MongoDocumentStore extends NavigableDocumentStore {

    /**
     * Serializable configuration from which a {@link MongoDocumentStore} is
     * built.
     */
    @Data
    @Accessors(chain = false, fluent = false)
    @EqualsAndHashCode(callSuper = false)
    public static class MongoDocumentStoreDefinition extends DocumentStoreDefinition<MongoDocumentStore> {

        private static final long serialVersionUID = -9205125497698919267L;

        /** MongoDB connection string. */
        @JsonProperty("uri")
        private String uri = "mongodb://localhost";
        /** Name of the database holding the document collections. */
        @JsonProperty("database")
        private String database = "mandrel";
        /**
         * {@link MessageFormat} pattern of the collection name; <code>{0}</code>
         * is replaced by the spider id.
         */
        @JsonProperty("collection")
        private String collection = "document_{0}";
        /** Page size used by the store when the caller does not give a usable one. */
        @JsonProperty("batch_size")
        private int batchSize = 1000;

        @Override
        public String name() {
            return "mongo";
        }

        /**
         * Creates a store backed by a new {@link MongoClient} connected to
         * {@link #uri}.
         *
         * @param context
         *            the task context; its spider id is injected into the
         *            collection name pattern
         * @return a new store; the caller owns it and must {@code close()} it
         */
        @Override
        public MongoDocumentStore build(TaskContext context) {
            MongoClientOptions.Builder options = MongoClientOptions.builder();
            // TODO options.description("");
            MongoClientURI uri = new MongoClientURI(this.uri, options);
            // NOTE(review): if getSpiderId() returns a number, MessageFormat
            // applies locale-specific grouping (e.g. "document_1,234") --
            // confirm this is the intended collection name.
            return new MongoDocumentStore(context, dataExtractor, new MongoClient(uri), database,
                    MessageFormat.format(collection, context.getSpiderId()), batchSize);
        }
    }

    /** Name of the BSON field carrying the document id. */
    private static final String ID_FIELD = "_id";

    private final MongoClient mongoClient;
    private final MongoCollection<org.bson.Document> collection;
    private final int batchSize;

    /**
     * Converts a document into its BSON representation: all of its entries
     * plus its id under {@code _id}.
     */
    private static org.bson.Document toBson(Document entry) {
        org.bson.Document document = new org.bson.Document();
        document.putAll(entry);
        document.put(ID_FIELD, entry.getId());
        return document;
    }

    /**
     * Converts a BSON document back into a document.
     * <p>
     * The {@code _id} field is not copied as a regular entry: it holds the id
     * string rather than a list, so casting it to {@link List} would fail
     * with a {@link ClassCastException}. It is restored through
     * {@code setId} instead.
     */
    @SuppressWarnings("unchecked")
    private static Document fromBson(org.bson.Document entry) {
        Document document = new Document();
        for (Entry<String, Object> item : entry.entrySet()) {
            if (ID_FIELD.equals(item.getKey())) {
                continue;
            }
            document.put(item.getKey(), (List<? extends Object>) item.getValue());
        }
        document.setId(entry.getString(ID_FIELD));
        return document;
    }

    /**
     * @param context
     *            the task context, passed to the parent store
     * @param metadataExtractor
     *            the extractor, passed to the parent store
     * @param mongoClient
     *            the client to use; it is closed by {@link #close()}
     * @param databaseName
     *            the database holding the collection
     * @param collectionName
     *            the collection in which documents are stored
     * @param batchSize
     *            the page size used by {@link #byPages(int, Callback)} when
     *            the requested page size is not positive
     */
    public MongoDocumentStore(TaskContext context, DataExtractor metadataExtractor, MongoClient mongoClient,
            String databaseName, String collectionName, int batchSize) {
        super(context, metadataExtractor);
        this.mongoClient = mongoClient;
        this.collection = mongoClient.getDatabase(databaseName).getCollection(collectionName);
        this.batchSize = batchSize;
    }

    /**
     * Stores a single document, replacing any stored document with the same
     * id. A {@code null} document is ignored.
     * <p>
     * An upsert is used (rather than a plain insert) so that saving the same
     * document twice does not fail on a duplicate key and behaves like
     * {@link #save(List)}.
     */
    @Override
    public void save(Document data) {
        if (data == null) {
            return;
        }
        org.bson.Document bson = toBson(data);
        collection.replaceOne(Filters.eq(ID_FIELD, bson.getString(ID_FIELD)), bson,
                new UpdateOptions().upsert(true));
    }

    /**
     * Stores several documents in one bulk write, replacing stored documents
     * that have the same ids. A {@code null} list, {@code null} elements and
     * an empty list are ignored.
     */
    @Override
    public void save(List<Document> data) {
        if (data == null) {
            return;
        }
        List<ReplaceOneModel<org.bson.Document>> updates = data.stream()
                .filter(item -> item != null)
                .map(MongoDocumentStore::toBson)
                .map(doc -> new ReplaceOneModel<org.bson.Document>(Filters.eq(ID_FIELD, doc.getString(ID_FIELD)),
                        doc, new UpdateOptions().upsert(true)))
                .collect(Collectors.toList());
        // The driver rejects a bulk write without any request.
        if (!updates.isEmpty()) {
            collection.bulkWrite(updates);
        }
    }

    /**
     * Always reports the store as healthy; a real check is still to be
     * implemented.
     */
    @Override
    public boolean check() {
        // TODO
        return true;
    }

    /** Drops the backing collection. */
    @Override
    public void deleteAll() {
        collection.drop();
    }

    /**
     * Walks through the whole collection, handing the documents to the
     * callback page by page.
     * <p>
     * The callback is invoked at least once (with an empty page when the
     * collection is empty). Iteration stops as soon as the callback returns
     * {@code false} or the collection is exhausted.
     *
     * @param pageSize
     *            the maximum number of documents per page; when not positive,
     *            the configured batch size is used instead
     * @param callback
     *            receives each page and returns whether to continue
     */
    @Override
    public void byPages(int pageSize, Callback callback) {
        // Always at least 1, otherwise pages would stay empty and the loop
        // could never reach the end of the cursor.
        int size = pageSize > 0 ? pageSize : Math.max(batchSize, 1);
        try (MongoCursor<org.bson.Document> cursor = collection.find().iterator()) {
            boolean loop = true;
            while (loop) {
                List<Document> page = new ArrayList<>();
                while (page.size() < size && cursor.hasNext()) {
                    page.add(fromBson(cursor.next()));
                }
                // Stop once the cursor is drained, even if the callback would
                // like to continue: there is nothing left to hand over.
                loop = callback.on(page) && cursor.hasNext();
            }
        }
    }

    /** @return the number of documents in the collection */
    @Override
    public long total() {
        return collection.count();
    }

    /**
     * Reads one page of documents.
     *
     * @param pageSize
     *            the number of documents per page
     * @param pageNumber
     *            the zero-based page index
     * @return the documents of that page, possibly empty
     * @throws ArithmeticException
     *             if {@code pageSize * pageNumber} overflows an {@code int}
     */
    @Override
    public Collection<Document> byPages(int pageSize, int pageNumber) {
        // Fail loudly instead of silently skipping to a wrapped-around offset.
        int offset = Math.multiplyExact(pageSize, pageNumber);
        List<Document> page = new ArrayList<>();
        try (MongoCursor<org.bson.Document> cursor = collection.find().skip(offset).limit(pageSize).iterator()) {
            while (cursor.hasNext()) {
                page.add(fromBson(cursor.next()));
            }
        }
        return page;
    }

    /** Nothing to initialize: the collection handle is obtained in the constructor. */
    @Override
    public void init() {
    }

    /** Closes the underlying {@link MongoClient}. */
    @Override
    public void close() throws IOException {
        mongoClient.close();
    }
}