com.jaeksoft.searchlib.crawler.database.DatabaseCrawlMongoDb.java Source code

Java tutorial

Introduction

Here is the source code for com.jaeksoft.searchlib.crawler.database.DatabaseCrawlMongoDb.java

Source

/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2010-2014 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.crawler.database;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.Arrays;

import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.util.Variables;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.MongoIterable;

/**
 * Database crawl definition that targets a MongoDB collection.
 * <p>
 * On top of the settings read through the getters of
 * {@link DatabaseCrawlAbstract} (URL, user, password, ...), this class stores
 * the database name, the collection name and two optional JSON strings: the
 * query criteria and the projection.
 */
public class DatabaseCrawlMongoDb extends DatabaseCrawlAbstract {

    private String databaseName;
    private String collectionName;
    private String criteria;
    private String projection;

    /**
     * Creates a named crawl whose MongoDB specific settings are all unset.
     * 
     * @param crawlMaster
     *            the crawl master passed to the parent constructor
     * @param propertyManager
     *            the property manager passed to the parent constructor
     * @param name
     *            the name of the crawl (may be null)
     */
    public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster, DatabasePropertyManager propertyManager,
            String name) {
        super(crawlMaster, propertyManager, name);
        databaseName = null;
        collectionName = null;
        criteria = null;
        projection = null;
    }

    /**
     * Passes the database name, collection name, criteria and projection
     * through {@link Variables#replace(String)} and stores the results.
     * 
     * @param variables
     *            the variables to apply; nothing is done when null
     */
    public void applyVariables(Variables variables) {
        if (variables == null)
            return;
        databaseName = variables.replace(databaseName);
        collectionName = variables.replace(collectionName);
        criteria = variables.replace(criteria);
        projection = variables.replace(projection);
    }

    /**
     * Creates an unnamed crawl whose MongoDB specific settings are all unset.
     * 
     * @param crawlMaster
     *            the crawl master passed to the parent constructor
     * @param propertyManager
     *            the property manager passed to the parent constructor
     */
    public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster, DatabasePropertyManager propertyManager) {
        this(crawlMaster, propertyManager, null);
    }

    /**
     * Copy constructor: builds a new crawl attached to the same master and
     * property manager, then copies every setting of the given crawl into it
     * using {@link #copyTo(DatabaseCrawlAbstract)}.
     * 
     * @param crawl
     *            the crawl to copy
     */
    protected DatabaseCrawlMongoDb(DatabaseCrawlMongoDb crawl) {
        super((DatabaseCrawlMaster) crawl.threadMaster, crawl.propertyManager);
        crawl.copyTo(this);
    }

    /**
     * @return a new instance created with the copy constructor
     */
    @Override
    public DatabaseCrawlAbstract duplicate() {
        return new DatabaseCrawlMongoDb(this);
    }

    /**
     * Copies the parent settings, then the database name, collection name,
     * criteria and projection, into the given crawl.
     * 
     * @param crawlAbstract
     *            the target; it is cast to {@link DatabaseCrawlMongoDb}
     */
    @Override
    public void copyTo(DatabaseCrawlAbstract crawlAbstract) {
        super.copyTo(crawlAbstract);
        DatabaseCrawlMongoDb target = (DatabaseCrawlMongoDb) crawlAbstract;
        target.databaseName = this.databaseName;
        target.collectionName = this.collectionName;
        target.criteria = this.criteria;
        target.projection = this.projection;
    }

    /**
     * @return always {@link DatabaseCrawlEnum#DB_MONGO_DB}
     */
    @Override
    public DatabaseCrawlEnum getType() {
        return DatabaseCrawlEnum.DB_MONGO_DB;
    }

    // Names of the XML attributes and child nodes used by the XML constructor
    // and by writeXml.
    protected final static String DBCRAWL_ATTR_DB_NAME = "databaseName";
    protected final static String DBCRAWL_ATTR_COLLECTION_NAME = "collectionName";
    protected final static String DBCRAWL_NODE_NAME_CRITERIA = "criteria";
    protected final static String DBCRAWL_NODE_NAME_PROJECTION = "projection";

    /**
     * Creates a crawl from its XML form. The database and collection names
     * are read from attributes of the item; the criteria and the projection
     * are read from child nodes when those nodes exist.
     * 
     * @param crawlMaster
     *            the crawl master passed to the parent constructor
     * @param propertyManager
     *            the property manager passed to the parent constructor
     * @param xpp
     *            the XPath parser used to locate and read the child nodes
     * @param item
     *            the XML node describing the crawl
     * @throws XPathExpressionException
     *             if an XPath evaluation fails
     */
    public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster, DatabasePropertyManager propertyManager,
            XPathParser xpp, Node item) throws XPathExpressionException {
        super(crawlMaster, propertyManager, xpp, item);
        setDatabaseName(XPathParser.getAttributeString(item, DBCRAWL_ATTR_DB_NAME));
        setCollectionName(XPathParser.getAttributeString(item, DBCRAWL_ATTR_COLLECTION_NAME));
        Node criteriaNode = xpp.getNode(item, DBCRAWL_NODE_NAME_CRITERIA);
        if (criteriaNode != null)
            setCriteria(xpp.getNodeString(criteriaNode, true));
        Node projectionNode = xpp.getNode(item, DBCRAWL_NODE_NAME_PROJECTION);
        if (projectionNode != null)
            setProjection(xpp.getNodeString(projectionNode, true));
    }

    /**
     * Writes this crawl as XML: one element carrying the settings as
     * attributes, which contains the field map element and, when they are
     * not empty, the criteria and projection elements.
     * 
     * @param xmlWriter
     *            the writer receiving the XML
     * @throws SAXException
     *             if the writer fails
     */
    @Override
    public void writeXml(XmlWriter xmlWriter) throws SAXException {
        xmlWriter.startElement(DBCRAWL_NODE_NAME, DBCRAWL_ATTR_NAME, getName(), DBCRAWL_ATTR_TYPE, getType().name(),
                DBCRAWL_ATTR_USER, getUser(), DBCRAWL_ATTR_PASSWORD, getPassword(), DBCRAWL_ATTR_URL, getUrl(),
                DBCRAWL_ATTR_LANG, getLang().getCode(), DBCRAWL_ATTR_BUFFER_SIZE, Integer.toString(getBufferSize()),
                DBCRAWL_ATTR_MSSLEEP, Integer.toString(getMsSleep()), DBCRAWL_ATTR_DB_NAME, getDatabaseName(),
                DBCRAWL_ATTR_COLLECTION_NAME, getCollectionName());

        xmlWriter.startElement(DBCRAWL_NODE_NAME_MAP);
        getFieldMap().store(xmlWriter);
        xmlWriter.endElement();

        // The two optional nodes are only written when they hold a value
        String criteriaText = getCriteria();
        if (!StringUtils.isEmpty(criteriaText)) {
            xmlWriter.startElement(DBCRAWL_NODE_NAME_CRITERIA);
            xmlWriter.textNode(criteriaText);
            xmlWriter.endElement();
        }
        String projectionText = getProjection();
        if (!StringUtils.isEmpty(projectionText)) {
            xmlWriter.startElement(DBCRAWL_NODE_NAME_PROJECTION);
            xmlWriter.textNode(projectionText);
            xmlWriter.endElement();
        }

        xmlWriter.endElement();
    }

    /**
     * @return the databaseName
     */
    public String getDatabaseName() {
        return databaseName;
    }

    /**
     * @param databaseName
     *            the databaseName to set
     */
    public void setDatabaseName(String databaseName) {
        this.databaseName = databaseName;
    }

    /**
     * @return the criteria
     */
    public String getCriteria() {
        return criteria;
    }

    /**
     * @param criteria
     *            the criteria to set
     */
    public void setCriteria(String criteria) {
        this.criteria = criteria;
    }

    /**
     * @return the projection
     */
    public String getProjection() {
        return projection;
    }

    /**
     * @param projection
     *            the projection to set
     */
    public void setProjection(String projection) {
        this.projection = projection;
    }

    /**
     * @return the collectionName
     */
    public String getCollectionName() {
        return collectionName;
    }

    /**
     * @param collectionName
     *            the collectionName to set
     */
    public void setCollectionName(String collectionName) {
        this.collectionName = collectionName;
    }

    /**
     * Builds a MongoDB client for the host and port found in the URL of this
     * crawl. Credentials are attached only when both the user and the
     * password are set; they are bound to the configured database name. The
     * caller is responsible for closing the returned client.
     * 
     * @return a new client
     * @throws URISyntaxException
     *             if the URL cannot be parsed as an URI
     * @throws UnknownHostException
     *             kept in the signature for the callers that already handle
     *             it
     */
    MongoClient getMongoClient() throws URISyntaxException, UnknownHostException {
        URI uri = new URI(getUrl());

        // URI.getPort() returns -1 when the URL has no explicit port
        // (mongodb://host); use the MongoDB default port in that case instead
        // of handing an invalid port to the driver.
        int port = uri.getPort();
        if (port == -1)
            port = ServerAddress.defaultPort();
        ServerAddress serverAddress = new ServerAddress(uri.getHost(), port);

        String user = getUser();
        String password = getPassword();
        if (StringUtils.isEmpty(user) || StringUtils.isEmpty(password))
            return new MongoClient(serverAddress);

        // createCredential leaves the authentication mechanism unspecified so
        // that the driver negotiates it with the server, rather than forcing
        // the legacy MONGODB-CR mechanism.
        MongoCredential credential = MongoCredential.createCredential(user, databaseName, password.toCharArray());
        return new MongoClient(serverAddress, Arrays.asList(credential));
    }

    /**
     * Returns the configured collection of the configured database.
     * 
     * @param mongoClient
     *            the client used to reach the database
     * @return the collection
     * @throws IOException
     *             if the database name or the collection name is empty
     */
    MongoCollection<Document> getCollection(MongoClient mongoClient) throws IOException {
        if (StringUtils.isEmpty(databaseName))
            throw new IOException("No database name.");
        MongoDatabase db = mongoClient.getDatabase(databaseName);
        if (StringUtils.isEmpty(collectionName))
            throw new IOException("No collection name.");
        return db.getCollection(collectionName);
    }

    /**
     * @return the criteria parsed as a document, or null when no criteria is
     *         set
     */
    Document getCriteriaObject() {
        return parseJson(criteria);
    }

    /**
     * @return the projection parsed as a document, or null when no
     *         projection is set
     */
    Document getProjectionObject() {
        return parseJson(projection);
    }

    /**
     * Parses a JSON string with {@link Document#parse(String)}.
     * 
     * @param json
     *            the JSON text
     * @return the parsed document, or null when the text is null or empty
     */
    private static Document parseJson(String json) {
        if (StringUtils.isEmpty(json))
            return null;
        return Document.parse(json);
    }

    /**
     * Checks the settings of this crawl against the server and builds a
     * report. The report lists the collections of the database when a
     * database name is set, the number of documents of the collection when a
     * collection name is set, and the number of documents matching the
     * criteria when a criteria is set.
     * 
     * @return the report, one information per line
     * @throws Exception
     *             a {@link SearchLibException} when the URL is missing or
     *             does not use the mongodb scheme, or any error raised while
     *             parsing the URL or talking to the server
     */
    @Override
    public String test() throws Exception {
        String url = getUrl();
        // Report a missing URL explicitly rather than letting new URI(null)
        // fail with a bare NullPointerException.
        if (StringUtils.isEmpty(url))
            throw new SearchLibException("No URL. The URL should start with: mongodb://");
        URI uri = new URI(url);
        if (!"mongodb".equals(uri.getScheme()))
            throw new SearchLibException(
                    "Wrong scheme: " + uri.getScheme() + ". The URL should start with: mongodb://");

        StringBuilder sb = new StringBuilder();
        MongoClient mongoClient = null;
        try {
            mongoClient = getMongoClient();
            // NOTE(review): the client constructor may not open a connection
            // by itself; the server is actually queried by the calls below.
            // Confirm whether this message is accurate when no database name
            // is set.
            sb.append("Connection established.").append(StringUtils.LF);
            if (!StringUtils.isEmpty(databaseName)) {
                MongoDatabase db = mongoClient.getDatabase(databaseName);
                if (db == null)
                    throw new SearchLibException("Database not found: " + databaseName);
                MongoIterable<String> collections = db.listCollectionNames();
                if (collections == null)
                    throw new SearchLibException("No collection found.");
                sb.append("Collections found:").append(StringUtils.LF);
                for (String collection : collections)
                    sb.append(collection).append(StringUtils.LF);
                if (!StringUtils.isEmpty(collectionName)) {
                    MongoCollection<?> dbCollection = db.getCollection(collectionName);
                    if (dbCollection == null)
                        throw new SearchLibException("Collection " + collectionName + " not found.");
                    sb.append("Collection ").append(collectionName).append(" contains ")
                            .append(dbCollection.count()).append(" document(s).").append(StringUtils.LF);
                    if (!StringUtils.isEmpty(criteria)) {
                        long count = dbCollection.count(getCriteriaObject());
                        sb.append("Query returns ").append(count).append(" document(s).")
                                .append(StringUtils.LF);
                    }
                }
            }
        } finally {
            // Always release the client, even when a check above failed
            if (mongoClient != null)
                mongoClient.close();
        }
        return sb.toString();
    }

}