com.ycchung.crawler.Database.java Source code

Java tutorial

Introduction

Here is the source code for com.ycchung.crawler.Database.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.ycchung.crawler;

import java.net.URL;
import com.mongodb.BasicDBObject;
import com.mongodb.BulkWriteOperation;
import com.mongodb.BulkWriteResult;
import com.mongodb.Cursor;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.ParallelScanOptions;
import java.net.UnknownHostException;
import java.util.Date;

import java.util.List;
import java.util.Set;

import static java.util.concurrent.TimeUnit.SECONDS;

/**
 * Thin wrapper around a single MongoDB collection in which the crawler
 * records the pages it has fetched.
 *
 * <p>Every page is stored as one document with the fields {@code url},
 * {@code type} (the URL's protocol), {@code tag} (always {@code "html"}),
 * {@code content} and {@code time} (epoch milliseconds at insertion).
 *
 * @author turtlepool
 */
public class Database {

    // Field names / values shared by the queries and the inserted documents.
    private static final String FIELD_URL = "url";
    private static final String FIELD_TAG = "tag";
    private static final String TAG_HTML = "html";

    //mongo Database object
    private final MongoClient mongoClient;
    private final DB database;
    private final DBCollection coll;

    /**
     * Connects to the MongoDB server and selects the collection to work on.
     *
     * @param host       host name of the MongoDB server
     * @param port       port of the MongoDB server
     * @param dbName     name of the database to use
     * @param collection name of the collection inside that database
     * @throws IllegalArgumentException if {@code host} cannot be resolved;
     *         the original {@link UnknownHostException} is kept as the cause
     */
    public Database(String host, int port, String dbName, String collection) {
        final MongoClient client;
        try {
            //connect to database
            client = new MongoClient(host, port);
        } catch (UnknownHostException ex) {
            // Fail fast: returning here would leave every field null and make
            // each later call (including close()) die with a NullPointerException.
            throw new IllegalArgumentException(
                    "Unknown MongoDB host: " + host + ":" + port, ex);
        }
        mongoClient = client;
        database = client.getDB(dbName);
        coll = database.getCollection(collection);
    }

    /**
     * Closes the underlying MongoDB client.
     */
    public void close() {
        mongoClient.close();
    }

    /**
     * Stores a page unless a document with the same URL already exists.
     *
     * <p>The existence check and the insert are two separate operations, so
     * two concurrent callers may both insert the same URL.
     *
     * @param url     address of the page
     * @param content page content to store
     * @return {@code true} if a new document was inserted, {@code false} if
     *         the URL was already present
     */
    public boolean update(URL url, String content) {
        if (isVisited(url)) {
            return false;
        }
        BasicDBObject doc = new BasicDBObject(FIELD_URL, url.toString())
                .append("type", url.getProtocol())
                .append(FIELD_TAG, TAG_HTML)
                .append("content", content)
                .append("time", System.currentTimeMillis());
        coll.insert(doc);
        return true;
    }

    /**
     * Tells whether a document for the given URL is already stored.
     *
     * @param url address to look up
     * @return {@code true} if at least one document has this URL
     */
    public boolean isVisited(URL url) {
        BasicDBObject query = new BasicDBObject(FIELD_URL, url.toString());
        // Only existence matters, so ask the server for at most one document.
        DBCursor cursor = coll.find(query).limit(1);
        try {
            return cursor.hasNext();
        } finally {
            cursor.close();
        }
    }

    /**
     * Counts the stored documents tagged {@code "html"}.
     *
     * @return number of matching documents
     */
    public long count() {
        return coll.count(htmlQuery());
    }

    /**
     * Prints every stored document tagged {@code "html"} to standard output,
     * one per line.
     */
    public void show() {
        DBCursor cursor = coll.find(htmlQuery());
        try {
            while (cursor.hasNext()) {
                System.out.println(cursor.next());
            }
        } finally {
            cursor.close();
        }
    }

    /**
     * Removes every stored document tagged {@code "html"}.
     */
    public void clear() {
        // One server-side remove of all matches instead of deleting documents
        // one by one while iterating a cursor over the same collection.
        coll.remove(htmlQuery());
    }

    /**
     * Builds a fresh query matching all documents tagged {@code "html"}.
     * A new object is returned each time because BasicDBObject is mutable.
     */
    private static BasicDBObject htmlQuery() {
        return new BasicDBObject(FIELD_TAG, TAG_HTML);
    }
}