Example usage of the com.mongodb.MongoClient constructor

List of usage examples for the com.mongodb.MongoClient constructor

Introduction

On this page you can find example usages of the com.mongodb.MongoClient constructor.

Prototype

public MongoClient(final MongoClientURI uri, final MongoDriverInformation mongoDriverInformation) 

Source Link

Document

Creates a Mongo described by a URI.

Usage

From source file:cloud.simple.RuleEngineApplication.java

/**
 * Creates the {@link MongoDatabase} bean from two environment properties.
 *
 * <p>{@code spring.data.mongodb.custom.service} holds a comma-separated list of
 * {@code host[:port]} entries; {@code spring.data.mongodb.database} holds the database name.
 * Whitespace around entries is ignored, blank entries are skipped, and an entry without a
 * port uses the driver's default port.
 *
 * @return the database handle obtained from a client configured with socket keep-alive and
 *         a secondary-preferred read preference
 * @throws IllegalStateException if either property is missing/blank or no server is listed
 * @throws NumberFormatException if a port is not a valid integer
 */
@Bean
public MongoDatabase dataSource() {
    String servers = env.getProperty("spring.data.mongodb.custom.service");
    String databaseName = env.getProperty("spring.data.mongodb.database");
    if (servers == null || servers.trim().isEmpty()) {
        throw new IllegalStateException(
                "Property 'spring.data.mongodb.custom.service' must list at least one host[:port]");
    }
    if (databaseName == null || databaseName.trim().isEmpty()) {
        throw new IllegalStateException("Property 'spring.data.mongodb.database' must be set");
    }

    List<ServerAddress> seeds = new ArrayList<ServerAddress>();
    for (String entry : servers.split(",")) {
        String hostAndPort = entry.trim();
        if (hostAndPort.isEmpty()) {
            continue; // tolerate stray commas such as "a:1,,b:2"
        }
        String[] parts = hostAndPort.split(":");
        String host = parts[0].trim();
        if (parts.length > 1) {
            seeds.add(new ServerAddress(host, Integer.parseInt(parts[1].trim())));
        } else {
            // No explicit port: let the driver apply its default.
            seeds.add(new ServerAddress(host));
        }
    }
    if (seeds.isEmpty()) {
        throw new IllegalStateException(
                "Property 'spring.data.mongodb.custom.service' contains no usable host[:port] entry");
    }

    Builder builder = MongoClientOptions.builder();
    builder.socketKeepAlive(true);
    builder.readPreference(ReadPreference.secondaryPreferred());
    MongoClientOptions options = builder.build();

    // The client is intentionally left open: the returned database is obtained from it.
    @SuppressWarnings("resource")
    MongoClient mongoClient = new MongoClient(seeds, options);
    return mongoClient.getDatabase(databaseName);
}

From source file:cn.edu.hfut.dmic.webcollector.example.DemoSelenium.java

License:Open Source License

/**
 * Crawls the seed page with an {@code HtmlUnitDriver} and stores one document per table row
 * in the "attend_rate" collection of the "maoyan_crawler" database on localhost:27017.
 *
 * @param args unused
 * @throws Exception propagated from the crawler setup or run
 */
public static void main(String[] args) throws Exception {
    Executor executor = new Executor() {
        @Override
        public void execute(CrawlDatum datum, CrawlDatums next) throws Exception {
            MongoClient mongoClient = new MongoClient("localhost", 27017);
            HtmlUnitDriver driver = null;
            try {
                DB db = mongoClient.getDB("maoyan_crawler");
                // Start every run from an empty "attend_rate" collection.
                if (db.getCollectionNames().contains("attend_rate")) {
                    db.getCollection("attend_rate").drop();
                }
                DBCollection dbCollection = db.getCollection("attend_rate");

                driver = new HtmlUnitDriver();
                driver.setJavascriptEnabled(false);
                driver.get(datum.getUrl());
                System.out.println(driver.getPageSource());

                WebElement click_view = driver.findElement(By.xpath("//div[@id='seatContent']//span[1]"));
                click_view.click();
                String gold_seat = driver.getWindowHandle();
                driver.switchTo().window(gold_seat);
                System.out.println(driver.getPageSource());

                WebElement city_name = driver.findElement(By.xpath("//*[@id='all-citys']/div[1]/ul/li[1]/a"));
                System.out.println(city_name.getText());

                WebElement element = driver.findElementByCssSelector("div#seat_table");
                // By.className accepts a single class name only; elements carrying two classes
                // have to be matched with a CSS selector instead.
                List<WebElement> movie_name = element.findElements(By.cssSelector(".c1.lineDot"));
                List<WebElement> boxoffice_rate = element.findElements(By.cssSelector(".c2.red"));
                List<WebElement> visit_pershow = element.findElements(By.cssSelector(".c3.gray"));

                WebElement cityarea = driver.findElementByCssSelector("span[class='today']");
                String title = cityarea.getText();
                System.out.println(title);

                // The three columns are read independently; never index past the shortest one.
                int rows = Math.min(movie_name.size(), Math.min(boxoffice_rate.size(), visit_pershow.size()));
                for (int i = 0; i < rows; i++) {
                    String movie = movie_name.get(i).getText();
                    String rate = boxoffice_rate.get(i).getText();
                    String visits = visit_pershow.get(i).getText();
                    System.out.println(movie);
                    System.out.println(rate);
                    System.out.println(visits);
                    BasicDBObject dbObject = new BasicDBObject();
                    dbObject.append("title", title).append("movie_name", movie)
                            .append("boxoffice_rate", rate)
                            .append("visit_pershow", visits);
                    dbCollection.insert(dbObject);
                }
            } finally {
                // Release the browser and the database connection even when scraping fails.
                try {
                    if (driver != null) {
                        driver.quit();
                    }
                } finally {
                    mongoClient.close();
                }
            }
        }
    };

    // Crawl state is kept by a BerkeleyDBManager created with the name "crawl".
    DBManager manager = new BerkeleyDBManager("crawl");
    // The crawler is wired with that manager and the executor defined above.
    Crawler crawler = new Crawler(manager, executor);
    crawler.addSeed("http://pf.maoyan.com/attend/rate");
    crawler.start(1);
}

From source file:cn.edu.hfut.dmic.webcollector.example.Douban2Crawler.java

License:Open Source License

/**
 * Handles one crawled page. For a single-review URL it extracts the review text, the movie
 * name and the comment texts and inserts them as one document into the "moviereview"
 * collection of the "douban_crawler" database; for a "best reviews" listing URL it only logs.
 *
 * @param page the fetched page
 * @param next collector for follow-up URLs (not used here)
 */
@Override
public void visit(Page page, CrawlDatums next) {
    System.out.println(page.getUrl());
    MongoClient mongoClient = new MongoClient("localhost", 27017);
    try {
        MongoDatabase mongoDatabase = mongoClient.getDatabase("douban_crawler");
        System.out.println("Connect to database successfully");
        MongoCollection<org.bson.Document> collection = mongoDatabase.getCollection("moviereview");
        if (page.matchUrl("https://movie.douban.com/review/\\d*/")) {
            index = index + 1;
            System.out.println("index:" + index);
            String review = page.select("div.main-bd").text();
            System.out.println("review:" + review);
            String movieName = page.select("div.side-back").text();
            System.out.println("movieName:" + movieName);
            List<?> rc = page.select("div.comment-item");
            Document document = new Document();
            document.put("moviename", movieName);
            document.put("review", review);
            List<String> subreviews = new ArrayList<String>();
            for (int i = 0; i < rc.size(); i++) {
                // Walk comment item -> child node 3 -> child node 3 -> first child, which is
                // where the original code reads the comment text from.
                org.jsoup.nodes.Element commentItem = (org.jsoup.nodes.Element) rc.get(i);
                org.jsoup.nodes.Element commentBody = (org.jsoup.nodes.Element) commentItem.childNodes().get(3);
                org.jsoup.nodes.Element textHolder = (org.jsoup.nodes.Element) commentBody.childNode(3);
                String subReview = textHolder.childNode(0).toString();
                System.out.println("subReview:" + subReview);
                // Compare by content; a reference comparison against "" is almost always true.
                if (!subReview.isEmpty()) {
                    subreviews.add(subReview);
                }
            }
            document.put("subreviews", subreviews);
            collection.insertOne(document);
        } else if (page.matchUrl("https://movie.douban.com/review/best/\\?start=\\d*")) {
            System.out.println("add seed");
        }
    } finally {
        // A client is opened for every visited page, so it must be released every time.
        mongoClient.close();
    }
}

From source file:cn.edu.hfut.dmic.webcollector.example.FirefoxSelenium2.java

License:Open Source License

/**
 * Crawls the seed page with an HtmlUnit {@code WebClient} and prints the date header and the
 * three table columns found under {@code seat_table}. Nothing is written to MongoDB.
 *
 * @param args unused
 * @throws Exception propagated from the crawler setup or run
 */
public static void main(String[] args) throws Exception {
    Executor executor = new Executor() {
        @Override
        public void execute(CrawlDatum datum, CrawlDatums next) throws Exception {
            MongoClient mongoClient = new MongoClient("localhost", 27017);
            WebClient webClient = null;
            try {
                DB db = mongoClient.getDB("maoyan_crawler");
                // NOTE(review): this drops "rankings_am" although the page scraped below is the
                // attendance-rate page; confirm which collection is really meant to be reset.
                if (db.getCollectionNames().contains("rankings_am")) {
                    db.getCollection("rankings_am").drop();
                }

                webClient = new WebClient(BrowserVersion.FIREFOX_38);
                webClient.getOptions().setCssEnabled(true);
                HtmlPage page = webClient.getPage(datum.getUrl());

                System.out.println(page.getByXPath("//span[@class='today']/em/text()"));
                System.out.println(page.getByXPath("//span[@class='today']/text()"));
                List<?> movie_name = page.getByXPath("//div[@id='seat_table']//ul//li[@class='c1 lineDot']/text()");
                List<?> boxoffice_rate = page.getByXPath("//div[@id='seat_table']//ul//li[@class='c2 red']/text()");
                List<?> visit_pershow = page.getByXPath("//div[@id='seat_table']//ul//li[@class='c3 gray']/text()");

                // The three columns are queried independently; never index past the shortest one.
                int rows = Math.min(movie_name.size(), Math.min(boxoffice_rate.size(), visit_pershow.size()));
                for (int i = 0; i < rows; i++) {
                    System.out.println(movie_name.get(i));
                    System.out.println(boxoffice_rate.get(i));
                    System.out.println(visit_pershow.get(i));
                }
            } finally {
                // Release the browser windows and the database connection even on failure.
                try {
                    if (webClient != null) {
                        webClient.closeAllWindows();
                    }
                } finally {
                    mongoClient.close();
                }
            }
        }
    };

    // Crawl state is kept by a BerkeleyDBManager created with the name "crawl".
    DBManager manager = new BerkeleyDBManager("crawl");
    // The crawler is wired with that manager and the executor defined above.
    Crawler crawler = new Crawler(manager, executor);
    crawler.addSeed("http://pf.maoyan.com/attend/rate");
    crawler.start(1);
}

From source file:cn.edu.hfut.dmic.webcollector.example.FirefoxSelenium3.java

License:Open Source License

/**
 * Drives Firefox through every entry of the city list on the seed page and, for each city,
 * stores one document per table row in the "attend_rate" collection of the "maoyan_crawler"
 * database on localhost:27017.
 *
 * @param args unused
 * @throws Exception propagated from the crawler setup or run
 */
public static void main(String[] args) throws Exception {
    Executor executor = new Executor() {
        @Override
        public void execute(CrawlDatum datum, CrawlDatums next) throws Exception {
            // Locators used repeatedly below.
            final By cityPicker = By.xpath("//*[@id='seat_city']");
            final By cityLinks = By.xpath("//div[@id='all-citys']/div/ul/li/a");
            final By todayLabel = By.xpath("//span[@class='today']");
            final By movieCells = By.xpath("//div[@id='seat_table']//ul//li[@class='c1 lineDot']");
            final By rateCells = By.xpath("//div[@id='seat_table']//ul//li[@class='c2 red']");
            final By visitCells = By.xpath("//div[@id='seat_table']//ul//li[@class='c3 gray']");

            MongoClient mongoClient = new MongoClient("localhost", 27017);
            WebDriver driver = null;
            try {
                DB db = mongoClient.getDB("maoyan_crawler");
                // Start every run from an empty "attend_rate" collection.
                if (db.getCollectionNames().contains("attend_rate")) {
                    db.getCollection("attend_rate").drop();
                }
                DBCollection dbCollection = db.getCollection("attend_rate");

                ProfilesIni pi = new ProfilesIni();
                FirefoxProfile profile = pi.getProfile("default");
                driver = new FirefoxDriver(profile);
                driver.manage().window().maximize();
                driver.manage().timeouts().pageLoadTimeout(3, TimeUnit.SECONDS);
                driver.get(datum.getUrl());
                driver.findElement(cityPicker).click();
                driver.switchTo().window(driver.getWindowHandle());

                int city_num = driver.findElements(cityLinks).size();
                for (int i = 0; i < city_num; i++) {
                    System.out.println("A city chosen" + i);
                    // Elements are looked up again on every use, as in the original flow;
                    // presumably earlier references go stale once a city is clicked.
                    WebElement cityLink = driver.findElements(cityLinks).get(i);
                    String city = cityLink.getText();
                    System.out.println(city);

                    // Bring the link into view, then back off a little before clicking.
                    ((JavascriptExecutor) driver).executeScript("arguments[0].scrollIntoView(true);", cityLink);
                    ((JavascriptExecutor) driver).executeScript("window.scrollBy(0, -250)", "");
                    Thread.sleep(1000);
                    new Actions(driver).moveToElement(driver.findElements(cityLinks).get(i)).click().perform();
                    driver.switchTo().window(driver.getWindowHandle());

                    String title = driver.findElement(todayLabel).getText();
                    System.out.println(title);
                    for (int j = 0; j < driver.findElements(movieCells).size(); j++) {
                        // Read each cell once and reuse the text for logging and storage.
                        String movie = driver.findElements(movieCells).get(j).getText();
                        String rate = driver.findElements(rateCells).get(j).getText();
                        String visits = driver.findElements(visitCells).get(j).getText();
                        System.out.println(movie);
                        System.out.println(rate);
                        System.out.println(visits);
                        BasicDBObject dbObject = new BasicDBObject();
                        dbObject.append("title", title)
                                .append("city", city)
                                .append("mov_cnname", movie)
                                .append("boxoffice_rate", rate)
                                .append("visit_pershow", visits);
                        dbCollection.insert(dbObject);
                    }

                    System.out.println("new city list to choose");
                    // Re-open the city list for the next iteration.
                    new Actions(driver).moveToElement(driver.findElement(cityPicker)).click().perform();
                    driver.switchTo().window(driver.getWindowHandle());
                    Thread.sleep(500);
                }
            } finally {
                // quit() closes every window and ends the session, so a separate close() is not
                // needed; both resources are released even when scraping fails.
                try {
                    if (driver != null) {
                        driver.quit();
                    }
                } finally {
                    mongoClient.close();
                }
            }
        }
    };

    // Crawl state is kept by a BerkeleyDBManager created with the name "crawl".
    DBManager manager = new BerkeleyDBManager("crawl");
    // The crawler is wired with that manager and the executor defined above.
    Crawler crawler = new Crawler(manager, executor);
    crawler.addSeed("http://pf.maoyan.com/attend/rate");
    crawler.start(1);
}

From source file:cn.edu.hfut.dmic.webcollector.example.FirefoxSelenium4.java

License:Open Source License

/**
 * Opens the seed page in Firefox and prints the date header and the three table columns found
 * under {@code seat_table}. Nothing is written to MongoDB.
 *
 * @param args unused
 * @throws Exception propagated from the crawler setup or run
 */
public static void main(String[] args) throws Exception {
    Executor executor = new Executor() {
        @Override
        public void execute(CrawlDatum datum, CrawlDatums next) throws Exception {
            MongoClient mongoClient = new MongoClient("localhost", 27017);
            WebDriver driver = null;
            try {
                DB db = mongoClient.getDB("maoyan_crawler");
                // NOTE(review): this drops "rankings_am" although the page scraped below is the
                // attendance-rate page; confirm which collection is really meant to be reset.
                if (db.getCollectionNames().contains("rankings_am")) {
                    db.getCollection("rankings_am").drop();
                }

                ProfilesIni pi = new ProfilesIni();
                FirefoxProfile profile = pi.getProfile("default");
                driver = new FirefoxDriver(profile);
                driver.manage().timeouts().pageLoadTimeout(3, TimeUnit.SECONDS);
                driver.get(datum.getUrl());

                List<WebElement> movie_name = driver
                        .findElements(By.xpath("//div[@id='seat_table']//ul//li[@class='c1 lineDot']"));
                List<WebElement> boxoffice_rate = driver
                        .findElements(By.xpath("//div[@id='seat_table']//ul//li[@class='c2 red']"));
                List<WebElement> visit_pershow = driver
                        .findElements(By.xpath("//div[@id='seat_table']//ul//li[@class='c3 gray']"));
                WebElement title = driver.findElement(By.xpath("//span[@class='today']/em"));
                WebElement title2 = driver.findElement(By.xpath("//span[@class='today']"));
                System.out.println(title.getText());
                // The second element was located but never printed; the first was printed twice.
                System.out.println(title2.getText());

                // The three columns are queried independently; never index past the shortest one.
                int rows = Math.min(movie_name.size(), Math.min(boxoffice_rate.size(), visit_pershow.size()));
                for (int i = 0; i < rows; i++) {
                    System.out.println(movie_name.get(i).getText());
                    System.out.println(boxoffice_rate.get(i).getText());
                    System.out.println(visit_pershow.get(i).getText());
                }
            } finally {
                // Release the browser and the database connection even when scraping fails.
                try {
                    if (driver != null) {
                        driver.quit();
                    }
                } finally {
                    mongoClient.close();
                }
            }
        }
    };

    // Crawl state is kept by a BerkeleyDBManager created with the name "crawl".
    DBManager manager = new BerkeleyDBManager("crawl");
    // The crawler is wired with that manager and the executor defined above.
    Crawler crawler = new Crawler(manager, executor);
    crawler.addSeed("http://pf.maoyan.com/attend/rate");
    crawler.start(1);
}

From source file:cn.edu.hfut.dmic.webcollector.example.TutorialCrawler.java

License:Open Source License

/**
 * Stores the table shown on a crawled page in the "maoyan_crawler" database.
 *
 * <p>The page URL selects one of five targets ("rankings_am", "rankings_day",
 * "rankings_market", "rankings_year", "main_page"). The target collection is dropped if it
 * exists and then refilled with one document per non-empty row. Pages matching none of the
 * patterns are ignored and no connection is opened for them.
 *
 * @param page the fetched page
 * @param next collector for follow-up URLs (not used here)
 */
@Override
public void visit(Page page, CrawlDatums next) {
    if (page.matchUrl("http://pf.maoyan.com/rankings/america.*")) {
        MongoClient mongoClient = new MongoClient("localhost", 27017);
        try {
            DBCollection dbCollection = recreateCollection(mongoClient.getDB("maoyan_crawler"), "rankings_am");
            String title = page.select("span[id=year-box]").text();
            Elements table = page.select("table[id=na-list]");
            Elements data_set = table.select("tr");
            for (Element row : data_set) {
                Elements tds = row.select("td");
                List<String> amList = cellTexts(tds);
                String en_name = tds.select("p[class=first-line]").text();
                String cn_name = tds.select("p[class=second-line]").text();
                if (!amList.isEmpty()) {
                    System.out.println(amList);
                    BasicDBObject dbObject = new BasicDBObject();
                    dbObject.append("title", title).append("rank", amList.get(0)).append("mov_cnname", cn_name)
                            .append("mov_enname", en_name).append("toweek_rev", amList.get(2))
                            .append("total_rev", amList.get(3)).append("val_week", amList.get(4));
                    dbCollection.insert(dbObject);
                }
            }
        } finally {
            mongoClient.close();
        }
    } else if (page.matchUrl("http://pf.maoyan.com/rankings/day.*")) {
        MongoClient mongoClient2 = new MongoClient("localhost", 27017);
        try {
            DBCollection dbCollection2 = recreateCollection(mongoClient2.getDB("maoyan_crawler"), "rankings_day");
            String title = page.select("span[id=year-box]").text();
            String update_time = page.select("span[id=update-time]").text();
            title = title + update_time;
            System.out.println(title);
            Elements data_set = page.select("tr");
            for (Element row : data_set) {
                List<String> dayList = cellTexts(row.select("td"));
                if (!dayList.isEmpty()) {
                    System.out.println(dayList);
                    BasicDBObject dbObject = new BasicDBObject();
                    dbObject.append("title", title).append("rank", dayList.get(0))
                            .append("mov_name", dayList.get(1)).append("today_rev", dayList.get(2))
                            .append("date", dayList.get(3)).append("val_week", dayList.get(4));
                    dbCollection2.insert(dbObject);
                }
            }
        } finally {
            mongoClient2.close();
        }
    } else if (page.matchUrl("http://pf.maoyan.com/rankings/market.*")) {
        MongoClient mongoClient3 = new MongoClient("localhost", 27017);
        try {
            DBCollection dbCollection3 = recreateCollection(mongoClient3.getDB("maoyan_crawler"),
                    "rankings_market");
            String title = page.select("span[id=year-box]").text();
            String update_time = page.select("span[id=update-time]").text();
            title = title + update_time;
            System.out.println(title);
            Elements data_set = page.select("tr");
            for (Element row : data_set) {
                List<String> dayList = cellTexts(row.select("td"));
                if (!dayList.isEmpty()) {
                    System.out.println(dayList);
                    BasicDBObject dbObject = new BasicDBObject();
                    dbObject.append("title", title).append("rank", dayList.get(0)).append("date", dayList.get(1))
                            .append("today_rev", dayList.get(2)).append("total_sessions", dayList.get(3))
                            .append("total_visit_count", dayList.get(4));
                    dbCollection3.insert(dbObject);
                }
            }
        } finally {
            mongoClient3.close();
        }
    } else if (page.matchUrl("http://pf.maoyan.com/rankings/year.*")) {
        MongoClient mongoClient4 = new MongoClient("localhost", 27017);
        try {
            DBCollection dbCollection4 = recreateCollection(mongoClient4.getDB("maoyan_crawler"), "rankings_year");
            String title = page.select("span[id=year-box]").text();
            String update_time = page.select("span[id=update-time]").text();
            title = title + update_time;
            System.out.println(title);
            Elements table = page.select("div[id=ranks-list]");
            Elements data_set = table.select("ul[class=row]");
            for (Element row : data_set) {
                Elements lis = row.select("li");
                List<String> dayList = cellTexts(lis);
                String cn_name = lis.select("p[class=first-line]").text();
                String release_date = lis.select("p[class=second-line]").text();
                if (!dayList.isEmpty()) {
                    System.out.println(dayList);
                    BasicDBObject dbObject = new BasicDBObject();
                    dbObject.append("title", title).append("rank", dayList.get(0)).append("name", cn_name)
                            .append("release date", release_date).append("year_rev", dayList.get(2))
                            .append("avg_price", dayList.get(3)).append("avg_visit_count", dayList.get(4));
                    dbCollection4.insert(dbObject);
                }
            }
        } finally {
            mongoClient4.close();
        }
    } else if (page.matchUrl("http://pf.maoyan.com/")) {
        MongoClient mongoClient5 = new MongoClient("localhost", 27017);
        try {
            DBCollection dbCollection5 = recreateCollection(mongoClient5.getDB("maoyan_crawler"), "main_page");
            String title = page.select("span[id=dayStr]").text();
            String box_type = page.select("span[id=box-type]").text();
            String ticket_count = page.select("span[id=ticket_count]").text();
            box_type = box_type + ticket_count;
            System.out.println(title + "\n" + box_type);
            Elements table = page.select("div[id=ticket_tbody]");
            Elements data_set = table.select("ul");
            for (Element row : data_set) {
                Elements lis = row.select("li");
                List<String> dayList = cellTexts(lis);
                // first() yields null when the row has no <b>; fall back to an empty name
                // instead of failing the whole page.
                Element firstBold = lis.select("b").first();
                String cn_name = firstBold == null ? "" : firstBold.text();
                String comment = lis.select("em").text();
                if (!dayList.isEmpty()) {
                    System.out.println(dayList);
                    BasicDBObject dbObject = new BasicDBObject();
                    dbObject.append("title", title).append("box_type", box_type).append("name", cn_name)
                            .append("comment", comment).append("realtime_rev", dayList.get(1))
                            .append("rev_percent", dayList.get(2)).append("schedule_percent", dayList.get(3))
                            .append("total_rev", dayList.get(4));
                    dbCollection5.insert(dbObject);
                }
            }
        } finally {
            mongoClient5.close();
        }
    }
}

/** Drops the named collection if the database lists it, then returns a handle to it. */
private DBCollection recreateCollection(DB db, String name) {
    Set<String> colls = db.getCollectionNames();
    if (colls.contains(name)) {
        db.getCollection(name).drop();
    }
    return db.getCollection(name);
}

/** Returns the text of every element in {@code cells}, in document order. */
private List<String> cellTexts(Elements cells) {
    List<String> texts = new ArrayList<String>();
    for (Element cell : cells) {
        texts.add(cell.text());
    }
    return texts;
}

From source file:cn.edu.hfut.dmic.webcollector.example.WeiboCrawler.java

License:Open Source License

@Override
public void visit(Page page, CrawlDatums next) {
    int pageNum = Integer.valueOf(page.getMetaData("pageNum"));
    /*??*//*  w w  w  . ja  va2 s. co  m*/
    Elements weibos = page.select("div.c");
    try {
        MongoClient mongoClient = new MongoClient("localhost", 27017);

        // ?
        MongoDatabase mongoDatabase = mongoClient.getDatabase("weibo_crawler");
        System.out.println("Connect to database successfully");
        MongoCollection<Document> collection = mongoDatabase.getCollection("webpage");
        //?  
        /** 
        * 1.  org.bson.Document ?key-value? 
        * 2. ?List<Document> 
        * 3. ???? mongoCollection.insertMany(List<Document>) ??? mongoCollection.insertOne(Document) 
        * */
        for (Element weibo : weibos) {
            Document document = new Document("content", "" + pageNum + "" + ":" + weibo.text());
            List<Document> documents = new ArrayList<Document>();
            documents.add(document);
            collection.insertMany(documents);
        }
        System.out.println("??");
        mongoClient.close();
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
    }
}

From source file:cn.edu.hfut.dmic.webcollector.example.WeiboCrawler.java

License:Open Source License

/**
 * Starts a {@code WeiboCrawler} with three threads, seeded with pages 1 through 5 of one
 * Weibo profile. Each seed carries its page number as "pageNum" metadata.
 *
 * @param args unused
 * @throws Exception propagated from the crawler setup or run
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): this client is handed to the crawler and never closed here; whether the
    // crawler takes ownership is not visible from this method.
    MongoClient client = new MongoClient("localhost", 27017);
    WeiboCrawler weiboCrawler = new WeiboCrawler("weibo_crawler", client, false);
    weiboCrawler.setThreads(3);

    int pageNo = 1;
    while (pageNo <= 5) {
        CrawlDatum seed = new CrawlDatum("http://weibo.cn/zhouhongyi?vt=4&page=" + pageNo);
        weiboCrawler.addSeed(seed.putMetaData("pageNum", String.valueOf(pageNo)));
        pageNo++;
    }

    weiboCrawler.start(1);
}

From source file:cn.edu.hfut.dmic.webcollector.lazy.util.MongoHelper.java

License:Open Source License

/**
 * Connects to a MongoDB server and resolves the database and collection this helper works on.
 *
 * @param ip             host of the MongoDB server
 * @param port           port of the MongoDB server
 * @param dbName         name of the database to open
 * @param collectionName name of the collection to open inside that database
 */
public MongoHelper(String ip, int port, String dbName, String collectionName) {
    this.client = new MongoClient(ip, port);
    this.db = this.client.getDatabase(dbName);
    this.collection = this.db.getCollection(collectionName);
}