org.apdplat.search.util.baidu.JsoupBaiduInfoUtil.java Source code

Java tutorial

Introduction

Here is the source code for org.apdplat.search.util.baidu.JsoupBaiduInfoUtil.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package org.apdplat.search.util.baidu;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apdplat.demo.search.SearchResult;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author JONE
 * @mail 858305351@qq.com
 * @time 2013-11-11
 * @description Jsoup-based helper that fetches a Baidu search result page and extracts the result count from it
 */
public class JsoupBaiduInfoUtil {
    private static final Logger LOG = LoggerFactory.getLogger(JsoupBaiduInfoUtil.class);

    /** Search endpoint queried by the constructor. */
    private static final String SEARCH_URL = "http://www.baidu.com/s";

    /**
     * CSS selector for the element whose text carries the result count
     * (the original comment's example value was "13,100").
     */
    private static final String RESULT_COUNT_CSS_QUERY =
            "html body div#out div#in div#wrapper div#container p#page span.nums";

    /** Matches every character that is not an ASCII digit; compiled once and reused. */
    private static final Pattern NON_DIGIT = Pattern.compile("[^0-9]");

    /** Parsed result page; assigned once by the constructor. */
    private final Document document;

    /**
     * Receives the parsed total in {@link #getResultsCount()}. No accessor in this
     * class exposes it.
     */
    private final SearchResult baiduModels = new SearchResult();

    /**
     * CSS selector for a result title link. Not used inside this class.
     * NOTE(review): "tableNum" looks like a placeholder for a result table id that a
     * caller is expected to substitute - confirm against callers.
     */
    String titleCssQuery = "html body div#out div#in div#wrapper div#container div#content_left table#" + "tableNum"
            + ".result tbody tr td.c-default h3.t a";

    /**
     * CSS selector for a result summary (abstract) block. Not used inside this class.
     * NOTE(review): "tableNum" looks like a placeholder for a result table id that a
     * caller is expected to substitute - confirm against callers.
     */
    String summaryCssQuery = "html body div#out div#in div#wrapper div#container div#content_left table#"
            + "tableNum" + ".result tbody tr td.c-default div.c-abstract";

    /**
     * Fetches the search result page for the given keyword and page number with an
     * HTTP GET, sending the keyword as {@code wd} and {@code (page - 1) * 10} as
     * {@code pn}.
     *
     * @author JONE
     * @param name search keyword; must not be null, empty or whitespace-only
     * @param page page number; must be greater than zero
     * @throws java.io.IOException if fetching or parsing the page fails
     * @throws NullPointerException if {@code name} is blank or {@code page} is not
     *         positive (type kept as-is so existing callers keep working)
     * @time 2013-11-11
     */
    public JsoupBaiduInfoUtil(String name, int page) throws IOException {
        if (StringUtils.isEmpty(StringUtils.trim(name)) || 0 >= page) {
            throw new NullPointerException(
                    "name must not be blank and page must be positive (page=" + page + ")");
        }
        // Multiply as long so a very large page number cannot overflow int.
        String offset = String.valueOf((page - 1) * 10L);
        this.document = Jsoup.connect(SEARCH_URL).data("wd", name).data("pn", offset).get();
    }

    /**
     * Returns the result count as a string of digits, e.g. "13100" for a count text
     * containing "13,100". When the digits fit in an {@code int}, the value is also
     * stored as the total of the internal {@link SearchResult}.
     *
     * @author JONE
     * @return the digits of the result count text, or an empty string when the count
     *         text is missing, blank or contains no digits
     * @time 2013-11-11
     */
    public String getResultsCount() {
        String resultsCountText = this.getResultsCountText();
        if (StringUtils.isEmpty(StringUtils.trim(resultsCountText))) {
            return "";
        }
        String totalCount = NON_DIGIT.matcher(resultsCountText).replaceAll("");
        if (totalCount.isEmpty()) {
            // Text without any digit: nothing to parse, and parseInt("") would throw.
            return "";
        }
        try {
            baiduModels.setTotal(Integer.parseInt(totalCount));
        } catch (NumberFormatException e) {
            // Only digits remain here, so this means the value is outside the int range.
            LOG.warn("result count does not fit in an int, total not updated: " + totalCount, e);
        }
        return totalCount;
    }

    /**
     * Returns the raw text of the first element matching the result-count selector.
     *
     * @author JONE
     * @return the element text, or an empty string when there is no document or no
     *         element matches the selector
     * @time 2013-11-11
     */
    public String getResultsCountText() {
        if (null == document) {
            return "";
        }
        LOG.debug("total cssQuery: {}", RESULT_COUNT_CSS_QUERY);
        Element totalElement = document.select(RESULT_COUNT_CSS_QUERY).first();
        if (null == totalElement) {
            // first() yields null when the selector matches nothing.
            return "";
        }
        String totalText = totalElement.text();
        LOG.info("?{}", totalText);
        return totalText;
    }
}