Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package org.apdplat.search.util.baidu; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.apdplat.demo.search.SearchResult; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author JONE * @mail 858305351@qq.com * @time 2013-11-11 * @description Jsoup ??? */ public class JsoupBaiduInfoUtil { private static final Logger LOG = LoggerFactory.getLogger(JsoupBaiduInfoUtil.class); private Document document = null; private SearchResult baiduModels = new SearchResult(); private String url = "http://www.baidu.com/s"; /** * ?13,100 */ private static final String cssQuery = "html body div#out div#in div#wrapper div#container p#page span.nums"; /** * ? */ String titleCssQuery = "html body div#out div#in div#wrapper div#container div#content_left table#" + "tableNum" + ".result tbody tr td.c-default h3.t a"; /** * ? */ String summaryCssQuery = "html body div#out div#in div#wrapper div#container div#content_left table#" + "tableNum" + ".result tbody tr td.c-default div.c-abstract"; /** * @author JONE * @param name ? * @param page * @throws java.io.IOException * @time 2013-11-11 * @description */ public JsoupBaiduInfoUtil(String name, int page) throws IOException { if (StringUtils.isEmpty(StringUtils.trim(name)) || 0 >= page) { throw new NullPointerException(); } this.document = Jsoup.connect(url).data("wd", name).data("pn", String.valueOf((page - 1) * 10)).get(); } /** * @author JONE * @return String * @time 2013-11-11 * @description ??13100 */ public String getResultsCount() { String resultsCountText = this.getResultsCountText(); if (StringUtils.isEmpty(StringUtils.trim(resultsCountText))) { return ""; } String regEx = "[^0-9]"; Pattern p = Pattern.compile(regEx); Matcher m = p.matcher(resultsCountText); String totalCount = m.replaceAll("").trim(); baiduModels.setTotal(Integer.parseInt(totalCount)); return totalCount; } /** * @author JONE * @return String * @time 2013-11-11 * @description ??13,100 */ public String getResultsCountText() { if (null == document) { return ""; } LOG.debug("total cssQuery: " + cssQuery); Element totalElement = document.select(cssQuery).first(); String totalText = totalElement.text(); LOG.info("?" + totalText); return totalText; } }