List of usage examples for org.jsoup.nodes Element select
public Elements select(String cssQuery)
From source file:com.storm.function.GsxtFunction.java
private Map<String, Object> getHtmlInfoMapOfShandong(String area, HtmlPage firstInfoPage, String keyword, ChannelLogger LOGGER) throws Exception { Map<String, Object> resultHtmlMap = new LinkedHashMap<String, Object>(); //DomElement checkcode = firstInfoPage.getElementById("checkNo"); WebWindow window = firstInfoPage.getWebClient().getCurrentWindow(); //???/* w ww . j av a 2 s . c om*/ HtmlElement divByXPathyzm = (HtmlElement) firstInfoPage .getFirstByXPath("//div[@class='input-center3']/font"); if (divByXPathyzm != null) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR); } else { HtmlElement divByXPath = ((HtmlElement) firstInfoPage.getFirstByXPath("//div[@class='list']")); //? if (divByXPath != null) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS); } else { resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND); } } @SuppressWarnings("unchecked") List<HtmlAnchor> anchors = (List<HtmlAnchor>) firstInfoPage.getByXPath("//div[@class='list']/ul/li/a"); LOGGER.info(anchors.toString()); if (anchors != null && !anchors.isEmpty()) { boolean matchFlag = false; for (HtmlAnchor anchor : anchors) { String anchorTitle = anchor.getTextContent().toString().trim(); if (anchorTitle.contains(keyword)) { //???? matchFlag = true; //??? HtmlElement target_item_info = (HtmlElement) anchor.getParentNode().getParentNode(); resultHtmlMap.put("target_item_info", target_item_info.asXml()); Elements e1 = Jsoup.parseBodyFragment(target_item_info.asXml()).getElementsByClass("font16"); Element element1 = e1.get(0); Element aElement = element1.select("a").get(0); //String hrefElement = getElementAttr(aElement, "href"); String hrefElement = aElement.hasAttr("href") ? 
aElement.attr("href") : ""; String encrpripid = hrefElement.substring(hrefElement.lastIndexOf("/") + 1); String enttype = hrefElement.split("/")[1]; String gsurl = "http://218.57.139.24/pub/" + hrefElement; //System.out.println(hrefElement); String mystr = hrefElement.split("gsgsdetail")[1]; //System.out.println(mystr); //?? ?->? // HtmlPage qyxx_page = anchor.click(); if (!qyxx_page.asXml().contains("?")) { return resultHtmlMap; } resultHtmlMap.put("qyxx_gsgsxx", qyxx_page.asXml()); String[] command = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + gsurl }; String qyxx_gsgsxx_djxx = CommandUtil.runCommand(command); resultHtmlMap.put("qyxx_gsgsxx_djxx", qyxx_gsgsxx_djxx); //? ?->?->??var url = webroot+"pub/gsczxx"; WebRequest czxxWebRequest = new WebRequest( new URL("http://218.57.139.24/pub/gsryxx/" + enttype + "?encrpripid=" + encrpripid), HttpMethod.POST); DomElement csrfMetaEle = qyxx_page.getFirstByXPath("//meta[@name='_csrf']"); String csrfToken = ""; if (csrfMetaEle != null) { csrfToken = csrfMetaEle.getAttribute("content"); } czxxWebRequest.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); //window, new WebRequest(new URL( Page zyryxxPage = qyxx_page.getWebClient().getPage(window, czxxWebRequest); resultHtmlMap.put("qyxx_gsgsxx_baxx_zyryxx", zyryxxPage.getWebResponse().getContentAsString()); //System.out.println(zyryxxPage.getWebResponse().getContentAsString()); //? ?->?-> WebRequest czxxWebRequestfzjg = new WebRequest( new URL("http://218.57.139.24/pub/gsfzjg/" + enttype + "?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestfzjg.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); // Page czxxPage = qyxx_page.getWebClient().getPage(window, czxxWebRequestfzjg); resultHtmlMap.put("qyxx_gsgsxx_baxx_fzjgxx", czxxPage.getWebResponse().getContentAsString()); //System.out.println(czxxPage.getWebResponse().getContentAsString()); //? ?->? 
WebRequest czxxWebRequestdcdydjxx = new WebRequest( new URL("http://218.57.139.24/pub/gsdcdy?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestdcdydjxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); // Page czxxPagedcdydjxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestdcdydjxx); resultHtmlMap.put("qyxx_gsgsxx_dcdydjxx", czxxPagedcdydjxx.getWebResponse().getContentAsString()); //System.out.println(czxxPage.getWebResponse().getContentAsString()); //? ?->?? WebRequest czxxWebRequestgqczdjxx = new WebRequest( new URL("http://218.57.139.24/pub/gsgqcz?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestgqczdjxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); // Page czxxPagegqczdjxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestdcdydjxx); resultHtmlMap.put("qyxx_gsgsxx_gqczdjxx", czxxPagegqczdjxx.getWebResponse().getContentAsString()); //System.out.println(czxxPage.getWebResponse().getContentAsString()); //? ?->? WebRequest czxxWebRequestxzcfxx = new WebRequest( new URL("http://218.57.139.24/pub/gsxzcfxx?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestxzcfxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); // Page czxxPagegxzcfxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestxzcfxx); resultHtmlMap.put("qyxx_gsgsxx_xzcfxx", czxxPagegxzcfxx.getWebResponse().getContentAsString()); //? ?->???var url = webroot+"pub/jyyc/"+enttype; WebRequest czxxWebRequestjyjcxx = new WebRequest( new URL("http://218.57.139.24/pub/jyyc/" + enttype + "?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestjyjcxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); // Page czxxPagegjyjcxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestjyjcxx); resultHtmlMap.put("qyxx_gsgsxx_jyjcxx", czxxPagegjyjcxx.getWebResponse().getContentAsString()); //System.out.println( czxxPagegjyjcxx.getWebResponse().getContentAsString()); //? 
?->???var url = webroot+"pub/yzwfqy"; WebRequest czxxWebRequestyzwfxx = new WebRequest( new URL("http://218.57.139.24/pub/yzwfqy?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestyzwfxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); // Page czxxPagegyzwfxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestyzwfxx); resultHtmlMap.put("qyxx_gsgsxx_yzwfxx", czxxPagegyzwfxx.getWebResponse().getContentAsString()); //? ?->? var url = webroot+"pub/ccjcxx"; WebRequest czxxWebRequestcxjcxx = new WebRequest( new URL("http://218.57.139.24/pub/ccjcxx?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestcxjcxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken); // Page czxxPagegcxjcxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestyzwfxx); resultHtmlMap.put("qyxx_gsgsxx_cxjcxx", czxxPagegcxjcxx.getWebResponse().getContentAsString()); //???? // HtmlElement qyxx_qygsxx = (HtmlElement)qyxx_page.getByXPath("//div[@id='leftTabs']/ul/li").get(1); // HtmlPage qygsxx_page = (HtmlPage)qyxx_qygsxx.click(); // resultHtmlMap.put("qygsxx", qygsxx_page.asXml()); // System.out.println(qygsxx_page.asXml()); //http://218.57.139.24/pub/qygsdetail/1100/2396ed6cd3e0e1a30bc8098cadaef458e48f827ea3353ac3b826876e37a1ca6f String gsgsxx_sfxzgsxx_url_hqqygsxx = "http://218.57.139.24/pub/qygsdetail" + mystr; HtmlPage qygsxx_page = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_sfxzgsxx_url_hqqygsxx))); resultHtmlMap.put("qygsxx", qygsxx_page.asXml()); // String[] command2 = {"casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url="+gsgsxx_sfxzgsxx_url_hqqygsxx}; // String qygsxx = CommandUtil.runCommand(command2); // resultHtmlMap.put("qygsxx", qygsxx); //<a href="http://218.57.139.24/pub/nb/detail/1100/0453801D2A010583E05012AC9E011868" target="_blank"> List<HtmlAnchor> anchors_detail = (List<HtmlAnchor>) qygsxx_page .getByXPath("//div[@id='qiyenianbao']/table[@class='detailsList']/tbody/tr/td/a"); 
List<String> nbxx_list = new ArrayList<String>(); for (HtmlAnchor htmlAnchor : anchors_detail) { String attribute = htmlAnchor.getAttribute("href"); String nburldetail = "http://218.57.139.24" + attribute; String[] command2 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + nburldetail }; String nianbaodetail = CommandUtil.runCommand(command2); //HtmlPage nb_detail = htmlAnchor.click(); nbxx_list.add(nianbaodetail); } resultHtmlMap.put("qygsxx_qynb_detail", nbxx_list); //??->??->?? var url = webroot+"pub/qygsjsxxxzczxx"; WebRequest czxxWebRequestgdjczxx = new WebRequest( new URL("http://218.57.139.24/pub/qygsjsxxxzczxx?encrpripid=" + encrpripid), HttpMethod.POST); DomElement csrfMetaEle2 = qygsxx_page.getFirstByXPath("//meta[@name='_csrf']"); String csrfToken2 = ""; if (csrfMetaEle2 != null) { csrfToken2 = csrfMetaEle.getAttribute("content"); } czxxWebRequestgdjczxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken2); // Page czxxPageggdjczxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestgdjczxx); resultHtmlMap.put("qyxx_qygsxx_gdjczxx", czxxPageggdjczxx.getWebResponse().getContentAsString()); //??->??->?? var url = webroot+"pub/qygsjsxxczxxbgsx"; WebRequest czxxWebRequestbgxx = new WebRequest( new URL("http://218.57.139.24/pub/qygsjsxxczxxbgsx?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestbgxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken2); // Page czxxPagegbgxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestgdjczxx); resultHtmlMap.put("qyxx_qygsxx_gdjczxx_bgxx", czxxPagegbgxx.getWebResponse().getContentAsString()); //??->??? 
var url = webroot+"pub/qygsJsxxgqbg"; WebRequest czxxWebRequestgqbgxx = new WebRequest( new URL("http://218.57.139.24/pub/qygsJsxxgqbg?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestgqbgxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken2); // Page czxxPageggqbgxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestgqbgxx); resultHtmlMap.put("qyxx_qygsxx_gqbgxx", czxxPageggqbgxx.getWebResponse().getContentAsString()); //??->?? var url = webroot+"pub/qygsjsxxxzxk"; WebRequest czxxWebRequestxzxkxx = new WebRequest( new URL("http://218.57.139.24/pub/qygsjsxxxzxk?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestxzxkxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken2); // Page czxxPagegxzxkxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestxzxkxx); resultHtmlMap.put("qyxx_qygsxx_xzxkxx", czxxPagegxzxkxx.getWebResponse().getContentAsString()); //??->?? var url = webroot+"pub/qygsjsxxzscqcz"; WebRequest czxxWebRequestzscqczdjxx = new WebRequest( new URL("http://218.57.139.24/pub/qygsjsxxzscqcz?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestzscqczdjxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken2); // Page czxxPagegzscqczdjxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestzscqczdjxx); resultHtmlMap.put("qyxx_qygsxx_zscqczdjxx", czxxPagegzscqczdjxx.getWebResponse().getContentAsString()); //??->? var url = webroot+"pub/qygsjsxxxzcfxx"; WebRequest czxxWebRequestxzcfxxx = new WebRequest( new URL("http://218.57.139.24/pub/qygsjsxxxzcfxx?encrpripid=" + encrpripid), HttpMethod.POST); czxxWebRequestxzcfxxx.setAdditionalHeader("X-CSRF-TOKEN", csrfToken2); // Page czxxPagegxzcfxxx = qyxx_page.getWebClient().getPage(window, czxxWebRequestxzcfxxx); resultHtmlMap.put("qyxx_qygsxx_xzcfxxx", czxxPagegxzcfxxx.getWebResponse().getContentAsString()); //??? 
String gsgsxx_sfxzgsxx_url_hqqtbmgsxx = "http://218.57.139.24/pub/qtgsdetail" + mystr; HtmlPage qtbmgsxx_page = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_sfxzgsxx_url_hqqtbmgsxx))); resultHtmlMap.put("qtbmgsxx", qtbmgsxx_page.asXml()); // String[] command3 = {"casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url="+gsgsxx_sfxzgsxx_url_hqqtbmgsxx}; // String qtbmgsxx = CommandUtil.runCommand(command3); // resultHtmlMap.put("qygsxx", qtbmgsxx); //?????? //http://218.57.139.24/pub/sfgsdetail/1130/95f6c493f094da93009e08daa27616d8 //String gsgsxx_sfxzgsxx_url = "http://218.57.139.24/pub/sfgsdetail"+mystr ; //HtmlPage gsgsxx_baxx_zyryxx_page = firstInfoPage.getWebClient().getPage(gsgsxx_sfxzgsxx_url); //resultHtmlMap.put("gsgsxx_baxx_zyryxx", gsgsxx_baxx_zyryxx_page.asXml()); //?????? //HtmlElement sfxzgsxx_tab = (HtmlElement)qyxx_page.getByXPath("//div[@id='leftTabs']/ul/li").get(3); String gsgsxx_sfxzgsxx_url = "http://218.57.139.24/pub/sfgsdetail" + mystr; HtmlPage sfxzgsxx_page = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_sfxzgsxx_url))); resultHtmlMap.put("sfxzgsxx_page", sfxzgsxx_page.asXml()); // String[] command4 = {"casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url="+gsgsxx_sfxzgsxx_url}; // String sfxzgsxx = CommandUtil.runCommand(command4); // resultHtmlMap.put("sfxzgsxx_page", sfxzgsxx); //http://218.57.139.24/pub/sfgsgqxxdetail/95f6c493f094da93009e08daa27616d8/1130/12D1EA5D6111126BE054/1 String mystrdetil = ""; if (mystr != null && !"".equals(mystr)) { String mystrspill[] = mystr.split("/"); mystrdetil = "/" + mystrspill[2] + "/" + mystrspill[1]; } String mystrdetilurl = "http://218.57.139.24/pub/sfgsgqxxdetail" + mystrdetil; String urlstring = sfxzgsxx_page.asXml(); List<String> sfxzxxlist = new AbstractParser() { }.getSubStringByRegex(urlstring, "var gqxxliststr ='\\[.*\\]"); String 
sfxzgsxxzzfc = sfxzxxlist.get(0).substring(19, sfxzxxlist.get(0).length() - 1); if (!"".equals(sfxzgsxxzzfc) && null != sfxzgsxxzzfc) { String[] sfxzgsxxzzfcsplil = sfxzgsxxzzfc.split(","); List<String> urllist = new ArrayList<String>(); String num = ""; for (int m = 0; m < sfxzgsxxzzfcsplil.length; m++) { String strname = sfxzgsxxzzfcsplil[m]; String strname1[] = strname.split(":"); String myname = strname1[0]; String strna = strname1[1]; String namesss = ""; if ("\"frozstate\"".equals(myname)) { // ? namesss = strna.substring(1, strna.length() - 1); num = namesss; } if ("\"pid\"".equals(myname)) { // ??? namesss = strna.substring(1, strna.length() - 1); mystrdetilurl = mystrdetilurl + "/" + namesss; mystrdetilurl = mystrdetilurl + "/" + num; urllist.add(mystrdetilurl); } } List<String> gqdjxx_list = new ArrayList<String>(); for (int i = 0; i < urllist.size(); i++) { HtmlPage sfxzgsxx_page_detail = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(urllist.get(i)))); gqdjxx_list.add(sfxzgsxx_page_detail.asXml()); } resultHtmlMap.put("sfxzgsxx_gqdjxx_detail", gqdjxx_list); } break; } } if (!matchFlag) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND); LOGGER.info("????"); } } return resultHtmlMap; }
From source file:com.storm.function.GsxtFunction.java
private Map<String, Object> getHtmlInfoMapOfJilin(String area, String keyword, ChannelLogger LOGGER) throws Exception { Map<String, Object> resultHtmlMap = new LinkedHashMap<String, Object>(); String[] command = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/jilin.js", "--web-security=no", "--keyword=" + keyword }; String casperjsResult = CommandUtil.runCommand(command); Elements divDataItems = Jsoup.parse(casperjsResult).getElementsByClass("list"); Elements divNoDataItems = Jsoup.parse(casperjsResult).getElementsByClass("list-a"); if (divDataItems.isEmpty() && !divNoDataItems.isEmpty()) { // ? resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND); } else if (divDataItems.isEmpty() && divDataItems.isEmpty()) { // ?? // ????//from w w w . j a v a2s . com if (casperjsResult.contains("")) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR); } else { resultHtmlMap.put("statusCodeDef", StatusCodeDef.FAILURE); } } else if (!divDataItems.isEmpty() && divNoDataItems.isEmpty()) { // ? // ??????? 
Element nowCookies = Jsoup.parse(casperjsResult).getElementById("nextParams"); Elements tokenEts = Jsoup.parse(casperjsResult).getElementsByAttributeValue("name", "_csrf"); if (null == nowCookies || null == tokenEts || tokenEts.isEmpty()) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.COOKIE_ERROR); return resultHtmlMap; } String nowCookiesJson = nowCookies.text().trim(); String nowCookiesStr = ((String) new GsonBuilder().create().fromJson(nowCookiesJson, Map.class) .get("Cookie")).trim(); String tokenStr = tokenEts.get(0).attr("content"); String HOST_OF_JILIN = "http://211.141.74.198:8081/aiccips/pub/"; String HOST_OF_XQ = "http://211.141.74.198:8081/"; String htmlAnchorHref = ""; for (Element divDataItem : divDataItems) { Element htmlAnchor = divDataItem.getElementsByTag("a").get(0); String htmlAnchorText = htmlAnchor.text(); if (htmlAnchorText.contains(keyword)) { htmlAnchorHref = HOST_OF_JILIN + htmlAnchor.attr("href"); break; } } if (StringUtils.isEmpty(htmlAnchorHref)) { htmlAnchorHref = "http://211.141.74.198:8081/aiccips/pub/" + divDataItems.get(0).getElementsByTag("a").get(0).attr("href"); } String commonUrl = htmlAnchorHref.split("gsgsdetail")[1]; String commonUrlZ = htmlAnchorHref.substring(htmlAnchorHref.lastIndexOf("/") + 1, htmlAnchorHref.length()); // ?->? String[] command11 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + htmlAnchorHref }; String casperjsResult11 = CommandUtil.runCommand(command11); resultHtmlMap.put("gsgsxx", casperjsResult11); Thread.sleep(1000); // ?->?->?? 
String baxxZyryxxUrl = HOST_OF_JILIN + "gsryxx/1151?encrpripid=" + commonUrlZ; String[] command121 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + baxxZyryxxUrl }; String casperjsResult121 = CommandUtil.runCommand(command121); resultHtmlMap.put("gsgsxx_baxx_zyryxx", casperjsResult121); // ?->?->? String baxxFzjgxxUrl = HOST_OF_JILIN + "gsfzjg/1151?encrpripid=" + commonUrlZ; String[] command123 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + baxxFzjgxxUrl }; String casperjsResult123 = CommandUtil.runCommand(command123); resultHtmlMap.put("gsgsxx_baxx_fzjgxx", casperjsResult123); // ?->?->? String dcdydjxxDcdydjxxUrl = HOST_OF_JILIN + "gsdcdy?encrpripid=" + commonUrlZ; String[] command131 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + dcdydjxxDcdydjxxUrl }; String casperjsResult131 = CommandUtil.runCommand(command131); resultHtmlMap.put("gsgsxx_dcdydjxx_dcdydjxx", casperjsResult131); // ?->??->?? String gqczdjxxGqczdjxxUrl = HOST_OF_JILIN + "gsgqcz?encrpripid=" + commonUrlZ; String[] command141 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + gqczdjxxGqczdjxxUrl }; String casperjsResult141 = CommandUtil.runCommand(command141); resultHtmlMap.put("gsgsxx_gqczdjxx_gqczdjxx", casperjsResult141); // ?->?->? 
String xzcfxxXzcfxxUrl = HOST_OF_JILIN + "gsxzcfxx?encrpripid=" + commonUrlZ; String[] command151 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + xzcfxxXzcfxxUrl }; String casperjsResult151 = CommandUtil.runCommand(command151); resultHtmlMap.put("gsgsxx_xzcfxx_xzcfxx", casperjsResult151); // ?->???->??? String jyycxxJyycxxUrl = HOST_OF_JILIN + "jyyc/1151?encrpripid=" + commonUrlZ; String[] command161 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + jyycxxJyycxxUrl }; String casperjsResult161 = CommandUtil.runCommand(command161); resultHtmlMap.put("gsgsxx_jyycxx_jyycxx", casperjsResult161); // ?->???->??? String yzwfxxYzwfxxUrl = HOST_OF_JILIN + "yzwfqy?encrpripid=" + commonUrlZ; String[] command171 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + yzwfxxYzwfxxUrl }; String casperjsResult171 = CommandUtil.runCommand(command171); resultHtmlMap.put("gsgsxx_yzwfxx_yzwfxx", casperjsResult171); // ?->?->? String ccjcxxCcjcxxUrl = HOST_OF_JILIN + "ccjcxx?encrpripid=" + commonUrlZ; String[] command181 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + ccjcxxCcjcxxUrl }; String casperjsResult181 = CommandUtil.runCommand(command181); resultHtmlMap.put("gsgsxx_ccjcxx_ccjcxx", casperjsResult181); // ?? 
String qygsUrl = HOST_OF_JILIN + "qygsdetail" + commonUrl; String[] command2 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + qygsUrl }; String casperjsResult2 = CommandUtil.runCommand(command2); resultHtmlMap.put("qygsxx_list", casperjsResult2); // ? ??->?-> Document qygsxxHtml = Jsoup.parseBodyFragment(casperjsResult2); Element qynbDiv = qygsxxHtml.getElementById("qiyenianbao"); if (null != qynbDiv) { Elements qynb_trs = qynbDiv.select("tbody").get(0).select("tr"); if (null != qynb_trs && qynb_trs.size() > 2) { List<Map<String, Object>> qygsxx_qynb_infos = new ArrayList<Map<String, Object>>(); for (int i = 2; i < qynb_trs.size(); i++) { Map<String, Object> qygsxx_qynb_info_map = new LinkedHashMap<String, Object>(); Element wdd = qynb_trs.get(i).select("td").get(1).select("a").get(0); String qygsxx_qynb_list_a_text = wdd.text(); String qygsxx_qynb_list_pubdate = qynb_trs.get(i).select("td").get(2).text(); qygsxx_qynb_info_map.put("qygsxx_qynb_list_a_text", qygsxx_qynb_list_a_text); qygsxx_qynb_info_map.put("qygsxx_qynb_list_pubdate", qygsxx_qynb_list_pubdate); String qynbxqUrl = HOST_OF_XQ + wdd.attr("href"); String[] command21 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + qynbxqUrl }; String casperjsResult21 = CommandUtil.runCommand(command21); qygsxx_qynb_info_map.put("qygsxx_qynb_info_page", casperjsResult21); qygsxx_qynb_infos.add(qygsxx_qynb_info_map); } resultHtmlMap.put("qygsxx_qynb_infos", qygsxx_qynb_infos); } } Thread.sleep(1000); // ??->??->?? 
String gdjczxxGdjczxxUrl = HOST_OF_JILIN + "qygsjsxxxzczxx?encrpripid=" + commonUrlZ; String[] command221 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + gdjczxxGdjczxxUrl }; String casperjsResult221 = CommandUtil.runCommand(command221); resultHtmlMap.put("qygsxx_gdjczxx_gdjczxx", casperjsResult221); // ??->??->?? String gdjczxxBgxxUrl = HOST_OF_JILIN + "qygsjsxxczxxbgsx?encrpripid=" + commonUrlZ; String[] command222 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + gdjczxxBgxxUrl }; String casperjsResult222 = CommandUtil.runCommand(command222); resultHtmlMap.put("qygsxx_gdjczxx_bgxx", casperjsResult222); // ??->???->??? String gqbgxxGqbgxxUrl = HOST_OF_JILIN + "qygsJsxxgqbg?encrpripid=" + commonUrlZ; String[] command231 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + gqbgxxGqbgxxUrl }; String casperjsResult231 = CommandUtil.runCommand(command231); resultHtmlMap.put("qygsxx_gqbgxx_gqbgxx", casperjsResult231); // ??->??->?? String xzxkxxXzxkxxUrl = HOST_OF_JILIN + "qygsjsxxxzxk?encrpripid=" + commonUrlZ; String[] command241 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + xzxkxxXzxkxxUrl }; String casperjsResult241 = CommandUtil.runCommand(command241); resultHtmlMap.put("qygsxx_xzxkxx_xzxkxx", casperjsResult241); // ??->??->?? 
String zscqczZscqczUrl = HOST_OF_JILIN + "/qygsjsxxzscqcz?encrpripid=" + commonUrlZ; String[] command251 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + zscqczZscqczUrl }; String casperjsResult251 = CommandUtil.runCommand(command251); resultHtmlMap.put("qygsxx_zscqcz_zscqcz", casperjsResult251); // ??->?->? String qygsxxXzcfxxUrl = HOST_OF_JILIN + "qygsjsxxxzcfxx?encrpripid=" + commonUrlZ; String[] command261 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + qygsxxXzcfxxUrl }; String casperjsResult261 = CommandUtil.runCommand(command261); resultHtmlMap.put("qygsxx_zscqcz_zscqcz", casperjsResult261); // ? String qtbmUrl = HOST_OF_JILIN + "qtgsdetail" + commonUrl; String[] command3 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + qtbmUrl }; String casperjsResult3 = CommandUtil.runCommand(command3); resultHtmlMap.put("qtbmgsxx", casperjsResult3); // ???? String sfxzUrl = HOST_OF_JILIN + "sfgsdetail" + commonUrl; String[] command4 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + sfxzUrl }; String casperjsResult4 = CommandUtil.runCommand(command4); resultHtmlMap.put("sfxzgsxx_list", casperjsResult4); resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS); } return resultHtmlMap; }
From source file:com.storm.function.GsxtFunction.java
private Map<String, Object> getHtmlInfoMapOfTianjin(String area, HtmlPage firstInfoPage, String keyword, ChannelLogger LOGGER) throws Exception { Map<String, Object> resultHtmlMap = new HashMap<String, Object>(); //????//from www .j a va 2 s. co m // System.out.println(firstInfoPage.asXml()); WebWindow window = firstInfoPage.getWebClient().getCurrentWindow(); @SuppressWarnings("unchecked") List<HtmlAnchor> divByXPath = (List<HtmlAnchor>) firstInfoPage.getByXPath("//div[@class='result-item']"); HtmlElement firstByXPath = ((HtmlElement) firstInfoPage .getFirstByXPath("//div[@class='content']/div[@style='font-size:12px']")); if (divByXPath.size() == 0 && firstByXPath == null) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR); } else { if (firstByXPath != null) { String textContent = firstByXPath.getTextContent(); if (textContent.indexOf("? 0 ?") > 0) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND); } else { resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS); } } else { resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS); } } @SuppressWarnings("unchecked") List<HtmlAnchor> anchors = (List<HtmlAnchor>) firstInfoPage.getByXPath("//div[@class='result-item']/div/a"); LOGGER.info(anchors.toString()); if (anchors != null && !anchors.isEmpty()) { boolean matchFlag = false; for (HtmlAnchor anchor : anchors) { String anchorTitle = anchor.getTextContent().toString().trim(); if (anchorTitle.contains(keyword)) { //???? matchFlag = true; //??? HtmlElement target_item_info = (HtmlElement) anchor.getParentNode().getParentNode(); resultHtmlMap.put("target_item_info", target_item_info.asXml()); //*****************? ***************** //?? ?->?->? //?entId //? 
String ent_id = anchor.getAttribute("href"); if (!StringUtils.isEmpty(ent_id)) { ent_id = ent_id.split("=")[1]; } if (!StringUtils.isEmpty(ent_id)) { /*HtmlPage gsgsxx_djxx_jbxx = anchor.click(); Thread.sleep(3000); resultHtmlMap.put("gsgsxx_djxx_jbxx", gsgsxx_djxx_jbxx.asXml()); */ String gsgsxx_djxx_jbxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=dj"; Page gsgsxx_djxx_jbxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_djxx_jbxx_url))); resultHtmlMap.put("gsgsxx_djxx_jbxx", gsgsxx_djxx_jbxx.getWebResponse().getContentAsString("utf-8")); //?? ?->?->?->?-> Document qygsxxHtml = Jsoup .parseBodyFragment(gsgsxx_djxx_jbxx.getWebResponse().getContentAsString("utf-8")); if (qygsxxHtml != null) { Element qynbDiv = qygsxxHtml.getElementById("touziren"); if (qynbDiv != null) { Elements qynb_trs = qynbDiv.select("tbody").select("tr").select("td").select("a"); if (qynb_trs.size() != 0) { List<String> list = new ArrayList<String>(); for (int i = 0; i < qynb_trs.size(); i++) { //System.out.println(qynb_trs.get(i).toString()); //System.out.println(qynb_trs.get(i).toString().split("\\(\\'")[1].split("\\'\\)")[0].split("\\'\\,\\'")[0]); if (qynb_trs.get(i).toString() != null && qynb_trs.get(i).toString().split("\\(\\'")[1].split("\\'\\)")[0] .split("\\'\\,\\'")[0] != null) { String gsgsxx_djxx_gdxx_detail_url = "http://tjcredit.gov.cn/saicpf/gsgdcz?gdczid=" + qynb_trs.get(i).toString().split("\\(\\'")[1] .split("\\'\\)")[0].split("\\'\\,\\'")[0] + "&entid=" + ent_id + "&issaic=1&hasInfo=0"; Page gsgsxx_djxx_gdxx_detail = firstInfoPage.getWebClient().getPage( window, new WebRequest(new URL(gsgsxx_djxx_gdxx_detail_url))); list.add(gsgsxx_djxx_gdxx_detail.getWebResponse() .getContentAsString("utf-8")); } } if (list.size() != 0) { resultHtmlMap.put("gsgsxx_djxx_gdxx", list); } } } } //? ?->?->?? 
String gsgsxx_baxx_zyryxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=ba"; Page gsgsxx_baxx_zyryxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_baxx_zyryxx_url))); resultHtmlMap.put("gsgsxx_baxx_zyryxx", gsgsxx_baxx_zyryxx.getWebResponse().getContentAsString("utf-8")); //? ?->?->? String gsgsxx_dcdydjxx_dcdydjxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=dcdydjxx"; Page gsgsxx_dcdydjxx_dcdydjxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_dcdydjxx_dcdydjxx_url))); resultHtmlMap.put("gsgsxx_dcdydjxx_dcdydjxx", gsgsxx_dcdydjxx_dcdydjxx.getWebResponse().getContentAsString("utf-8")); //? ?->??->?? String gsgsxx_gqczdjxx_gqczdjxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=gqczdjxx"; Page gsgsxx_gqczdjxx_gqczdjxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_gqczdjxx_gqczdjxx_url))); resultHtmlMap.put("gsgsxx_gqczdjxx_gqczdjxx", gsgsxx_gqczdjxx_gqczdjxx.getWebResponse().getContentAsString("utf-8")); //? ?->?->? 
String gsgsxx_xzcfxx_xzcfxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=xzcf"; Page gsgsxx_xzcfxx_xzcfxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_xzcfxx_xzcfxx_url))); resultHtmlMap.put("gsgsxx_xzcfxx_xzcfxx", gsgsxx_xzcfxx_xzcfxx.getWebResponse().getContentAsString("utf-8")); // Document xzcfxxHtml = Jsoup.parseBodyFragment( gsgsxx_xzcfxx_xzcfxx.getWebResponse().getContentAsString("utf-8")); if (xzcfxxHtml != null) { Elements qynbDiv = xzcfxxHtml.getElementsByClass("result-table"); if (qynbDiv != null && qynbDiv.size() != 0) { Elements qynb_trs = qynbDiv.get(0).select("tbody").select("tr").select("td") .select("a"); if (qynb_trs.size() != 0) { List<String> list = new ArrayList<String>(); for (int i = 0; i < qynb_trs.size(); i++) { if (qynb_trs.get(i).toString() != null && qynb_trs.get(i).toString().split("\\(\\'")[1].split("\\'\\)")[0] .split("\\'\\,\\'")[0] != null) { String gsgsxx_djxx_gdxx_detail_url = "http://tjcredit.gov.cn/saicpf/gsxzcf?id=" + qynb_trs.get(i).toString().split("\\(\\'")[1] .split("\\'\\)")[0].split("\\'\\,\\'")[0] + "&entid=" + ent_id + "&issaic=1&hasInfo=0"; Page gsgsxx_djxx_gdxx_detail = firstInfoPage.getWebClient().getPage( window, new WebRequest(new URL(gsgsxx_djxx_gdxx_detail_url))); list.add(gsgsxx_djxx_gdxx_detail.getWebResponse() .getContentAsString("utf-8")); } } if (list.size() != 0) { resultHtmlMap.put("gsgsxx_xzcfxx_detail", list); } } } } //? ?->???->??? String gsgsxx_jyycxx_jyycxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=qyjyycmlxx"; Page gsgsxx_jyycxx_jyycxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_jyycxx_jyycxx_url))); resultHtmlMap.put("gsgsxx_jyycxx_jyycxx", gsgsxx_jyycxx_jyycxx.getWebResponse().getContentAsString("utf-8")); //? ?->???->??? 
String gsgsxx_yzwfxx_yzwfxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=yzwfqyxx"; Page gsgsxx_yzwfxx_yzwfxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_yzwfxx_yzwfxx_url))); resultHtmlMap.put("gsgsxx_yzwfxx_yzwfxx", gsgsxx_yzwfxx_yzwfxx.getWebResponse().getContentAsString("utf-8")); //? ?->?->? String gsgsxx_ccjcxx_ccjcxx_url = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + ent_id + "&departmentId=scjgw&infoClassId=ccjcxx"; Page gsgsxx_ccjcxx_ccjcxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(gsgsxx_ccjcxx_ccjcxx_url))); resultHtmlMap.put("gsgsxx_ccjcxx_ccjcxx", gsgsxx_ccjcxx_ccjcxx.getWebResponse().getContentAsString("utf-8")); //*****************? ?***************** //*****************?? ***************** //? ??->?-> String qygsxx_qynb_list_url = "http://tjcredit.gov.cn/report/nblist?entid=" + ent_id; Page qygsxx_qynb_list = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(qygsxx_qynb_list_url))); resultHtmlMap.put("qygsxx_qynb_list", qygsxx_qynb_list.getWebResponse().getContentAsString("utf-8")); //? ??->?->-> //? 
Document qynbHtml = Jsoup .parseBodyFragment(qygsxx_qynb_list.getWebResponse().getContentAsString("utf-8")); if (qynbHtml != null) { Element qynbDiv = qynbHtml.getElementById("touziren"); if (qynbDiv != null) { Elements qynb_trs = qynbDiv.select("tbody").select("tr").select("td").select("a"); if (qynb_trs.size() != 0) { List<String> list = new ArrayList<String>(); for (int i = 0; i < qynb_trs.size(); i++) { //System.out.println(qynb_trs.get(i).toString()); //System.out.println(qynb_trs.get(i).toString().split("\\(\\'")[1].split("\\'\\)")[0].split("\\'\\,\\'")[0]); if (qynb_trs.get(i).toString() != null && qynb_trs.get(i).toString().split("\\(\\'")[1].split("\\'\\)")[0] .split("\\'\\,\\'")[1] != null) { String gsgsxx_djxx_gdxx_detail_url = "http://tjcredit.gov.cn/report/annals?entid=" + ent_id + "&year=" + qynb_trs.get(i).toString().split("\\(\\'")[1] .split("\\'\\)")[0].split("\\'\\,\\'")[1] + "&hasInfo=0"; Page gsgsxx_djxx_gdxx_detail = firstInfoPage.getWebClient().getPage( window, new WebRequest(new URL(gsgsxx_djxx_gdxx_detail_url))); System.out.println(gsgsxx_djxx_gdxx_detail.getWebResponse() .getContentAsString("utf-8")); list.add(gsgsxx_djxx_gdxx_detail.getWebResponse() .getContentAsString("utf-8")); } } if (list.size() != 0) { resultHtmlMap.put("qygsxx_qynb_detail", list); } } } } //? ??->?? String qygsxx_xzxkxx_url = "http://tjcredit.gov.cn/report/xzxk?entid=" + ent_id; Page qygsxx_xzxkxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(qygsxx_xzxkxx_url))); resultHtmlMap.put("qygsxx_xzxkxx", qygsxx_xzxkxx.getWebResponse().getContentAsString("utf-8")); //? ??->?? String qygsxx_gdjczxx_url = "http://tjcredit.gov.cn/report/gdcz?entid=" + ent_id; Page qygsxx_gdjczxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(qygsxx_gdjczxx_url))); resultHtmlMap.put("qygsxx_gdjczxx", qygsxx_gdjczxx.getWebResponse().getContentAsString("utf-8")); //? ??->??? 
String qygsxx_gqbgxx_url = "http://tjcredit.gov.cn/report/gqbg?entid=" + ent_id; Page qygsxx_gqbgxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(qygsxx_gqbgxx_url))); resultHtmlMap.put("qygsxx_gqbgxx", qygsxx_gqbgxx.getWebResponse().getContentAsString("utf-8")); //? ??->?? String qygsxx_zscqczdjxx_url = "http://tjcredit.gov.cn/report/zscq?entid=" + ent_id; Page qygsxx_zscqczdjxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(qygsxx_zscqczdjxx_url))); resultHtmlMap.put("qygsxx_zscqczdjxx", qygsxx_zscqczdjxx.getWebResponse().getContentAsString("utf-8")); //? ??->? String qygsxx_xzcfxx_url = "http://tjcredit.gov.cn/report/xzcf?entid=" + ent_id; Page qygsxx_xzcfxx = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(qygsxx_xzcfxx_url))); resultHtmlMap.put("qygsxx_xzcfxx", qygsxx_xzcfxx.getWebResponse().getContentAsString("utf-8")); //*****************?? ?***************** //*****************???? ***************** //? ????->?? String sfxzgsxx_gqdjxx_list_url = "http://tjcredit.gov.cn/report/gddjlist?entid=" + ent_id; Page sfxzgsxx_gqdjxx_list = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(sfxzgsxx_gqdjxx_list_url))); resultHtmlMap.put("sfxzgsxx_gqdjxx_list", sfxzgsxx_gqdjxx_list.getWebResponse().getContentAsString("utf-8")); /* //? ????->??->-> @SuppressWarnings("unchecked") List<HtmlAnchor> anchors4 = (List<HtmlAnchor>) sfxzgsxx_gqdjxx_list_page.getByXPath("//table[@id='touziren']/tbody[@id='table2']/tr/td/a"); if (anchors4!=null && !anchors4.isEmpty()) { List<String> detail=new ArrayList<String>(); for (@SuppressWarnings("unused") HtmlAnchor anchor4 : anchors4) { HtmlPage sfxzgsxx_gqdjxx_detail = anchor4.click(); // System.out.println("gsgsxx_qynb_detail.asXml()="+gsgsxx_qynb_detail.asXml()); detail.add(sfxzgsxx_gqdjxx_detail.asXml()); } resultHtmlMap.put("sfxzgsxx_gqdjxx_details",detail); }*/ //? ??->?? 
String qygsxx_gdbgxx_list_url = "http://tjcredit.gov.cn/report/gdbglist?entid=" + ent_id; Page qygsxx_gdbgxx_list = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(qygsxx_gdbgxx_list_url))); resultHtmlMap.put("qygsxx_gdbgxx_list", qygsxx_gdbgxx_list.getWebResponse().getContentAsString("utf-8")); //? ??->??->-> /* @SuppressWarnings("unchecked") List<HtmlAnchor> anchors5 = (List<HtmlAnchor>) qygsxx_gdbgxx_page.getByXPath("//table[@id='touziren']/tbody[@id='table2']/tr/td/a"); if (anchors5!=null && !anchors5.isEmpty()) { List<String> detail=new ArrayList<String>(); for (@SuppressWarnings("unused") HtmlAnchor anchor5 : anchors5) { HtmlPage qygsxx_gdbgxx_detail = anchor5.click(); // System.out.println("gsgsxx_qynb_detail.asXml()="+gsgsxx_qynb_detail.asXml()); detail.add(qygsxx_gdbgxx_detail.asXml()); } resultHtmlMap.put("qygsxx_gdbgxx_details",detail); }*/ //*****************???? ?***************** } break;// } } if (!matchFlag) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND); LOGGER.info("????"); } } return resultHtmlMap; }
From source file:ca.appvelopers.mcgillmobile.model.retrofit.CourseResultConverter.java
@Override public List<CourseResult> convert(ResponseBody value) throws IOException { String html = value.string(); List<CourseResult> courses = new ArrayList<>(); Document document = Jsoup.parse(html, "UTF-8"); //Parse the response body into a list of rows Elements rows = document.getElementsByClass("dddefault"); // Parse the term from the page header Element header = document.getElementsByClass("staticheaders").get(0); Term term = Term.parseTerm(header.childNode(2).toString()); // Get the table in the form of a set of rows Element table = document.getElementsByClass("datadisplaytable").get(0).select("tbody").get(0); // Go through the rows in the table for (Element row : table.select("tr")) { // Check that there at least 19 elements in the row Elements rowElements = row.select("td"); if (rowElements.size() < 19) { // If there aren't, it must not be a course row continue; }//from w w w.j a va 2s. c o m // Create a new course object with the default values double credits = 99; String subject = null; String number = null; String title = ""; String type = ""; List<DayOfWeek> days = new ArrayList<>(); int crn = 0; String instructor = ""; String location = ""; //So that the rounded start time will be 0 LocalTime startTime = ScheduleConverter.getDefaultStartTime(); LocalTime endTime = ScheduleConverter.getDefaultEndTime(); int capacity = 0; int seatsRemaining = 0; int waitlistRemaining = 0; LocalDate startDate = LocalDate.now(); LocalDate endDate = LocalDate.now(); try { for (int i = 0; i < rowElements.size(); i++) { if (rowElements.get(i).toString().contains(" ")) { // Empty row: continue continue; } String rowString = rowElements.get(i).text(); switch (i) { // CRN case 1: crn = Integer.parseInt(rowString); break; // Subject case 2: subject = rowString; break; // Number case 3: number = rowString; break; // Type case 5: type = rowString; break; // Number of credits case 6: credits = Double.parseDouble(rowString); break; // Course title case 7: //Remove the extra period 
at the end of the course title title = rowString.substring(0, rowString.length() - 1); break; // Days of the week case 8: if (rowString.equals("TBA")) { // TBA Stuff: no time associated so skip the next one // and add a dummy to keep the index correct rowElements.add(9, null); i++; } else { // Day Parsing rowString = rowString.replace('\u00A0', ' ').trim(); for (int k = 0; k < rowString.length(); k++) { days.add(DayUtils.getDay(rowString.charAt(k))); } } break; // Time case 9: String[] times = rowString.split("-"); try { int startHour = Integer.parseInt(times[0].split(" ")[0].split(":")[0]); int startMinute = Integer.parseInt(times[0].split(" ")[0].split(":")[1]); int endHour = Integer.parseInt(times[1].split(" ")[0].split(":")[0]); int endMinute = Integer.parseInt(times[1].split(" ")[0].split(":")[1]); //If it's PM, then add 12 hours to the hours for 24 hours format //Make sure it isn't noon String startPM = times[0].split(" ")[1]; if (startPM.equals("PM") && startHour != 12) { startHour += 12; } String endPM = times[1].split(" ")[1]; if (endPM.equals("PM") && endHour != 12) { endHour += 12; } startTime = LocalTime.of(startHour, startMinute); endTime = LocalTime.of(endHour, endMinute); } catch (NumberFormatException e) { //Courses sometimes don't have assigned times startTime = ScheduleConverter.getDefaultStartTime(); endTime = ScheduleConverter.getDefaultEndTime(); } break; // Capacity case 10: capacity = Integer.parseInt(rowString); break; // Seats remaining case 12: seatsRemaining = Integer.parseInt(rowString); break; // Waitlist remaining case 15: waitlistRemaining = Integer.parseInt(rowString); break; // Instructor case 16: instructor = rowString; break; // Start/end date case 17: Pair<LocalDate, LocalDate> dates = parseDateRange(term, rowString); startDate = dates.first; endDate = dates.second; break; // Location case 18: location = rowString; break; } } } catch (Exception e) { Timber.e(e, "Course Results Parser Error"); } // Don't add any courses with 
errors if (subject != null && number != null) { // Create a new course object and add it to list // TODO Should we be parsing the course section? courses.add(new CourseResult(term, subject, number, title, crn, "", startTime, endTime, days, type, location, instructor, credits, startDate, endDate, capacity, seatsRemaining, waitlistRemaining)); } } return courses; }
From source file:com.dalthed.tucan.scraper.SingleEventScraper.java
/** * @param materialTable/*from ww w .jav a 2 s . c o m*/ */ private void scrapeMaterials(Iterator<Element> materialTable) { int ct = 0; ArrayList<String> materialNumber = new ArrayList<String>(); ArrayList<String> materialName = new ArrayList<String>(); ArrayList<String> materialDesc = new ArrayList<String>(); materialLink = new ArrayList<String>(); ArrayList<String> materialFile = new ArrayList<String>(); int mod = 0; if (materialTable != null) { while (materialTable.hasNext()) { Element next = materialTable.next(); if (next.select("td").size() > 1) { ct++; if (next.select("td").get(0).text().matches("[0-9]+")) { // First line materialNumber.add(next.select("td").get(0).text()); materialName.add(next.select("td").get(1).text()); if (mod == 1) { materialDesc.add(""); mod = 2; } if (mod == 2) { materialLink.add(""); materialFile.add(""); } mod = 1; } else if (mod == 1) { materialDesc.add(next.select("td").get(1).text()); mod = 2; } else if (mod == 2) { materialLink.add(next.select("td").get(1).select("a").attr("href")); materialFile.add(next.select("td").get(1).select("a").text()); mod = 0; } } } } if (mod == 1) { materialDesc.add(""); mod = 2; } if (mod == 2) { materialLink.add(""); materialFile.add(""); } if (ct > 2) { if (mPageAdapter != null) { mPageAdapter.setAdapter(new AppointmentAdapter(context, materialNumber, materialFile, null, materialName, materialDesc)); mPageAdapter.fileList = materialLink; } } else if (mPageAdapter != null) { mPageAdapter.setAdapter(new ArrayAdapter<String>(context, android.R.layout.simple_list_item_1, new String[] { "Kein Material" })); } }
From source file:com.dalthed.tucan.scraper.SingleEventScraper.java
/** * @param DateTable/*from w ww.ja va 2 s.co m*/ */ private void scrapeAppointments(Iterator<Element> DateTable) { ArrayList<String> eventNumber = new ArrayList<String>(); ArrayList<String> eventDate = new ArrayList<String>(); ArrayList<String> eventTime = new ArrayList<String>(); ArrayList<String> eventRoom = new ArrayList<String>(); ArrayList<String> eventInstructor = new ArrayList<String>(); if (DateTable != null) { while (DateTable.hasNext()) { Element next = DateTable.next(); Elements cols = next.select("td"); if (cols.size() > 5) { eventNumber.add(cols.get(0).text()); eventDate.add(cols.get(1).text()); eventTime.add(cols.get(2).text() + "-" + cols.get(3).text()); eventRoom.add(cols.get(4).text()); eventInstructor.add(cols.get(5).text()); } } } else { eventDate.add(""); eventTime.add(""); eventNumber.add(""); eventRoom.add("Keine Daten vorhanden"); eventInstructor.add(""); } if (mPageAdapter != null) { mPageAdapter.setAdapter( new AppointmentAdapter(context, eventDate, eventTime, eventNumber, eventRoom, eventInstructor)); } }
From source file:com.dalthed.tucan.scraper.SingleEventScraper.java
/** * /*from w w w . j a va2s .c o m*/ */ private void scrapeInformations(Iterator<Element> informationIterator) { while (informationIterator.hasNext()) { Element nextElement = informationIterator.next(); Elements td = nextElement.select("td"); if (td != null && td.hasClass("tbdata")) { Elements Paragraphs = nextElement.select("p"); Iterator<Element> PaIt = Paragraphs.iterator(); ArrayList<String> titles = new ArrayList<String>(); ArrayList<String> values = new ArrayList<String>(); while (PaIt.hasNext()) { Element next = PaIt.next(); String[] information = crop(next.html()); if (information[1].length() > 0) { titles.add(information[0]); values.add(information[1]); } } Log.i(LOG_TAG, "Informationscraper working"); if (mPageAdapter != null) { Log.i(LOG_TAG, "InformationAdapter set"); mPageAdapter.setAdapter(new TwoLinesAdapter(context, titles, values)); } } } }
From source file:com.example.bibliotecauclm.net.ActualizadorListaLibros.java
/**
 * Downloads the loan list of a user and converts every table row into a {@link Libro}.
 *
 * <p>The first row of the first table is skipped as the header. The first four cells of each
 * remaining row are passed to the {@code Libro} constructor. A book is marked renewable when
 * its fifth cell contains an {@code input} element, whose {@code name} becomes the book's
 * identifier.
 *
 * @param usuario    user name used to log in
 * @param contrasena password used to log in
 * @return the parsed books, or {@code null} when the device is offline or no session link
 *         could be obtained
 * @throws Exception if the page cannot be fetched or does not contain a table
 */
private List<Libro> obtenerLibros(String usuario, String contrasena) throws Exception {
    if (!Utiles.isOnline(contexto)) {
        return null;
    }
    String res = Utiles.obtenerLinkConexion(usuario, contrasena);
    if (res == null) {
        return null;
    }

    Document doc;
    if (!debug) {
        doc = Jsoup.connect("https://catalogobiblioteca.uclm.es" + res + "?ACC=210").timeout(12000).get();
    } else {
        // Debug mode reads a saved copy of the page instead of hitting the network
        File input = new File("/sdcard/html.htm");
        doc = Jsoup.parse(input, "UTF-8");
    }

    List<Libro> resultado = new ArrayList<Libro>();
    List<Element> filasTabla = doc.select("table").get(0).select("tr");
    // Start at 1 to skip the header row; an empty table simply yields no books.
    for (int i = 1; i < filasTabla.size(); i++) {
        List<Element> celdas = filasTabla.get(i).select("td");
        Libro libro = new Libro(celdas.get(0).text(), celdas.get(1).text(), celdas.get(2).text(),
                celdas.get(3).text());

        // Explicit checks replace the former catch of NullPointerException: a missing fifth
        // cell or a cell without an <input> both mean the book cannot be renewed.
        Element renovarInput = celdas.size() > 4 ? celdas.get(4).select("input").first() : null;
        if (renovarInput != null) {
            libro.setIdentificador(renovarInput.attr("name"));
            libro.setRenovar(true);
        } else {
            libro.setRenovar(false);
            libro.setIdentificador(null);
        }
        resultado.add(libro);
    }
    return resultado;
}
From source file:com.jp.miaulavirtual.DisplayMessageActivity.java
public void urlsToArray(Elements melem, Boolean isHome, Boolean comun) { Elements elem;/*w ww . ja v a2s. c o m*/ int i = 1; if (isHome) { elem = melem.select("td[headers=contents_name] a, td[headers=folders_name] a").not("[href*=/clubs/]"); //Nombre Asignaturas String !"Comunuidades" urls = new String[(elem.size()) + 1]; urls[0] = "/dotlrn/?page_num=" + panel; for (Element el : elem) { urls[i] = el.select("a").attr("href"); i++; } } else if (comun) { elem = melem.select( "td[headers=contents_name] a[href*=/clubs/], td[headers=folders_name] a[href*=/clubs/]"); //Nombre Asignaturas String "Comunuidades" urls = new String[elem.size() + 1]; urls[0] = onData.get(onData.size() - 2)[0]; for (Element el : elem) { urls[i] = el.select("a").attr("href"); i++; } } else { elem = melem.select("td[headers=contents_name] a[href], td[headers=folders_name] a[href]"); //Nombre Asignaturas String urls = new String[elem.size() + 1]; urls[0] = onData.get(onData.size() - 2)[0]; for (Element el : elem) { urls[i] = el.select("a").attr("href"); i++; } } Log.d("urlsToArray", String.valueOf(urls.length)); }
From source file:com.lloydtorres.stately.issues.IssueDecisionActivity.java
/** * Process the received page into the Issue and its IssueOptions * @param v Activity view/* w w w .j av a 2 s.c om*/ * @param d Document received from NationStates */ private void processIssueInfo(View v, Document d) { // First check if the issue is still available if (d.text().contains(NOT_AVAILABLE)) { mSwipeRefreshLayout.setRefreshing(false); SparkleHelper.makeSnackbar(v, String.format(Locale.US, getString(R.string.issue_unavailable), mNation.name)); return; } Element issueInfoContainer = d.select("div#dilemma").first(); if (issueInfoContainer == null) { // safety check mSwipeRefreshLayout.setRefreshing(false); SparkleHelper.makeSnackbar(v, getString(R.string.login_error_parsing)); return; } Elements issueInfoRaw = issueInfoContainer.children(); String issueText = issueInfoRaw.select("p").first().text(); // If this is an issue chain, grab the second paragraph instead if (d.select("div.dilemmachain").first() != null) { issueText = issueInfoRaw.select("p").get(1).text(); if (d.text().contains(STORY_SO_FAR)) { issueText = issueText + "<br><br>" + issueInfoRaw.select("p").get(2).text(); } } issue.content = issueText; issue.options = new ArrayList<IssueOption>(); Element optionHolderMain = issueInfoRaw.select("ol.diloptions").first(); if (optionHolderMain != null) { Elements optionsHolder = optionHolderMain.select("li"); int i = 0; for (Element option : optionsHolder) { IssueOption issueOption = new IssueOption(); issueOption.index = i++; Element button = option.select("button").first(); if (button != null) { issueOption.header = button.attr("name"); } else { issueOption.header = IssueOption.SELECTED_HEADER; } Element optionContentHolder = option.select("p").first(); if (optionContentHolder == null) { // safety check mSwipeRefreshLayout.setRefreshing(false); SparkleHelper.makeSnackbar(v, getString(R.string.login_error_parsing)); return; } issueOption.content = optionContentHolder.text(); issue.options.add(issueOption); } } IssueOption dismissOption = new 
IssueOption(); dismissOption.index = -1; dismissOption.header = IssueOption.DISMISS_HEADER; dismissOption.content = ""; issue.options.add(dismissOption); setRecyclerAdapter(issue); mSwipeRefreshLayout.setRefreshing(false); mSwipeRefreshLayout.setEnabled(false); }