Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element.select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.storm.function.GsxtFunction.java

/**
 * Collects the raw HTML/XML of every detail section for the first search result whose
 * link text contains {@code keyword}, starting from an already-loaded search result page.
 *
 * <p>The returned map always contains {@code "statusCodeDef"}. When a matching result is
 * found, the map additionally holds one entry per fetched section (insertion-ordered),
 * each value being the page markup as a string, or a list of strings for the
 * {@code "qygsxx_qynb_detail"} and {@code "sfxzgsxx_gqdjxx_detail"} entries.
 *
 * @param area          not used by this method
 * @param firstInfoPage search result page; its web client and current window are reused
 *                      for every follow-up request
 * @param keyword       text that the result link must contain
 * @param LOGGER        logger for diagnostic output
 * @return insertion-ordered map of section key to page content
 * @throws Exception if any page fetch, external command or URL parsing step fails
 */
private Map<String, Object> getHtmlInfoMapOfShandong(String area, HtmlPage firstInfoPage, String keyword,
        ChannelLogger LOGGER) throws Exception {
    final String pubBase = "http://218.57.139.24/pub/";
    final String simpleRequestScript = "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js";

    Map<String, Object> resultHtmlMap = new LinkedHashMap<String, Object>();
    WebWindow window = firstInfoPage.getWebClient().getCurrentWindow();

    // A <font> element inside div.input-center3 is treated as the captcha-error marker;
    // otherwise the presence of div.list decides between success and "no data".
    HtmlElement divByXPathyzm = (HtmlElement) firstInfoPage
            .getFirstByXPath("//div[@class='input-center3']/font");
    if (divByXPathyzm != null) {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR);
    } else {
        HtmlElement divByXPath = (HtmlElement) firstInfoPage.getFirstByXPath("//div[@class='list']");
        if (divByXPath != null) {
            resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS);
        } else {
            resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
        }
    }

    @SuppressWarnings("unchecked")
    List<HtmlAnchor> anchors = (List<HtmlAnchor>) firstInfoPage.getByXPath("//div[@class='list']/ul/li/a");
    // String.valueOf keeps the log call safe even if the list were null.
    LOGGER.info(String.valueOf(anchors));
    if (anchors == null || anchors.isEmpty()) {
        return resultHtmlMap;
    }

    boolean matchFlag = false;
    for (HtmlAnchor anchor : anchors) {
        String anchorTitle = anchor.getTextContent().trim();
        if (!anchorTitle.contains(keyword)) {
            continue;
        }
        matchFlag = true;

        // The <li> two levels above the anchor is stored as the matched search item.
        HtmlElement target_item_info = (HtmlElement) anchor.getParentNode().getParentNode();
        String targetItemXml = target_item_info.asXml();
        resultHtmlMap.put("target_item_info", targetItemXml);

        // Re-parse the item with jsoup to read the detail link's href.
        Element titleElement = Jsoup.parseBodyFragment(targetItemXml).getElementsByClass("font16").get(0);
        Element aElement = titleElement.select("a").get(0);
        String hrefElement = aElement.hasAttr("href") ? aElement.attr("href") : "";
        String encrpripid = hrefElement.substring(hrefElement.lastIndexOf("/") + 1);
        String enttype = hrefElement.split("/")[1];
        String gsurl = pubBase + hrefElement;
        // Path suffix after "gsgsdetail"; reused to build the sibling detail URLs below.
        String mystr = hrefElement.split("gsgsdetail")[1];

        HtmlPage qyxx_page = anchor.click();
        // NOTE(review): the literal below looks like a marker string whose original
        // characters were lost to an encoding problem - confirm the intended text.
        if (!qyxx_page.asXml().contains("?")) {
            return resultHtmlMap;
        }
        resultHtmlMap.put("qyxx_gsgsxx", qyxx_page.asXml());

        String[] command = { "casperjs", simpleRequestScript, "--web-security=no", "--url=" + gsurl };
        resultHtmlMap.put("qyxx_gsgsxx_djxx", CommandUtil.runCommand(command));

        // Sections of the first detail page, each loaded by a CSRF-protected POST.
        String csrfToken = readShandongCsrfToken(qyxx_page);
        String idQuery = "?encrpripid=" + encrpripid;
        resultHtmlMap.put("qyxx_gsgsxx_baxx_zyryxx",
                postShandongSection(qyxx_page, window, pubBase + "gsryxx/" + enttype + idQuery, csrfToken));
        resultHtmlMap.put("qyxx_gsgsxx_baxx_fzjgxx",
                postShandongSection(qyxx_page, window, pubBase + "gsfzjg/" + enttype + idQuery, csrfToken));
        resultHtmlMap.put("qyxx_gsgsxx_dcdydjxx",
                postShandongSection(qyxx_page, window, pubBase + "gsdcdy" + idQuery, csrfToken));
        // Fixed: previously re-sent the gsdcdy request instead of the gsgqcz one.
        resultHtmlMap.put("qyxx_gsgsxx_gqczdjxx",
                postShandongSection(qyxx_page, window, pubBase + "gsgqcz" + idQuery, csrfToken));
        resultHtmlMap.put("qyxx_gsgsxx_xzcfxx",
                postShandongSection(qyxx_page, window, pubBase + "gsxzcfxx" + idQuery, csrfToken));
        resultHtmlMap.put("qyxx_gsgsxx_jyjcxx",
                postShandongSection(qyxx_page, window, pubBase + "jyyc/" + enttype + idQuery, csrfToken));
        resultHtmlMap.put("qyxx_gsgsxx_yzwfxx",
                postShandongSection(qyxx_page, window, pubBase + "yzwfqy" + idQuery, csrfToken));
        // Fixed: previously re-sent the yzwfqy request instead of the ccjcxx one.
        resultHtmlMap.put("qyxx_gsgsxx_cxjcxx",
                postShandongSection(qyxx_page, window, pubBase + "ccjcxx" + idQuery, csrfToken));

        // Second detail page, e.g.
        // http://218.57.139.24/pub/qygsdetail/1100/2396ed6cd3e0e1a30bc8098cadaef458e48f827ea3353ac3b826876e37a1ca6f
        HtmlPage qygsxx_page = firstInfoPage.getWebClient().getPage(window,
                new WebRequest(new URL(pubBase + "qygsdetail" + mystr)));
        resultHtmlMap.put("qygsxx", qygsxx_page.asXml());

        // Every link in the "qiyenianbao" table is rendered through casperjs.
        @SuppressWarnings("unchecked")
        List<HtmlAnchor> anchors_detail = (List<HtmlAnchor>) qygsxx_page
                .getByXPath("//div[@id='qiyenianbao']/table[@class='detailsList']/tbody/tr/td/a");
        List<String> nbxx_list = new ArrayList<String>();
        for (HtmlAnchor htmlAnchor : anchors_detail) {
            String nburldetail = "http://218.57.139.24" + htmlAnchor.getAttribute("href");
            String[] command2 = { "casperjs", simpleRequestScript, "--web-security=no",
                    "--url=" + nburldetail };
            nbxx_list.add(CommandUtil.runCommand(command2));
        }
        resultHtmlMap.put("qygsxx_qynb_detail", nbxx_list);

        // Sections of the second detail page. Fixed: the token is now read from this
        // page's own meta tag; previously the first page's element was dereferenced,
        // which also risked a NullPointerException when that element was absent.
        String csrfToken2 = readShandongCsrfToken(qygsxx_page);
        resultHtmlMap.put("qyxx_qygsxx_gdjczxx",
                postShandongSection(qyxx_page, window, pubBase + "qygsjsxxxzczxx" + idQuery, csrfToken2));
        // Fixed: previously re-sent the qygsjsxxxzczxx request instead of this one.
        resultHtmlMap.put("qyxx_qygsxx_gdjczxx_bgxx",
                postShandongSection(qyxx_page, window, pubBase + "qygsjsxxczxxbgsx" + idQuery, csrfToken2));
        resultHtmlMap.put("qyxx_qygsxx_gqbgxx",
                postShandongSection(qyxx_page, window, pubBase + "qygsJsxxgqbg" + idQuery, csrfToken2));
        resultHtmlMap.put("qyxx_qygsxx_xzxkxx",
                postShandongSection(qyxx_page, window, pubBase + "qygsjsxxxzxk" + idQuery, csrfToken2));
        resultHtmlMap.put("qyxx_qygsxx_zscqczdjxx",
                postShandongSection(qyxx_page, window, pubBase + "qygsjsxxzscqcz" + idQuery, csrfToken2));
        resultHtmlMap.put("qyxx_qygsxx_xzcfxxx",
                postShandongSection(qyxx_page, window, pubBase + "qygsjsxxxzcfxx" + idQuery, csrfToken2));

        // Third detail page (qtgsdetail).
        HtmlPage qtbmgsxx_page = firstInfoPage.getWebClient().getPage(window,
                new WebRequest(new URL(pubBase + "qtgsdetail" + mystr)));
        resultHtmlMap.put("qtbmgsxx", qtbmgsxx_page.asXml());

        // Fourth detail page, e.g.
        // http://218.57.139.24/pub/sfgsdetail/1130/95f6c493f094da93009e08daa27616d8
        HtmlPage sfxzgsxx_page = firstInfoPage.getWebClient().getPage(window,
                new WebRequest(new URL(pubBase + "sfgsdetail" + mystr)));
        String sfxzgsxxXml = sfxzgsxx_page.asXml();
        resultHtmlMap.put("sfxzgsxx_page", sfxzgsxxXml);

        // Per-record detail URLs look like
        // http://218.57.139.24/pub/sfgsgqxxdetail/95f6c493f094da93009e08daa27616d8/1130/12D1EA5D6111126BE054/1
        // i.e. the two path segments of mystr swapped, followed by pid and frozstate.
        String mystrdetil = "";
        if (mystr != null && !"".equals(mystr)) {
            String[] mystrspill = mystr.split("/");
            mystrdetil = "/" + mystrspill[2] + "/" + mystrspill[1];
        }
        String detailUrlBase = pubBase + "sfgsgqxxdetail" + mystrdetil;

        List<String> sfxzxxlist = new AbstractParser() {
        }.getSubStringByRegex(sfxzgsxxXml, "var gqxxliststr ='\\[.*\\]");

        // Guard against a page without the embedded list instead of failing on get(0).
        String sfxzgsxxzzfc = "";
        if (sfxzxxlist != null && !sfxzxxlist.isEmpty()) {
            String matched = sfxzxxlist.get(0);
            // 19 == length of "var gqxxliststr ='[": keep only what is between the brackets.
            sfxzgsxxzzfc = matched.substring(19, matched.length() - 1);
        }
        if (!"".equals(sfxzgsxxzzfc)) {
            List<String> urllist = new ArrayList<String>();
            String num = "";
            for (String field : sfxzgsxxzzfc.split(",")) {
                String[] keyValue = field.split(":");
                if (keyValue.length < 2) {
                    continue; // not a "key":"value" fragment
                }
                String myname = keyValue[0];
                String strna = keyValue[1];
                if ("\"frozstate\"".equals(myname)) {
                    // Remember the state (quotes stripped) for the pid that follows.
                    num = strna.substring(1, strna.length() - 1);
                }
                if ("\"pid\"".equals(myname)) {
                    String pid = strna.substring(1, strna.length() - 1);
                    // Fixed: each URL is built from the base; the old code kept appending
                    // to the same string, so every URL after the first was malformed.
                    urllist.add(detailUrlBase + "/" + pid + "/" + num);
                }
            }
            List<String> gqdjxx_list = new ArrayList<String>();
            for (String detailUrl : urllist) {
                HtmlPage sfxzgsxx_page_detail = firstInfoPage.getWebClient().getPage(window,
                        new WebRequest(new URL(detailUrl)));
                gqdjxx_list.add(sfxzgsxx_page_detail.asXml());
            }
            resultHtmlMap.put("sfxzgsxx_gqdjxx_detail", gqdjxx_list);
        }
        break; // only the first matching result is processed
    }
    if (!matchFlag) {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
        LOGGER.info("????");
    }
    return resultHtmlMap;
}

/** Returns the content of the page's {@code <meta name="_csrf">} tag, or "" when absent. */
private String readShandongCsrfToken(HtmlPage page) {
    DomElement csrfMeta = page.getFirstByXPath("//meta[@name='_csrf']");
    return csrfMeta != null ? csrfMeta.getAttribute("content") : "";
}

/** POSTs to {@code url} with the X-CSRF-TOKEN header via the page's web client and returns the response body. */
private String postShandongSection(HtmlPage page, WebWindow window, String url, String csrfToken)
        throws Exception {
    WebRequest request = new WebRequest(new URL(url), HttpMethod.POST);
    request.setAdditionalHeader("X-CSRF-TOKEN", csrfToken);
    Page response = page.getWebClient().getPage(window, request);
    return response.getWebResponse().getContentAsString();
}

From source file:com.storm.function.GsxtFunction.java

/**
 * Runs the Jilin casperjs search script for {@code keyword} and, when results are found,
 * fetches every detail section of the first result whose link text contains the keyword
 * (falling back to the first result when none matches).
 *
 * <p>All pages are retrieved by shelling out to casperjs through {@code CommandUtil};
 * the script output is stored in the returned map under one key per section.
 * {@code "statusCodeDef"} is set in every branch except when the search output contains
 * both "list" and "list-a" elements, in which case the map is returned empty.
 *
 * @param area    not used by this method
 * @param keyword search keyword passed to the casperjs script and matched against link text
 * @param LOGGER  not used by this method
 * @return insertion-ordered map of section key to script output
 * @throws Exception if an external command fails or the sleep is interrupted
 */
private Map<String, Object> getHtmlInfoMapOfJilin(String area, String keyword, ChannelLogger LOGGER)
        throws Exception {

    Map<String, Object> resultHtmlMap = new LinkedHashMap<String, Object>();

    String[] command = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/jilin.js", "--web-security=no",
            "--keyword=" + keyword };
    String casperjsResult = CommandUtil.runCommand(command);

    // Parse once; the original re-parsed the same markup for every lookup.
    Document searchDoc = Jsoup.parse(casperjsResult);
    Elements divDataItems = searchDoc.getElementsByClass("list");
    Elements divNoDataItems = searchDoc.getElementsByClass("list-a");

    if (divDataItems.isEmpty()) {
        if (!divNoDataItems.isEmpty()) {
            resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
        } else if (casperjsResult.contains("")) {
            // NOTE(review): contains("") is always true, so FAILURE below is unreachable.
            // The marker text appears to have been lost to an encoding problem - confirm.
            resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR);
        } else {
            resultHtmlMap.put("statusCodeDef", StatusCodeDef.FAILURE);
        }
        return resultHtmlMap;
    }
    if (!divNoDataItems.isEmpty()) {
        // Both markers present: the original handled this case by returning an empty map.
        return resultHtmlMap;
    }

    // The search script output carries the cookie JSON (#nextParams) and the CSRF token
    // needed by the POST requests below.
    Element nowCookies = searchDoc.getElementById("nextParams");
    Elements tokenEts = searchDoc.getElementsByAttributeValue("name", "_csrf");
    if (null == nowCookies || null == tokenEts || tokenEts.isEmpty()) {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.COOKIE_ERROR);
        return resultHtmlMap;
    }
    String nowCookiesJson = nowCookies.text().trim();
    String nowCookiesStr = ((String) new GsonBuilder().create().fromJson(nowCookiesJson, Map.class)
            .get("Cookie")).trim();
    String tokenStr = tokenEts.get(0).attr("content");
    String HOST_OF_JILIN = "http://211.141.74.198:8081/aiccips/pub/";
    String HOST_OF_XQ = "http://211.141.74.198:8081/";

    // Prefer the first result whose link text contains the keyword, else the first result.
    String htmlAnchorHref = "";
    for (Element divDataItem : divDataItems) {
        Element htmlAnchor = divDataItem.getElementsByTag("a").get(0);
        if (htmlAnchor.text().contains(keyword)) {
            htmlAnchorHref = HOST_OF_JILIN + htmlAnchor.attr("href");
            break;
        }
    }
    if (StringUtils.isEmpty(htmlAnchorHref)) {
        htmlAnchorHref = HOST_OF_JILIN + divDataItems.get(0).getElementsByTag("a").get(0).attr("href");
    }
    // Path suffix after "gsgsdetail" and its last segment (the encrpripid query value).
    String commonUrl = htmlAnchorHref.split("gsgsdetail")[1];
    String commonUrlZ = htmlAnchorHref.substring(htmlAnchorHref.lastIndexOf("/") + 1);
    String idQuery = "?encrpripid=" + commonUrlZ;

    // First detail page.
    resultHtmlMap.put("gsgsxx", fetchJilinPage(htmlAnchorHref));
    Thread.sleep(1000);

    // Sections of the first detail page (referer = that page).
    resultHtmlMap.put("gsgsxx_baxx_zyryxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "gsryxx/1151" + idQuery));
    resultHtmlMap.put("gsgsxx_baxx_fzjgxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "gsfzjg/1151" + idQuery));
    resultHtmlMap.put("gsgsxx_dcdydjxx_dcdydjxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "gsdcdy" + idQuery));
    resultHtmlMap.put("gsgsxx_gqczdjxx_gqczdjxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "gsgqcz" + idQuery));
    resultHtmlMap.put("gsgsxx_xzcfxx_xzcfxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "gsxzcfxx" + idQuery));
    resultHtmlMap.put("gsgsxx_jyycxx_jyycxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "jyyc/1151" + idQuery));
    resultHtmlMap.put("gsgsxx_yzwfxx_yzwfxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "yzwfqy" + idQuery));
    resultHtmlMap.put("gsgsxx_ccjcxx_ccjcxx", postJilinSection(nowCookiesStr, htmlAnchorHref, tokenStr,
            HOST_OF_JILIN + "ccjcxx" + idQuery));

    // Second detail page (qygsdetail).
    String qygsUrl = HOST_OF_JILIN + "qygsdetail" + commonUrl;
    String casperjsResult2 = fetchJilinPage(qygsUrl);
    resultHtmlMap.put("qygsxx_list", casperjsResult2);

    // Rows of the "qiyenianbao" table; the loop starts at index 2, skipping the first
    // two rows. Each remaining row's link is fetched and stored with its text and date cell.
    Element qynbDiv = Jsoup.parseBodyFragment(casperjsResult2).getElementById("qiyenianbao");
    if (null != qynbDiv) {
        Elements qynb_trs = qynbDiv.select("tbody").get(0).select("tr");
        if (null != qynb_trs && qynb_trs.size() > 2) {
            List<Map<String, Object>> qygsxx_qynb_infos = new ArrayList<Map<String, Object>>();
            for (int i = 2; i < qynb_trs.size(); i++) {
                Elements cells = qynb_trs.get(i).select("td");
                Element reportLink = cells.get(1).select("a").get(0);
                Map<String, Object> qygsxx_qynb_info_map = new LinkedHashMap<String, Object>();
                qygsxx_qynb_info_map.put("qygsxx_qynb_list_a_text", reportLink.text());
                qygsxx_qynb_info_map.put("qygsxx_qynb_list_pubdate", cells.get(2).text());
                qygsxx_qynb_info_map.put("qygsxx_qynb_info_page",
                        fetchJilinPage(HOST_OF_XQ + reportLink.attr("href")));
                qygsxx_qynb_infos.add(qygsxx_qynb_info_map);
            }
            resultHtmlMap.put("qygsxx_qynb_infos", qygsxx_qynb_infos);
        }
    }
    Thread.sleep(1000);

    // Sections of the second detail page (referer = that page).
    resultHtmlMap.put("qygsxx_gdjczxx_gdjczxx", postJilinSection(nowCookiesStr, qygsUrl, tokenStr,
            HOST_OF_JILIN + "qygsjsxxxzczxx" + idQuery));
    resultHtmlMap.put("qygsxx_gdjczxx_bgxx", postJilinSection(nowCookiesStr, qygsUrl, tokenStr,
            HOST_OF_JILIN + "qygsjsxxczxxbgsx" + idQuery));
    resultHtmlMap.put("qygsxx_gqbgxx_gqbgxx", postJilinSection(nowCookiesStr, qygsUrl, tokenStr,
            HOST_OF_JILIN + "qygsJsxxgqbg" + idQuery));
    resultHtmlMap.put("qygsxx_xzxkxx_xzxkxx", postJilinSection(nowCookiesStr, qygsUrl, tokenStr,
            HOST_OF_JILIN + "qygsjsxxxzxk" + idQuery));
    // Fixed: HOST_OF_JILIN already ends with '/', the extra leading slash produced "pub//...".
    resultHtmlMap.put("qygsxx_zscqcz_zscqcz", postJilinSection(nowCookiesStr, qygsUrl, tokenStr,
            HOST_OF_JILIN + "qygsjsxxzscqcz" + idQuery));
    // Fixed: this result used to be stored under "qygsxx_zscqcz_zscqcz", silently
    // overwriting the previous section. The key now follows the naming of the other entries.
    resultHtmlMap.put("qygsxx_xzcfxx_xzcfxx", postJilinSection(nowCookiesStr, qygsUrl, tokenStr,
            HOST_OF_JILIN + "qygsjsxxxzcfxx" + idQuery));

    // Third and fourth detail pages.
    resultHtmlMap.put("qtbmgsxx", fetchJilinPage(HOST_OF_JILIN + "qtgsdetail" + commonUrl));
    resultHtmlMap.put("sfxzgsxx_list", fetchJilinPage(HOST_OF_JILIN + "sfgsdetail" + commonUrl));

    resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS);
    return resultHtmlMap;
}

/** Runs the getSimpleRequestPage.js casperjs script for {@code url} and returns its output. */
private String fetchJilinPage(String url) throws Exception {
    String[] command = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js",
            "--web-security=no", "--url=" + url };
    return CommandUtil.runCommand(command);
}

/** Runs the postJilinSimpleRequestPage.js casperjs script with the given cookie, referer and token. */
private String postJilinSection(String cookieStr, String refererStr, String tokenStr, String url)
        throws Exception {
    String[] command = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js",
            "--web-security=no", "--cookieStr=" + cookieStr, "--refererStr=" + refererStr,
            "--tokenStr=" + tokenStr, "--url=" + url };
    return CommandUtil.runCommand(command);
}

From source file:com.storm.function.GsxtFunction.java

/**
 * Collects the raw pages published for the first Tianjin search hit whose title contains
 * {@code keyword}.
 *
 * <p>The returned map always carries a {@code "statusCodeDef"} entry. When a search hit matches,
 * it also carries {@code "target_item_info"} (the XML of the hit's grandparent node) and, if an
 * enterprise id could be read from the hit's link, one entry per fetched tjcredit.gov.cn page.
 * The detail entries ({@code "gsgsxx_djxx_gdxx"}, {@code "gsgsxx_xzcfxx_detail"},
 * {@code "qygsxx_qynb_detail"}) hold lists of page bodies and are only present when at least one
 * detail page was fetched.
 *
 * @param area not used by this method
 * @param firstInfoPage the search result page; its web client and current window are reused for
 *        every follow-up request
 * @param keyword text a result title must contain to be selected
 * @param LOGGER logger that receives the anchor list and the "no match" message
 * @return the status code and the collected page bodies, keyed by section name
 * @throws Exception propagated from the follow-up page requests
 */
private Map<String, Object> getHtmlInfoMapOfTianjin(String area, HtmlPage firstInfoPage, String keyword,
        ChannelLogger LOGGER) throws Exception {
    Map<String, Object> resultHtmlMap = new HashMap<String, Object>();
    WebWindow window = firstInfoPage.getWebClient().getCurrentWindow();

    // Only the number of result items matters here, so the element type is left open
    // (these nodes are divs, not anchors).
    List<?> resultItems = firstInfoPage.getByXPath("//div[@class='result-item']");
    HtmlElement summary = (HtmlElement) firstInfoPage
            .getFirstByXPath("//div[@class='content']/div[@style='font-size:12px']");
    if (resultItems.isEmpty() && summary == null) {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR);
    } else if (summary != null && summary.getTextContent().contains("? 0 ?")) {
        // NOTE(review): the literal above looks mis-encoded (probably non-ASCII text in the
        // original source) - confirm against the live page. contains() is used so that a match
        // at the very start of the text is not missed.
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
    } else {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS);
    }

    @SuppressWarnings("unchecked")
    List<HtmlAnchor> anchors = (List<HtmlAnchor>) firstInfoPage.getByXPath("//div[@class='result-item']/div/a");
    LOGGER.info(String.valueOf(anchors));
    if (anchors == null || anchors.isEmpty()) {
        return resultHtmlMap;
    }

    for (HtmlAnchor anchor : anchors) {
        String anchorTitle = anchor.getTextContent().trim();
        if (!anchorTitle.contains(keyword)) {
            continue;
        }
        // Only the first matching hit is processed.
        HtmlElement targetItemInfo = (HtmlElement) anchor.getParentNode().getParentNode();
        resultHtmlMap.put("target_item_info", targetItemInfo.asXml());

        String entId = extractTianjinEntId(anchor.getAttribute("href"));
        if (!StringUtils.isEmpty(entId)) {
            collectTianjinCompanyPages(resultHtmlMap, firstInfoPage, window, entId);
        }
        return resultHtmlMap;
    }

    // Results were listed but none of their titles contains the keyword.
    resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
    LOGGER.info("????");
    return resultHtmlMap;
}

/** Returns the text between the first and second {@code '='} of {@code href}, or "" if there is none. */
private static String extractTianjinEntId(String href) {
    if (StringUtils.isEmpty(href)) {
        return href;
    }
    String[] parts = href.split("=");
    return parts.length > 1 ? parts[1] : "";
}

/** Fetches every tjcredit.gov.cn page for {@code entId}, in a fixed order, and stores each body in the map. */
private static void collectTianjinCompanyPages(Map<String, Object> resultHtmlMap, HtmlPage firstInfoPage,
        WebWindow window, String entId) throws Exception {
    String baseInfoUrl = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + entId
            + "&departmentId=scjgw&infoClassId=";
    String reportUrl = "http://tjcredit.gov.cn/report/";
    String entIdQuery = "?entid=" + entId;
    String saicDetailSuffix = "&entid=" + entId + "&issaic=1&hasInfo=0";

    // baseInfo "dj" page, then one detail page per link in its "touziren" table.
    String djContent = fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_djxx_jbxx", firstInfoPage, window,
            baseInfoUrl + "dj");
    Element djTable = Jsoup.parseBodyFragment(djContent).getElementById("touziren");
    storeTianjinDetailPages(resultHtmlMap, "gsgsxx_djxx_gdxx", firstInfoPage, window, djTable, 0,
            "http://tjcredit.gov.cn/saicpf/gsgdcz?gdczid=", saicDetailSuffix);

    fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_baxx_zyryxx", firstInfoPage, window, baseInfoUrl + "ba");
    fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_dcdydjxx_dcdydjxx", firstInfoPage, window,
            baseInfoUrl + "dcdydjxx");
    fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_gqczdjxx_gqczdjxx", firstInfoPage, window,
            baseInfoUrl + "gqczdjxx");

    // baseInfo "xzcf" page, then one detail page per link in its first "result-table".
    String xzcfContent = fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_xzcfxx_xzcfxx", firstInfoPage,
            window, baseInfoUrl + "xzcf");
    Elements xzcfTables = Jsoup.parseBodyFragment(xzcfContent).getElementsByClass("result-table");
    Element xzcfTable = xzcfTables.size() != 0 ? xzcfTables.get(0) : null;
    storeTianjinDetailPages(resultHtmlMap, "gsgsxx_xzcfxx_detail", firstInfoPage, window, xzcfTable, 0,
            "http://tjcredit.gov.cn/saicpf/gsxzcf?id=", saicDetailSuffix);

    fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_jyycxx_jyycxx", firstInfoPage, window,
            baseInfoUrl + "qyjyycmlxx");
    fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_yzwfxx_yzwfxx", firstInfoPage, window,
            baseInfoUrl + "yzwfqyxx");
    fetchAndStoreTianjinPage(resultHtmlMap, "gsgsxx_ccjcxx_ccjcxx", firstInfoPage, window,
            baseInfoUrl + "ccjcxx");

    // report "nblist" page, then one "annals" page per link; the year is the link's second argument.
    String nblistContent = fetchAndStoreTianjinPage(resultHtmlMap, "qygsxx_qynb_list", firstInfoPage, window,
            reportUrl + "nblist" + entIdQuery);
    Element nblistTable = Jsoup.parseBodyFragment(nblistContent).getElementById("touziren");
    storeTianjinDetailPages(resultHtmlMap, "qygsxx_qynb_detail", firstInfoPage, window, nblistTable, 1,
            reportUrl + "annals" + entIdQuery + "&year=", "&hasInfo=0");

    fetchAndStoreTianjinPage(resultHtmlMap, "qygsxx_xzxkxx", firstInfoPage, window,
            reportUrl + "xzxk" + entIdQuery);
    fetchAndStoreTianjinPage(resultHtmlMap, "qygsxx_gdjczxx", firstInfoPage, window,
            reportUrl + "gdcz" + entIdQuery);
    fetchAndStoreTianjinPage(resultHtmlMap, "qygsxx_gqbgxx", firstInfoPage, window,
            reportUrl + "gqbg" + entIdQuery);
    fetchAndStoreTianjinPage(resultHtmlMap, "qygsxx_zscqczdjxx", firstInfoPage, window,
            reportUrl + "zscq" + entIdQuery);
    fetchAndStoreTianjinPage(resultHtmlMap, "qygsxx_xzcfxx", firstInfoPage, window,
            reportUrl + "xzcf" + entIdQuery);
    fetchAndStoreTianjinPage(resultHtmlMap, "sfxzgsxx_gqdjxx_list", firstInfoPage, window,
            reportUrl + "gddjlist" + entIdQuery);
    fetchAndStoreTianjinPage(resultHtmlMap, "qygsxx_gdbgxx_list", firstInfoPage, window,
            reportUrl + "gdbglist" + entIdQuery);
}

/** Requests {@code url} in {@code window} through the page's web client and returns the body decoded as UTF-8. */
private static String fetchTianjinContent(HtmlPage firstInfoPage, WebWindow window, String url)
        throws Exception {
    Page page = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(url)));
    return page.getWebResponse().getContentAsString("utf-8");
}

/** Fetches {@code url}, stores its body under {@code key} and returns the body for further parsing. */
private static String fetchAndStoreTianjinPage(Map<String, Object> resultHtmlMap, String key,
        HtmlPage firstInfoPage, WebWindow window, String url) throws Exception {
    String content = fetchTianjinContent(firstInfoPage, window, url);
    resultHtmlMap.put(key, content);
    return content;
}

/**
 * Fetches one detail page per link found under {@code tbody tr td a} of {@code table} and stores
 * the bodies as a list under {@code key}; nothing is stored when no page was fetched. Links whose
 * markup does not carry the expected argument are skipped.
 */
private static void storeTianjinDetailPages(Map<String, Object> resultHtmlMap, String key,
        HtmlPage firstInfoPage, WebWindow window, Element table, int argIndex, String urlPrefix,
        String urlSuffix) throws Exception {
    if (table == null) {
        return;
    }
    Elements links = table.select("tbody").select("tr").select("td").select("a");
    List<String> details = new ArrayList<String>();
    for (Element link : links) {
        String[] args = extractTianjinCallArgs(link);
        if (args.length <= argIndex) {
            continue;
        }
        details.add(fetchTianjinContent(firstInfoPage, window, urlPrefix + args[argIndex] + urlSuffix));
    }
    if (!details.isEmpty()) {
        resultHtmlMap.put(key, details);
    }
}

/**
 * Extracts the quoted arguments of the first {@code ('a','b',...)} call in the link's markup.
 * Returns an empty array when the markup contains no such call.
 */
private static String[] extractTianjinCallArgs(Element link) {
    String[] afterOpen = link.toString().split("\\(\\'");
    if (afterOpen.length < 2) {
        return new String[0];
    }
    String[] beforeClose = afterOpen[1].split("\\'\\)");
    if (beforeClose.length == 0) {
        return new String[0];
    }
    return beforeClose[0].split("\\'\\,\\'");
}

From source file:ca.appvelopers.mcgillmobile.model.retrofit.CourseResultConverter.java

/**
 * Converts the HTML of a course search response into the list of courses it contains.
 *
 * <p>Rows of the first {@code datadisplaytable} with fewer than 19 cells are ignored. A row whose
 * cells fail to parse is logged; it is still added if its subject and number were read before
 * the failure, with the remaining fields left at their defaults.
 *
 * @param value response body holding the HTML page
 * @return the parsed courses, in table order
 * @throws IOException if the response body cannot be read
 */
@Override
public List<CourseResult> convert(ResponseBody value) throws IOException {
    String html = value.string();
    List<CourseResult> courses = new ArrayList<>();
    // The body is already a decoded String; the second argument of Jsoup.parse(String, String)
    // is a base URI, not a charset, so it is not passed here.
    Document document = Jsoup.parse(html);

    // Parse the term from the page header
    Element header = document.getElementsByClass("staticheaders").get(0);
    Term term = Term.parseTerm(header.childNode(2).toString());

    // Get the table in the form of a set of rows
    Element table = document.getElementsByClass("datadisplaytable").get(0).select("tbody").get(0);

    // Go through the rows in the table
    for (Element row : table.select("tr")) {
        // Check that there at least 19 elements in the row
        Elements rowElements = row.select("td");
        if (rowElements.size() < 19) {
            // If there aren't, it must not be a course row
            continue;
        }

        // Create a new course object with the default values
        double credits = 99;
        String subject = null;
        String number = null;
        String title = "";
        String type = "";
        List<DayOfWeek> days = new ArrayList<>();
        int crn = 0;
        String instructor = "";
        String location = "";
        //So that the rounded start time will be 0
        LocalTime startTime = ScheduleConverter.getDefaultStartTime();
        LocalTime endTime = ScheduleConverter.getDefaultEndTime();
        int capacity = 0;
        int seatsRemaining = 0;
        int waitlistRemaining = 0;
        LocalDate startDate = LocalDate.now();
        LocalDate endDate = LocalDate.now();

        try {
            for (int i = 0; i < rowElements.size(); i++) {
                if (rowElements.get(i).toString().contains("&nbsp;")) {
                    // Empty cell: continue
                    continue;
                }
                String rowString = rowElements.get(i).text();

                switch (i) {
                // CRN
                case 1:
                    crn = Integer.parseInt(rowString);
                    break;
                // Subject
                case 2:
                    subject = rowString;
                    break;
                // Number
                case 3:
                    number = rowString;
                    break;
                // Type
                case 5:
                    type = rowString;
                    break;
                // Number of credits
                case 6:
                    credits = Double.parseDouble(rowString);
                    break;
                // Course title
                case 7:
                    //Remove the extra period at the end of the course title
                    title = rowString.substring(0, rowString.length() - 1);
                    break;
                // Days of the week
                case 8:
                    if (rowString.equals("TBA")) {
                        // TBA Stuff: no time associated so skip the next one
                        // and add a dummy to keep the index correct
                        rowElements.add(9, null);
                        i++;
                    } else {
                        // Day Parsing
                        rowString = rowString.replace('\u00A0', ' ').trim();
                        for (int k = 0; k < rowString.length(); k++) {
                            days.add(DayUtils.getDay(rowString.charAt(k)));
                        }
                    }
                    break;
                // Time
                case 9:
                    String[] times = rowString.split("-");
                    try {
                        int startHour = Integer.parseInt(times[0].split(" ")[0].split(":")[0]);
                        int startMinute = Integer.parseInt(times[0].split(" ")[0].split(":")[1]);
                        int endHour = Integer.parseInt(times[1].split(" ")[0].split(":")[0]);
                        int endMinute = Integer.parseInt(times[1].split(" ")[0].split(":")[1]);

                        startHour = toTwentyFourHour(startHour, times[0].split(" ")[1]);
                        endHour = toTwentyFourHour(endHour, times[1].split(" ")[1]);

                        startTime = LocalTime.of(startHour, startMinute);
                        endTime = LocalTime.of(endHour, endMinute);
                    } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
                        // Courses sometimes don't have assigned times. A cell missing the "-"
                        // separator or the AM/PM marker is treated the same way, so that it
                        // does not abort parsing of the remaining columns.
                        startTime = ScheduleConverter.getDefaultStartTime();
                        endTime = ScheduleConverter.getDefaultEndTime();
                    }
                    break;
                // Capacity
                case 10:
                    capacity = Integer.parseInt(rowString);
                    break;
                // Seats remaining
                case 12:
                    seatsRemaining = Integer.parseInt(rowString);
                    break;
                // Waitlist remaining
                case 15:
                    waitlistRemaining = Integer.parseInt(rowString);
                    break;
                // Instructor
                case 16:
                    instructor = rowString;
                    break;
                // Start/end date
                case 17:
                    Pair<LocalDate, LocalDate> dates = parseDateRange(term, rowString);
                    startDate = dates.first;
                    endDate = dates.second;
                    break;
                // Location
                case 18:
                    location = rowString;
                    break;
                }
            }
        } catch (Exception e) {
            Timber.e(e, "Course Results Parser Error");
        }

        // Don't add any courses with errors
        if (subject != null && number != null) {
            // Create a new course object and add it to list
            // TODO Should we be parsing the course section?
            courses.add(new CourseResult(term, subject, number, title, crn, "", startTime, endTime, days, type,
                    location, instructor, credits, startDate, endDate, capacity, seatsRemaining,
                    waitlistRemaining));
        }
    }

    return courses;
}

/** Converts a 12-hour clock hour to 24-hour form: 12 AM becomes 0 and 1-11 PM gain 12 hours. */
private static int toTwentyFourHour(int hour, String meridiem) {
    if (meridiem.equals("PM")) {
        return hour == 12 ? hour : hour + 12;
    }
    if (meridiem.equals("AM") && hour == 12) {
        return 0;
    }
    return hour;
}

From source file:com.dalthed.tucan.scraper.SingleEventScraper.java

/**
 * @param materialTable/*from ww w .jav a  2  s  . c  o  m*/
 */
private void scrapeMaterials(Iterator<Element> materialTable) {
    int ct = 0;
    ArrayList<String> materialNumber = new ArrayList<String>();
    ArrayList<String> materialName = new ArrayList<String>();
    ArrayList<String> materialDesc = new ArrayList<String>();
    materialLink = new ArrayList<String>();
    ArrayList<String> materialFile = new ArrayList<String>();
    int mod = 0;
    if (materialTable != null) {
        while (materialTable.hasNext()) {
            Element next = materialTable.next();

            if (next.select("td").size() > 1) {
                ct++;

                if (next.select("td").get(0).text().matches("[0-9]+")) {
                    // First line

                    materialNumber.add(next.select("td").get(0).text());
                    materialName.add(next.select("td").get(1).text());
                    if (mod == 1) {
                        materialDesc.add("");
                        mod = 2;
                    }
                    if (mod == 2) {
                        materialLink.add("");
                        materialFile.add("");
                    }

                    mod = 1;
                } else if (mod == 1) {

                    materialDesc.add(next.select("td").get(1).text());
                    mod = 2;
                } else if (mod == 2) {

                    materialLink.add(next.select("td").get(1).select("a").attr("href"));
                    materialFile.add(next.select("td").get(1).select("a").text());
                    mod = 0;
                }
            }
        }
    }
    if (mod == 1) {
        materialDesc.add("");
        mod = 2;
    }
    if (mod == 2) {
        materialLink.add("");
        materialFile.add("");
    }
    if (ct > 2) {
        if (mPageAdapter != null) {
            mPageAdapter.setAdapter(new AppointmentAdapter(context, materialNumber, materialFile, null,
                    materialName, materialDesc));

            mPageAdapter.fileList = materialLink;
        }
    } else if (mPageAdapter != null) {
        mPageAdapter.setAdapter(new ArrayAdapter<String>(context, android.R.layout.simple_list_item_1,
                new String[] { "Kein Material" }));

    }
}

From source file:com.dalthed.tucan.scraper.SingleEventScraper.java

/**
 * @param DateTable/*from w ww.ja  va  2  s.co  m*/
 */
private void scrapeAppointments(Iterator<Element> DateTable) {
    ArrayList<String> eventNumber = new ArrayList<String>();
    ArrayList<String> eventDate = new ArrayList<String>();
    ArrayList<String> eventTime = new ArrayList<String>();

    ArrayList<String> eventRoom = new ArrayList<String>();
    ArrayList<String> eventInstructor = new ArrayList<String>();
    if (DateTable != null) {
        while (DateTable.hasNext()) {
            Element next = DateTable.next();
            Elements cols = next.select("td");
            if (cols.size() > 5) {
                eventNumber.add(cols.get(0).text());
                eventDate.add(cols.get(1).text());
                eventTime.add(cols.get(2).text() + "-" + cols.get(3).text());
                eventRoom.add(cols.get(4).text());
                eventInstructor.add(cols.get(5).text());
            }

        }

    } else {
        eventDate.add("");
        eventTime.add("");
        eventNumber.add("");
        eventRoom.add("Keine Daten vorhanden");
        eventInstructor.add("");
    }
    if (mPageAdapter != null) {
        mPageAdapter.setAdapter(
                new AppointmentAdapter(context, eventDate, eventTime, eventNumber, eventRoom, eventInstructor));
    }
}

From source file:com.dalthed.tucan.scraper.SingleEventScraper.java

/**
 * /*from   w w  w .  j a va2s  .c o m*/
 */
private void scrapeInformations(Iterator<Element> informationIterator) {

    while (informationIterator.hasNext()) {

        Element nextElement = informationIterator.next();

        Elements td = nextElement.select("td");
        if (td != null && td.hasClass("tbdata")) {
            Elements Paragraphs = nextElement.select("p");
            Iterator<Element> PaIt = Paragraphs.iterator();
            ArrayList<String> titles = new ArrayList<String>();
            ArrayList<String> values = new ArrayList<String>();

            while (PaIt.hasNext()) {

                Element next = PaIt.next();
                String[] information = crop(next.html());
                if (information[1].length() > 0) {
                    titles.add(information[0]);
                    values.add(information[1]);
                }

            }
            Log.i(LOG_TAG, "Informationscraper working");
            if (mPageAdapter != null) {
                Log.i(LOG_TAG, "InformationAdapter set");
                mPageAdapter.setAdapter(new TwoLinesAdapter(context, titles, values));
            }
        }
    }
}

From source file:com.example.bibliotecauclm.net.ActualizadorListaLibros.java

/**
 * Fetches the catalogue page for the given credentials and parses the rows of its
 * first table into {@link Libro} objects.
 *
 * <p>Each book is built from the text of the first four cells of its row. When the
 * fifth cell contains an {@code <input>}, the book is marked renewable and the input's
 * {@code name} attribute becomes its identifier; otherwise it is marked non-renewable
 * with a {@code null} identifier.
 *
 * @param usuario    user name passed to {@code Utiles.obtenerLinkConexion}
 * @param contrasena password passed to {@code Utiles.obtenerLinkConexion}
 * @return the parsed books, or {@code null} when the device is offline or no
 *         connection link could be obtained
 * @throws Exception if the page cannot be fetched or does not have the expected
 *                   table layout
 */
private List<Libro> obtenerLibros(String usuario, String contrasena) throws Exception {

    if (!Utiles.isOnline(contexto)) {
        return null;
    }

    String res = Utiles.obtenerLinkConexion(usuario, contrasena);
    if (res == null) {
        return null;
    }

    Document doc;
    if (!debug) {
        doc = Jsoup.connect("https://catalogobiblioteca.uclm.es" + res + "?ACC=210").timeout(12000).get();
    } else {
        // Debug mode: parse a local HTML dump instead of hitting the live catalogue.
        File input = new File("/sdcard/html.htm");
        doc = Jsoup.parse(input, "UTF-8");
    }

    List<Libro> resultado = new ArrayList<Libro>();

    List<Element> rows = doc.select("table").get(0).select("tr");
    // The first row is discarded (presumably the table header).
    rows.remove(0);

    for (Element row : rows) {
        List<Element> cells = row.select("td");

        Libro libro = new Libro(cells.get(0).text(), cells.get(1).text(), cells.get(2).text(),
                cells.get(3).text());

        // first() yields null when the cell holds no <input>; test for that explicitly
        // instead of catching the resulting NullPointerException. A row without a
        // fifth cell is treated the same way: not renewable.
        Element renewInput = cells.size() > 4 ? cells.get(4).select("input").first() : null;
        if (renewInput != null) {
            libro.setIdentificador(renewInput.attr("name"));
            libro.setRenovar(true);
        } else {
            libro.setRenovar(false);
            libro.setIdentificador(null);
        }

        resultado.add(libro);
    }

    return resultado;

}

From source file:com.jp.miaulavirtual.DisplayMessageActivity.java

/**
 * Rebuilds the {@code urls} array from the links found in {@code melem}.
 *
 * <p>Slot 0 holds a "current location" entry: the paged home URL when {@code isHome}
 * is true, otherwise the first field of the second-to-last {@code onData} entry. The
 * remaining slots hold the {@code href} of each selected link, in selection order.
 *
 * @param melem  elements to search for name-column links
 * @param isHome select the home-page links, i.e. all except those whose href contains
 *               "/clubs/"
 * @param comun  when {@code isHome} is false, select only links whose href contains
 *               "/clubs/" (labelled "Comunidades" by the original author)
 */
public void urlsToArray(Elements melem, Boolean isHome, Boolean comun) {
    Elements links;
    if (isHome) {
        // Subject names, excluding the "/clubs/" links.
        links = melem.select("td[headers=contents_name] a, td[headers=folders_name] a")
                .not("[href*=/clubs/]");
    } else if (comun) {
        // Only the "/clubs/" links.
        links = melem.select(
                "td[headers=contents_name] a[href*=/clubs/], td[headers=folders_name] a[href*=/clubs/]");
    } else {
        // Every name-column link that has an href.
        links = melem.select("td[headers=contents_name] a[href], td[headers=folders_name] a[href]");
    }

    // One extra slot in front for the "current location" entry.
    urls = new String[links.size() + 1];
    urls[0] = isHome ? "/dotlrn/?page_num=" + panel : onData.get(onData.size() - 2)[0];

    int slot = 1;
    for (Element link : links) {
        urls[slot++] = link.select("a").attr("href");
    }

    Log.d("urlsToArray", String.valueOf(urls.length));
}

From source file:com.lloydtorres.stately.issues.IssueDecisionActivity.java

/**
 * Process the received page into the Issue and its IssueOptions
 * @param v Activity view/* w  w w .j  av a 2 s.c om*/
 * @param d Document received from NationStates
 */
private void processIssueInfo(View v, Document d) {
    // First check if the issue is still available
    if (d.text().contains(NOT_AVAILABLE)) {
        mSwipeRefreshLayout.setRefreshing(false);
        SparkleHelper.makeSnackbar(v,
                String.format(Locale.US, getString(R.string.issue_unavailable), mNation.name));
        return;
    }

    Element issueInfoContainer = d.select("div#dilemma").first();

    if (issueInfoContainer == null) {
        // safety check
        mSwipeRefreshLayout.setRefreshing(false);
        SparkleHelper.makeSnackbar(v, getString(R.string.login_error_parsing));
        return;
    }

    Elements issueInfoRaw = issueInfoContainer.children();

    String issueText = issueInfoRaw.select("p").first().text();
    // If this is an issue chain, grab the second paragraph instead
    if (d.select("div.dilemmachain").first() != null) {
        issueText = issueInfoRaw.select("p").get(1).text();
        if (d.text().contains(STORY_SO_FAR)) {
            issueText = issueText + "<br><br>" + issueInfoRaw.select("p").get(2).text();
        }
    }
    issue.content = issueText;

    issue.options = new ArrayList<IssueOption>();

    Element optionHolderMain = issueInfoRaw.select("ol.diloptions").first();
    if (optionHolderMain != null) {
        Elements optionsHolder = optionHolderMain.select("li");

        int i = 0;
        for (Element option : optionsHolder) {
            IssueOption issueOption = new IssueOption();
            issueOption.index = i++;

            Element button = option.select("button").first();
            if (button != null) {
                issueOption.header = button.attr("name");
            } else {
                issueOption.header = IssueOption.SELECTED_HEADER;
            }

            Element optionContentHolder = option.select("p").first();
            if (optionContentHolder == null) {
                // safety check
                mSwipeRefreshLayout.setRefreshing(false);
                SparkleHelper.makeSnackbar(v, getString(R.string.login_error_parsing));
                return;
            }

            issueOption.content = optionContentHolder.text();
            issue.options.add(issueOption);
        }
    }

    IssueOption dismissOption = new IssueOption();
    dismissOption.index = -1;
    dismissOption.header = IssueOption.DISMISS_HEADER;
    dismissOption.content = "";
    issue.options.add(dismissOption);

    setRecyclerAdapter(issue);
    mSwipeRefreshLayout.setRefreshing(false);
    mSwipeRefreshLayout.setEnabled(false);
}