Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usage for org.jsoup.nodes Element select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:org.shareok.data.sagedata.SageJournalIssueDateProcessor.java

/**
 * Fetches one SAGE "show publications" listing page and records, for every journal row
 * found in it, the journal's home link and its issue-list ({@code /loi/}) link.
 *
 * <p>Existing data is never overwritten: a journal that is already present in
 * {@code journalMap} only receives the {@code "homeLink"} / {@code "issueLink"} entries
 * it is still missing. Any failure while fetching the page or locating the publication
 * table is printed and otherwise ignored, so the map is returned with whatever had been
 * collected up to that point.
 *
 * @param journalMap journal name mapped to its info map; updated in place
 * @return the same {@code journalMap} instance that was passed in
 */
public Map<String, Map<String, String>> updateSageJournalLinks(Map<String, Map<String, String>> journalMap) {
    try {
        Document doc = Jsoup.connect("http://journals.sagepub.com/action/showPublications?pageSize=20&startPage=199")
                .userAgent(
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
                .cookie("auth", "token").timeout(300000).get();
        // A missing form/table/tbody still raises here and is reported by the catch below:
        // without the publication table there is nothing to scan.
        Elements trs = doc.select("form#browsePublicationsForm").get(0).select("table").get(0).select("tbody")
                .get(0).select("tr");
        for (Element tr : trs) {
            // Rows without a second cell, or without a link inside it, are skipped instead of
            // aborting the scan of all remaining rows with an IndexOutOfBoundsException.
            Elements cells = tr.select("td");
            if (cells.size() < 2) {
                continue;
            }
            Element link = cells.get(1).select("a").first();
            if (link == null) {
                continue;
            }

            String journalName = link.text();
            String journalLink = SageDataUtil.SAGE_HTTP_PREFIX + link.attr("href");
            // The last path segment of the journal link is reused to build the issue-list link.
            String[] linkInfo = journalLink.split("/");
            String journalIssuesLink = SageDataUtil.SAGE_HTTP_PREFIX + "/loi/" + linkInfo[linkInfo.length - 1];

            Map<String, String> infoMap = journalMap.get(journalName);
            if (infoMap == null) {
                infoMap = new HashMap<>();
                journalMap.put(journalName, infoMap);
            }
            if (infoMap.get("homeLink") == null) {
                infoMap.put("homeLink", journalLink);
            }
            if (infoMap.get("issueLink") == null) {
                infoMap.put("issueLink", journalIssuesLink);
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return journalMap;
}

From source file:com.crawler.app.run.CrawlSite.java

/**
 * Processes one fetched page.
 *
 * <p>The method returns immediately unless both {@code ReadXmlConfig()} and
 * {@code readXmlConfigDatabase()} succeed. For an HTML page it then selects every element
 * matching {@code bodySelect}, resolves each item's link through
 * {@code convertUrlToDocument}, extracts the configured fields from that detail document
 * and inserts the item as a news record unless {@code checkNewsUrl} already reports its
 * SEO title. A failure while handling one item is printed and does not stop the others.
 *
 * @param page the crawled page handed over by the crawler
 */
@Override
public void visit(Page page) {
    String url = page.getWebURL().getURL();
    if (!(ReadXmlConfig() && readXmlConfigDatabase())) {
        return;
    }
    status_read_xml = true;

    System.out.println("\n URL visit: " + url);

    if (page.getParseData() instanceof HtmlParseData) {
        HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
        // The second argument of Jsoup.parse(String, String) is the document's base URI,
        // not a charset name, so the page URL is passed here.
        Document doc = Jsoup.parse(htmlParseData.getHtml(), url);
        Elements listDetail = doc.body().select(bodySelect);
        MysqlCrawler.createConn(host, port, dbName, dbUser, dbPwd);

        int i = 0;
        for (Element detail : listDetail) {
            i++;
            try {
                /* values taken from the listing item itself */
                String jobImage = selectImageSource(detail, jobImgQuery, jobImagePosition,
                        JobImageSelectPosition, jobImageFormatAttr, jobImgUrl);
                String jobUrl = selectLink(detail, jobUrlQuery, jobUrlPosition, JobUrlSelectPosition,
                        jobUrlFormatAttr, joburl_url);

                /* everything below is read from the item's detail document */
                Element detailJobUrl = convertUrlToDocument(jobUrl);

                String cJobLocation = selectOptionalContent(detailJobUrl, jobLocationQuery,
                        jobLocationFormatData);
                // The name selector is applied unconditionally: an empty selector makes the
                // item fail instead of producing a record without a title.
                String aJobName = selectContent(detailJobUrl, jobNameQuery, jobNameFormatData);
                String gJobDescription = selectOptionalContent(detailJobUrl, JobDescriptionQuery,
                        jobDescriptionFormatData);
                String gJobDetailShort = selectOptionalContent(detailJobUrl, JobDetailShortQuery,
                        jobDetailShortFormatData);
                String gJobDetail = selectOptionalContent(detailJobUrl, JobDetailQuery, jobDetailFormatData);

                String jobDetailImage = selectImageSource(detailJobUrl, jobDetailImgQuery,
                        jobDetailImagePosition, JobDetailImageSelectPosition, jobDetailImageFormatAttr,
                        jobDetailImgUrl);
                String jobDetailImageName = "";
                if (!jobDetailImage.isEmpty()) {
                    jobDetailImageName = DownloadImage.downloadImage(jobDetailImage, "D:\\/Java\\/storage");
                }

                // The following fields are extracted but not stored by the insert below. They are
                // still evaluated so that a malformed selector keeps failing the item as before.
                String cLocationNear = selectOptionalContent(detailJobUrl, locationNearQuery,
                        locationNearFormatData);
                String eJobSalary = selectOptionalContent(detailJobUrl, JobSalaryQuery, jobSalaryFormatData);
                String hJobExpire = selectOptionalContent(detailJobUrl, JobExpireQuery, jobExpireFormatData);
                String bJobCompany = selectOptionalContent(detailJobUrl, JobCompanyQuery, jobCompanyFormatData);
                String fJobType = selectOptionalContent(detailJobUrl, JobTypeQuery, jobTypeFormatData);
                String jobAddress = selectOptionalContent(detailJobUrl, JobAddressQuery, jobAddressFormatData);
                String dJobCareer = selectOptionalContent(detailJobUrl, JobCareerQuery, jobCareerFormatData);

                System.out.println("\n Url : " + jobUrl);
                System.out.println("\n Image : " + jobImage);
                System.out.println("\n Title : " + aJobName);
                System.out.println("\n Title SEO : " + StringUtils.removeAccent(aJobName));
                System.out.println("\n jobDetailImageName : " + jobDetailImageName);
                System.out.println("\n Full I : " + i);

                String news_title = aJobName;
                String news_title_seo = StringUtils.removeAccent(aJobName);
                String news_meta = aJobName;
                String news_description = gJobDescription;
                String news_tag = aJobName.replace(" ", ", ");
                String news_pic = jobDetailImageName;
                String pic_note = aJobName;
                String news_subcontent = "<p>" + gJobDescription + "</p>";
                String news_content = gJobDetailShort + "<p><img src='http://" + jobDetailImageName + "'></p>"
                        + gJobDetail;
                int type = 4;
                int status = 0;
                int kind = 0;
                String source = "Theo http://monngonmoingay.com";
                String author = null;
                int user_posted = 0;
                int user_activated = 0;
                int cate_id = 43;
                String list_productid_relation = "13,28,30";

                // Only insert items whose SEO title is not reported as already present.
                if (!MysqlCrawler.getInstance().checkNewsUrl(news_title_seo)) {
                    MysqlCrawler.getInstance().insertNewsContent(news_title, news_title_seo, news_meta,
                            news_description, news_tag, news_pic, pic_note, news_subcontent, news_content, type,
                            status, kind, source, author, user_posted, user_activated, cate_id,
                            list_productid_relation);
                }
            } catch (Exception ex) {
                System.out.println("\n Fail I : " + i);
                System.out.println("\n Ex : " + ex);
            }
        }
    }

    logger.debug("=============");
}

/**
 * Returns the combined text ({@code "TEXT"}) or inner HTML ({@code "HTML"}) of the elements
 * matching {@code query} below {@code root}; the format name is compared case-insensitively.
 * Any other format yields an empty string without evaluating the selector.
 */
private static String selectContent(Element root, String query, String format) {
    if ("TEXT".equalsIgnoreCase(format)) {
        return root.select(query).text();
    }
    if ("HTML".equalsIgnoreCase(format)) {
        return root.select(query).html();
    }
    return "";
}

/** Same as {@link #selectContent}, but an empty {@code query} yields an empty string. */
private static String selectOptionalContent(Element root, String query, String format) {
    return query.isEmpty() ? "" : selectContent(root, query, format);
}

/**
 * Reads {@code attrName} from {@code target}, or from the elements matching
 * {@code subQuery} below it when {@code subQuery} is not empty.
 */
private static String attributeOf(Element target, String subQuery, String attrName) {
    return subQuery.isEmpty() ? target.attr(attrName) : target.select(subQuery).attr(attrName);
}

/**
 * Extracts an image address: the {@code attrName} attribute of the match at {@code position}
 * (or of the first match when {@code position} is negative), prefixed with {@code urlPrefix}.
 * Returns an empty string when the query is empty, the position is out of range or the
 * matched element(s) do not carry the attribute.
 */
private static String selectImageSource(Element root, String query, int position, String subQuery,
        String attrName, String urlPrefix) {
    if (query.isEmpty()) {
        return "";
    }
    Elements matches = root.select(query);
    if (position > -1) {
        if (position >= matches.size()) {
            return "";
        }
        Element target = matches.get(position);
        // The attribute must be present on the matched element itself, even when the value
        // is finally read through subQuery.
        if (target.attr(attrName).isEmpty()) {
            return "";
        }
        return urlPrefix + attributeOf(target, subQuery, attrName);
    }
    if (matches.attr(attrName).isEmpty()) {
        return "";
    }
    return urlPrefix + matches.first().attr(attrName);
}

/**
 * Extracts a link: the {@code attrName} attribute of the match at {@code position}
 * (or of the first match when {@code position} is negative), prefixed with {@code urlPrefix}.
 * Returns an empty string when the query is empty or the position is out of range.
 *
 * @throws IllegalStateException if {@code position} is negative and nothing matches
 *         {@code query}; the caller treats this as a failed item
 */
private static String selectLink(Element root, String query, int position, String subQuery,
        String attrName, String urlPrefix) {
    if (query.isEmpty()) {
        return "";
    }
    Elements matches = root.select(query);
    if (position > -1) {
        if (position >= matches.size()) {
            return "";
        }
        return urlPrefix + attributeOf(matches.get(position), subQuery, attrName);
    }
    Element first = matches.first();
    if (first == null) {
        throw new IllegalStateException("No element matches link selector: " + query);
    }
    return urlPrefix + first.attr(attrName);
}

From source file:de.geeksfactory.opacclient.apis.IOpac.java

/**
 * Parses the HTML of a detail page into a {@link DetailledItem}.
 *
 * <p>The item id is read from the {@code mednr} input field or, failing that, from the
 * {@code mednr} query parameter of the first link containing it; it stays {@code null}
 * when neither is available. The rows of the second table of the page provide the cover
 * image, the title, one {@link Copy} (status, department, shelfmark) and all further
 * details. As a side effect {@code newShareLinks} is set according to whether a
 * {@code #sharebutton} element exists.
 *
 * @param html the detail page markup
 * @return the parsed item
 * @throws IOException declared by the original signature
 * @throws IndexOutOfBoundsException if the page contains fewer than two tables
 */
protected DetailledItem parse_result(String html) throws IOException {
    // Status text for a copy that is not on loan ("available").
    final String available = "verf\u00fcgbar";
    final String reservationLinkQuery = "a[href^=/cgi-bin/di.exe?mode=10]";

    Document doc = Jsoup.parse(html);

    DetailledItem result = new DetailledItem();

    String id = null;
    Element idInput = doc.select("input[name=mednr]").first();
    if (idInput != null) {
        id = idInput.val().trim();
    } else {
        Element idLink = doc.select("a[href*=mednr]").first();
        if (idLink != null) {
            // The href merely contains the text "mednr"; the parameter itself may be absent.
            String mednr = getQueryParamsFirst(idLink.attr("href")).get("mednr");
            if (mednr != null) {
                id = mednr.trim();
            }
        }
    }

    result.setId(id);

    // check if new share button is available (allows to share a link to the standard
    // frameset of the OPAC instead of only the detail frame)
    newShareLinks = doc.select("#sharebutton").size() > 0;

    Elements table = doc.select("table").get(1).select("tr");

    // GET COVER IMAGE
    String imgUrl = table.get(0)
            .select("img[src~=^https?://(:?images(?:-[^\\.]*)?\\.|[^\\.]*\\" + ".images-)amazon\\.com]")
            .attr("src");
    result.setCover(imgUrl);

    // GET INFORMATION
    Copy copy = new Copy();

    for (Element element : table) {
        String detail = element.select("td").text().trim().replace("\u00a0", "");
        String title = element.select("th").text().trim().replace("\u00a0", "");

        if (title.equals("")) {
            continue;
        }

        if (title.contains("verliehen bis")) {
            // An empty "on loan until" cell means the copy is available.
            if (detail.equals("")) {
                copy.setStatus(available);
            } else {
                copy.setStatus("verliehen bis " + detail);
            }
        } else if (title.contains("Abteilung")) {
            copy.setDepartment(detail);
        } else if (title.contains("Signatur")) {
            copy.setShelfmark(detail);
        } else if (title.contains("Titel")) {
            result.setTitle(detail);
        } else if (!title.contains("Cover")) {
            result.addDetail(new Detail(title, detail));
        }
    }

    // GET RESERVATION INFO
    Element reservationLink = doc.select(reservationLinkQuery).first();
    Element reservationButton = doc.select("input.resbutton").first();
    if (available.equals(copy.getStatus()) || (reservationLink == null && reservationButton == null)) {
        result.setReservable(false);
    } else {
        result.setReservable(true);
        if (reservationLink != null) {
            // Reservation via link
            result.setReservation_info(reservationLink.attr("href").substring(1).replace(" ", ""));
        } else {
            // Reservation via form (method="get")
            Element form = reservationButton.parent();
            result.setReservation_info(generateQuery(form));
        }
    }

    if (copy.notEmpty()) {
        result.addCopy(copy);
    }

    return result;
}

From source file:de.geeksfactory.opacclient.apis.IOpac.java

/**
 * Requests the prolongation of one lent item.
 *
 * <p>Internal convention: a media id starting with {@code "NEW"} marks the new iOPAC
 * version; the remainder of the id is then sent directly to the prolongation URL and the
 * response text decides between success and error. For any other id, {@code media} is a
 * relative link whose page contains a form; its {@code sessionid} and {@code mednr}
 * values are submitted together with the account name.
 *
 * @param media      media id, either {@code "NEW"} + media number or a relative link
 * @param account    account whose name is sent as {@code kndnr}
 * @param useraction not used by this implementation
 * @param Selection  not used by this implementation
 * @return {@code OK} on success, otherwise {@code ERROR}, with a message where the page
 *         provides one
 * @throws IOException if fetching a page fails outside the form submission step
 */
@Override
public ProlongResult prolong(String media, Account account, int useraction, String Selection)
        throws IOException {
    if (media.startsWith("NEW")) {
        String mediaNr = media.substring(3);
        String html = httpGet(
                opac_url + "/cgi-bin/di.exe?mode=42&MedNrVerlAll=" + URLEncoder.encode(mediaNr, "UTF-8"),
                getDefaultEncoding());

        Document doc = Jsoup.parse(html);
        if (doc.text().contains("1 Medium wurde verl")) {
            return new ProlongResult(MultiStepResult.Status.OK);
        }
        return new ProlongResult(MultiStepResult.Status.ERROR, doc.text());
    }

    String html = httpGet(opac_url + "/" + media, getDefaultEncoding());
    Document doc = Jsoup.parse(html);

    Element firstHeaderCell = doc.select("table th").first();
    if (firstHeaderCell == null) {
        return new ProlongResult(MultiStepResult.Status.ERROR);
    }

    // A "Hinweis" (notice) heading means the table header cell carries the error message.
    Element heading = doc.select("h1").first();
    if (heading != null && heading.text().contains("Hinweis")) {
        return new ProlongResult(MultiStepResult.Status.ERROR, firstHeaderCell.text());
    }

    Element form = doc.select("form[name=form1]").first();
    if (form == null) {
        return new ProlongResult(MultiStepResult.Status.ERROR);
    }

    try {
        String sessionid = form.select("input[name=sessionid]").attr("value");
        String mednr = form.select("input[name=mednr]").attr("value");
        // Parameter values are URL-encoded, as in the "NEW" branch above, so that
        // reserved characters cannot corrupt the query string.
        httpGet(opac_url + "/cgi-bin/di.exe?mode=8&kndnr=" + URLEncoder.encode(account.getName(), "UTF-8")
                + "&mednr=" + URLEncoder.encode(mednr, "UTF-8")
                + "&sessionid=" + URLEncoder.encode(sessionid, "UTF-8")
                + "&psh100=Verl%C3%A4ngern", getDefaultEncoding());
        return new ProlongResult(MultiStepResult.Status.OK);
    } catch (Exception e) {
        // Any failure of the submission is reported as a plain error result.
        e.printStackTrace();
        return new ProlongResult(MultiStepResult.Status.ERROR);
    }
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Parses the HTML of a detail page into a {@link DetailledItem}.
 *
 * <p>Collected, in this order: the title and label/value details from the detail table
 * (the row selector depends on {@code version18}), the reservation link, free-text
 * {@code div.record-item-new} blocks, title/author from a {@code span.z3988} element,
 * the cover image and finally the copies listed in {@code div[id^=stock_]} elements.
 *
 * @param id   id to store on the result
 * @param html the detail page markup
 * @return the parsed item
 */
private DetailledItem parse_result(String id, String html) {
    Document doc = Jsoup.parse(html);

    DetailledItem result = new DetailledItem();
    result.setTitle("");
    boolean title_is_set = false;

    result.setId(id);

    String detailTrsQuery = version18 ? ".inRoundBox1 table table tr"
            : ".DetailDataCell table table:not(.inRecordHeader) tr";
    for (Element tr : doc.select(detailTrsQuery)) {
        int s = tr.children().size();
        if (s == 0) {
            // A row without child elements has neither label nor value; tr.child(0)
            // would throw IndexOutOfBoundsException for it.
            continue;
        }
        String label = tr.child(0).text().trim();
        if (label.equals("Titel") && !title_is_set) {
            result.setTitle(tr.child(s - 1).text().trim());
            title_is_set = true;
        } else if (s > 1) {
            Element valchild = tr.child(s - 1);
            // Cells containing a nested table are not plain values and are skipped.
            if (valchild.select("table").isEmpty()) {
                String val = valchild.text().trim();
                if (val.length() > 0) {
                    result.addDetail(new Detail(label, val));
                }
            }
        }
    }

    for (Element a : doc.select("a.SummaryActionLink")) {
        if (a.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(a.attr("href"));
        }
    }

    // Each free-text block becomes one unlabeled detail; <br> elements become line breaks.
    for (Element dd : doc.select("div.record-item-new")) {
        StringBuilder text = new StringBuilder();
        for (Node node : dd.childNodes()) {
            if (node instanceof TextNode) {
                text.append(((TextNode) node).text());
            } else if (node instanceof Element) {
                Element child = (Element) node;
                if (child.tagName().equals("br")) {
                    text.append("\n");
                } else {
                    text.append(child.text().trim());
                }
            }
        }
        result.addDetail(new Detail("", text.toString()));
    }

    Element z3988 = doc.select("span.z3988").first();
    if (z3988 != null) {
        // Sometimes there is a <span class="Z3988"> item which provides
        // data in a standardized format.
        String z3988data = z3988.attr("title").trim();
        for (String pair : z3988data.split("&")) {
            String[] nv = pair.split("=", 2);
            if (nv.length != 2 || nv[1].trim().equals("")) {
                continue;
            }
            boolean isTitleKey = nv[0].equals("rft.btitle") || nv[0].equals("rft.atitle");
            if (isTitleKey && result.getTitle().length() == 0) {
                result.setTitle(nv[1]);
            } else if (nv[0].equals("rft.au")) {
                result.addDetail(new Detail("Author", nv[1]));
            }
        }
    }

    // Cover
    Element cover = doc.select(".BookCover, .LargeBookCover").first();
    if (cover != null) {
        result.setCover(cover.attr("src"));
    }

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    String pop = "";
    for (Element div : doc.select("div[id^=stock_]")) {
        // A "stock_head" div only carries the branch name for the copy divs that follow it.
        if (div.attr("id").startsWith("stock_head")) {
            pop = div.text().trim();
            continue;
        }

        Copy copy = new Copy();

        // This is getting very ugly - check if it is valid for libraries which are not Hamburg.
        // Seems to also work in Kiel (Zones 1.8, checked 10.10.2015)
        // j counts the child nodes seen since the start of the div or since the last <br>;
        // the meaning of a node depends on that position.
        // NOTE(review): the same Copy instance is added again after every <br> - confirm
        // that a stock div never describes more than one copy.
        int j = 0;
        for (Node node : div.childNodes()) {
            try {
                if (node instanceof Element) {
                    Element child = (Element) node;
                    String tag = child.tag().getName();
                    if (tag.equals("br")) {
                        copy.setBranch(pop);
                        result.addCopy(copy);
                        j = -1;
                    } else if (tag.equals("b") && j == 1) {
                        copy.setLocation(child.text());
                    } else if (tag.equals("b") && j > 1) {
                        copy.setStatus(child.text());
                    }
                    j++;
                } else if (node instanceof TextNode) {
                    TextNode textNode = (TextNode) node;
                    if (j == 0) {
                        copy.setDepartment(textNode.text());
                    }
                    if (j == 2) {
                        copy.setBarcode(textNode.getWholeText().trim().split("\n")[0].trim());
                    }
                    if (j == 6) {
                        // The return date is taken from the last ten characters (dd.MM.yyyy).
                        String text = textNode.text().trim();
                        String date = text.substring(text.length() - 10);
                        try {
                            copy.setReturnDate(fmt.parseLocalDate(date));
                        } catch (IllegalArgumentException e) {
                            e.printStackTrace();
                        }
                    }
                    j++;
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    return result;
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Applies the renderers of {@code rendererList}, starting at {@code startIndex}, to
 * {@code target}. The method processes exactly one renderer per invocation and recurses
 * for the following ones.
 *
 * <p>Handling per renderer type, as implemented below:
 * <ul>
 * <li>{@code GO_THROUGH}: nothing is rendered, the next renderer is applied.</li>
 * <li>{@code RENDER_ACTION}: the renderer's style is applied to {@code renderAction}, then the
 * next renderer is applied.</li>
 * <li>{@code ELEMENT_NOT_FOUND_HANDLER}: only takes effect when its selector matches nothing;
 * in that case the alternative renderer is applied to {@code target}.</li>
 * <li>any other type: every element matched by the renderer's selector is replaced by the
 * output of the renderer's transformers.</li>
 * </ul>
 *
 * @param target       the element the current renderer's selector is evaluated against
 * @param rendererList the renderers to apply
 * @param renderAction receives style changes of {@code RENDER_ACTION} renderers and decides whether
 *                     a warning is logged for a selector that matches nothing
 * @param startIndex   index in {@code rendererList} of the renderer handled by this invocation
 * @param count        exclusive upper bound for {@code startIndex}; recursion stops once it is reached
 */
private final static void apply(Element target, List<Renderer> rendererList, RenderAction renderAction,
        int startIndex, int count) {

    // The renderer list has to be applied recursively because the
    // transformer will always return a new Element clone.

    if (startIndex >= count) {
        return;
    }

    final Renderer currentRenderer = rendererList.get(startIndex);

    RendererType rendererType = currentRenderer.getRendererType();

    // Renderer types that do not transform any element are handled here and
    // immediately hand over to the next renderer.
    switch (rendererType) {
    case GO_THROUGH:
        apply(target, rendererList, renderAction, startIndex + 1, count);
        return;
    /*
    case DEBUG:
    currentRenderer.getTransformerList().get(0).invoke(target);
    apply(target, rendererList, renderAction, startIndex + 1, count);
    return;
    */
    case RENDER_ACTION:
        ((RenderActionRenderer) currentRenderer).getStyle().apply(renderAction);
        apply(target, rendererList, renderAction, startIndex + 1, count);
        return;
    default:
        // do nothing
        break;
    }

    String selector = currentRenderer.getSelector();
    List<Transformer<?>> transformerList = currentRenderer.getTransformerList();

    // Collect the elements to transform: the target itself for the pseudo root
    // selector, otherwise a mutable copy of the selector's matches.
    List<Element> elemList;
    if (PSEUDO_ROOT_SELECTOR.equals(selector)) {
        elemList = new LinkedList<Element>();
        elemList.add(target);
    } else {
        elemList = new ArrayList<>(target.select(selector));
    }

    if (elemList.isEmpty()) {
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) {
            // Nothing matched: render the target with the handler's alternative renderer.
            // NOTE: this clears and refills the list returned by getTransformerList() in place.
            elemList.add(target);
            transformerList.clear();
            transformerList.add(
                    new RendererTransformer(((ElementNotFoundHandler) currentRenderer).alternativeRenderer()));
        } else if (renderAction.isOutputMissingSelectorWarning()) {
            String creationInfo = currentRenderer.getCreationSiteInfo();
            if (creationInfo == null) {
                creationInfo = "";
            } else {
                creationInfo = " at [ " + creationInfo + " ]";
            }
            logger.warn(
                    "There is no element found for selector [{}]{}, if it is deserved, try Renderer#disableMissingSelectorWarning() "
                            + "to disable this message and Renderer#enableMissingSelectorWarning could enable this warning again in "
                            + "your renderer chain",
                    selector, creationInfo);
            apply(target, rendererList, renderAction, startIndex + 1, count);
            return;
        }
        // Otherwise (no match, warning disabled) execution falls through: the loop below
        // has nothing to iterate and the next renderer is applied at the end of the method.

    } else {
        // The selector matched something, so the not-found handler has nothing to do.
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) {
            apply(target, rendererList, renderAction, startIndex + 1, count);
            return;
        }
    }

    // Set when the target itself is among the elements to transform; it is processed
    // after all other elements (see below).
    Element delayedElement = null;
    Element resultNode;
    // TODO we suppose that the element is listed as the order from parent
    // to children, so we reverse it. Perhaps we need a real order process
    // to ensure the wanted order.
    Collections.reverse(elemList);
    boolean renderForRoot;
    for (Element elem : elemList) {
        renderForRoot = PSEUDO_ROOT_SELECTOR.equals(selector)
                || rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER;
        if (!renderForRoot) {
            // faked group node will be not applied by renderers(only when the current selector is not the pseudo :root)
            if (elem.tagName().equals(ExtNodeConstants.GROUP_NODE_TAG)
                    && ExtNodeConstants.GROUP_NODE_ATTR_TYPE_FAKE
                            .equals(elem.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE))) {
                continue;
            }
        }

        if (elem == target) {
            delayedElement = elem;
            continue;
        }
        // Insert the output of every transformer in front of the element, then drop
        // the original element.
        for (Transformer<?> transformer : transformerList) {
            resultNode = transformer.invoke(elem);
            elem.before(resultNode);
        } // for transformer
        elem.remove();
    } // for element

    // if the root element is one of the process targets, we can not apply
    // the left renderers to original element because it will be replaced by
    // a new element even it is not necessary (that is how Transformer
    // works).
    if (delayedElement == null) {
        apply(target, rendererList, renderAction, startIndex + 1, count);
    } else {
        // For a not-found handler whose target is a Document, the document's first
        // child element is transformed instead of the Document itself.
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER && delayedElement instanceof Document) {
            delayedElement = delayedElement.child(0);
        }
        // The remaining renderers are applied to each transformer result rather than
        // to the original element, which is removed afterwards.
        for (Transformer<?> transformer : transformerList) {
            resultNode = transformer.invoke(delayedElement);
            delayedElement.before(resultNode);
            apply(resultNode, rendererList, renderAction, startIndex + 1, count);
        } // for transformer
        delayedElement.remove();
    }

}

From source file:de.geeksfactory.opacclient.apis.Pica.java

/**
 * Parses the HTML of a single-record detail page into a {@link DetailledItem}.
 *
 * <p>The page is read in this order:
 * <ol>
 * <li>the item id, taken from the {@code PPN} query parameter of the first link whose href
 * contains "PPN",</li>
 * <li>the cover URL, derived from the first text node of the ISBN row,</li>
 * <li>title and subtitle, split at the first "/" or ":" of the title row,</li>
 * <li>the label/value rows up to the first row containing an {@code <hr>}, stored as details,</li>
 * <li>the rows after that separator, interpreted as copies (one copy per {@code <hr>}-separated
 * group),</li>
 * <li>reservation links, collected as a JSON array, and a link to the volume search.</li>
 * </ol>
 *
 * @param html raw HTML of the detail page
 * @return the populated item; its title is the empty string when no title row was found
 */
protected DetailledItem parse_result(String html) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    // GET ID: the first link carrying a PPN identifies the record.
    Element ppnLink = doc.select("a[href*=PPN]").first();
    if (ppnLink != null) {
        Map<String, String> hrefq = getQueryParamsFirst(ppnLink.absUrl("href"));
        result.setId(hrefq.get("PPN"));
    }

    // GET COVER
    Element isbnElement = doc.select("td.preslabel:contains(ISBN) + td.presvalue").first();
    if (isbnElement != null) {
        String isbn = "";
        // Only the first direct text node is used; nested markup is ignored.
        for (Node child : isbnElement.childNodes()) {
            if (child instanceof TextNode) {
                isbn = ((TextNode) child).text().trim();
                break;
            }
        }
        result.setCover(ISBNTools.getAmazonCoverURL(isbn, true));
    }

    // GET TITLE AND SUBTITLE
    String titleRegex = ".*(Titel|Aufsatz|Zeitschrift|Gesamttitel"
            + "|Title|Article|Periodical|Collective\\stitle" + "|Titre|Article|P.riodique|Titre\\sg.n.ral).*";
    String selector = "td.preslabel:matches(" + titleRegex + ") + td.presvalue";
    Element titleAndSubtitleElem = doc.select(selector).first();
    if (titleAndSubtitleElem != null) {
        String titleAndSubtitle = titleAndSubtitleElem.text().trim();
        int slashPosition = titleAndSubtitle.indexOf("/");
        int colonPosition = titleAndSubtitle.indexOf(":");
        // Split at whichever separator occurs first. A plain Math.min() of both indices
        // yields -1 (and therefore never splits) when only one separator is present.
        int separatorPosition;
        if (slashPosition < 0) {
            separatorPosition = colonPosition;
        } else if (colonPosition < 0) {
            separatorPosition = slashPosition;
        } else {
            separatorPosition = Math.min(slashPosition, colonPosition);
        }
        String title;
        if (separatorPosition > 0) {
            title = titleAndSubtitle.substring(0, separatorPosition).trim();
            String subtitle = titleAndSubtitle.substring(separatorPosition + 1).trim();
            result.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle));
        } else {
            title = titleAndSubtitle;
        }
        result.setTitle(title);
    } else {
        result.setTitle("");
    }

    // Details
    // "line" is the index into "lines" of the row currently looked at; it is reused by
    // the copies loop below to continue after the separator row.
    int line = 0;
    Elements lines = doc.select("td.preslabel + td.presvalue");
    if (titleAndSubtitleElem != null) {
        // The title row was handled above and must not show up as a detail again.
        lines.remove(titleAndSubtitleElem);
    }
    for (Element element : lines) {
        if (element.select("hr").size() > 0) {
            // after the separator we get the copies
            break;
        }

        Element titleElem = element.firstElementSibling();
        String detail;
        Elements divs = element.select("div");
        if (divs.size() > 1 && divs.text().equals(element.text())) {
            // The value consists solely of <div>s: join the non-empty ones with line breaks.
            StringBuilder joined = new StringBuilder();
            for (Element div : divs) {
                String divText = div.text().replace("\u00a0", " ").trim();
                if (!divText.isEmpty()) {
                    if (joined.length() > 0) {
                        joined.append("\n");
                    }
                    joined.append(divText);
                }
            }
            detail = joined.toString();
        } else {
            detail = element.text().replace("\u00a0", " ").trim();
        }
        String title = titleElem.text().replace("\u00a0", " ").trim();

        if (detail.length() == 0 && title.length() == 0) {
            line++;
            continue;
        }
        if (title.contains(":")) {
            title = title.substring(0, title.indexOf(":")); // remove colon
        }
        result.addDetail(new Detail(title, detail));

        // A single, non-empty link pointing outside the OPAC is exposed as an extra detail.
        Elements anchors = element.select("a");
        if (anchors.size() == 1 && !anchors.first().text().trim().isEmpty()) {
            String url = anchors.first().absUrl("href");
            if (!url.startsWith(opac_url)) {
                result.addDetail(new Detail(stringProvider.getString(StringProvider.LINK), url));
            }
        }

        line++;
    }
    line++; // next line after separator

    // Copies
    Copy copy = new Copy();
    String location = "";

    // reservation info will be stored as JSON
    JSONArray reservationInfo = new JSONArray();

    // Both are loop-invariant, so they are built once instead of once per status row.
    Pattern returnDatePattern = Pattern.compile("(till|bis) (\\d{2}-\\d{2}-\\d{4})");
    DateTimeFormatter returnDateFormat = DateTimeFormat.forPattern("dd-MM-yyyy").withLocale(Locale.GERMAN);

    while (line < lines.size()) {
        Element element = lines.get(line);
        if (element.select("hr").size() == 0) {
            Element titleElem = element.firstElementSibling();
            String detail = element.text().trim();
            String title = titleElem.text().replace("\u00a0", " ").trim();

            if (detail.length() == 0 && title.length() == 0) {
                line++;
                continue;
            }

            if (title.contains("Standort") || title.contains("Vorhanden in") || title.contains("Location")) {
                location += detail;
            } else if (title.contains("Sonderstandort")) {
                location += " - " + detail;
            } else if (title.contains("Systemstelle") || title.contains("Subject")) {
                copy.setDepartment(detail);
            } else if (title.contains("Fachnummer") || title.contains("locationnumber")) {
                copy.setLocation(detail);
            } else if (title.contains("Signatur") || title.contains("Shelf mark")) {
                copy.setShelfmark(detail);
            } else if (title.contains("Anmerkung")) {
                location += " (" + detail + ")";
            } else if (title.contains("Link")) {
                result.addDetail(new Detail(title.replace(":", "").trim(), detail));
            } else if (title.contains("Status") || title.contains("Ausleihinfo")
                    || title.contains("Ausleihstatus") || title.contains("Request info")) {
                // Find return date
                Matcher matcher = returnDatePattern.matcher(detail);
                if (matcher.find()) {
                    try {
                        // Everything in front of "till"/"bis" is the status text. The end index
                        // is clamped so that a match at position 0 cannot produce a negative
                        // index (which would throw StringIndexOutOfBoundsException).
                        copy.setStatus(detail.substring(0, Math.max(0, matcher.start() - 1)).trim());
                        copy.setReturnDate(returnDateFormat.parseLocalDate(matcher.group(2)));
                    } catch (IllegalArgumentException e) {
                        // Unparseable date: keep the complete text as status instead.
                        e.printStackTrace();
                        copy.setStatus(detail);
                    }
                } else {
                    copy.setStatus(detail);
                }
                // Get reservation info
                Element a = element.select("a:has(img[src*=inline_arrow])").first();
                if (a != null) {
                    boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*");
                    JSONObject reservation = new JSONObject();
                    try {
                        reservation.put("multi", multipleCopies);
                        reservation.put("link", _extract_url(a.absUrl("href")));
                        reservation.put("desc", location);
                        reservationInfo.put(reservation);
                    } catch (JSONException e1) {
                        e1.printStackTrace();
                    }
                    result.setReservable(true);
                }
            }
        } else {
            // A separator row closes the current copy.
            copy.setBranch(location);
            result.addCopy(copy);
            location = "";
            copy = new Copy();
        }
        line++;
    }

    // The last copy is not followed by a separator row.
    if (copy.notEmpty()) {
        copy.setBranch(location);
        result.addCopy(copy);
    }

    if (reservationInfo.length() == 0) {
        // No reservation info found yet, because we didn't find any copies.
        // If there is a reservation link somewhere in the rows we interpreted
        // as details, we still want to use it.
        Element a = doc.select("td a:has(img[src*=inline_arrow])").first();
        if (a != null) {
            boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*");
            JSONObject reservation = new JSONObject();
            try {
                reservation.put("multi", multipleCopies);
                // NOTE(review): the raw href is used here while the copies loop above passes
                // the absolute URL - confirm whether this difference is intended.
                reservation.put("link", _extract_url(a.attr("href")));
                reservation.put("desc", location);
                reservationInfo.put(reservation);
            } catch (JSONException e1) {
                e1.printStackTrace();
            }
            result.setReservable(true);
        }
    }
    result.setReservation_info(reservationInfo.toString());

    // Volumes
    Elements volumeLinks = doc.select("a[href^=FAM?PPN=]");
    if (volumeLinks.size() > 0) {
        String href = volumeLinks.attr("href");
        String ppn = getQueryParamsFirst(href).get("PPN");
        Map<String, String> data = new HashMap<>();
        data.put("ppn", ppn);
        result.setVolumesearch(data);
    }

    return result;
}

From source file:de.geeksfactory.opacclient.apis.Pica.java

/**
 * Builds the list of search fields by parsing the catalogue's advanced search form
 * (requested from {@code .../ADVANCED_SEARCHFILTER}).
 *
 * <p>The following form controls are converted, in this order:
 * <ul>
 * <li>every option of {@code select[name=IKT0]} becomes a text field (data {@code ADI=false}),</li>
 * <li>{@code select[name=SRT]} becomes a dropdown with id "SRT",</li>
 * <li>text inputs and selects whose name starts with "ADI" become text fields
 * (data {@code ADI=true}) and dropdowns respectively,</li>
 * <li>{@code input[name=FUZZY]} becomes a checkbox,</li>
 * <li>the {@code input[name=ADI_MAT]} inputs are merged into one dropdown with id "ADI_MAT".</li>
 * </ul>
 *
 * @return the search fields found in the form
 * @throws IOException   declared by the signature; presumably raised when loading the form
 *                       fails - verify against {@code httpGet}
 * @throws JSONException if the per-field JSON data cannot be built
 */
@Override
public List<SearchField> getSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }

    String html = httpGet(opac_url + "/LNG=" + getLang() + "/DB=" + db + "/ADVANCED_SEARCHFILTER",
            getDefaultEncoding());
    Document doc = Jsoup.parse(html);
    List<SearchField> fields = new ArrayList<>();

    // Matches a bracketed or parenthesised 2-3 letter key (optionally prefixed with "X" and
    // suffixed with ":") in an option label, e.g. "[TIT]" or "(XPRS:)". Compiled once
    // because it does not depend on the individual option.
    Pattern meaningPattern = Pattern.compile("\\[X?[A-Za-z]{2,3}:?\\]|\\(X?[A-Za-z]{2,3}:?\\)");

    Elements options = doc.select("select[name=IKT0] option");
    for (Element option : options) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(option.text());
        field.setId(option.attr("value"));
        field.setHint("");
        field.setData(new JSONObject("{\"ADI\": false}"));

        Matcher matcher = meaningPattern.matcher(field.getDisplayName());
        if (matcher.find()) {
            // Store the key (brackets included, colon removed) as "meaning" and strip it
            // from the name shown to the user.
            field.getData().put("meaning", matcher.group().replace(":", "").toUpperCase());
            field.setDisplayName(matcher.replaceFirst("").trim());
        }

        fields.add(field);
    }

    Elements sort = doc.select("select[name=SRT]");
    if (sort.size() > 0) {
        DropdownSearchField field = new DropdownSearchField();
        // The label is looked up two levels above the select, in the element with class "longval".
        field.setDisplayName(sort.first().parent().parent().select(".longval").first().text());
        field.setId("SRT");
        for (Element option : sort.select("option")) {
            field.addDropdownValue(option.attr("value"), option.text());
        }
        fields.add(field);
    }

    for (Element input : doc.select("input[type=text][name^=ADI]")) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(input.parent().parent().select(".longkey").text());
        field.setId(input.attr("name"));
        field.setHint(input.parent().select("span").text());
        field.setData(new JSONObject("{\"ADI\": true}"));
        fields.add(field);
    }

    for (Element dropdown : doc.select("select[name^=ADI]")) {
        DropdownSearchField field = new DropdownSearchField();
        field.setDisplayName(dropdown.parent().parent().select(".longkey").text());
        field.setId(dropdown.attr("name"));
        for (Element option : dropdown.select("option")) {
            field.addDropdownValue(option.attr("value"), option.text());
        }
        fields.add(field);
    }

    Elements fuzzy = doc.select("input[name=FUZZY]");
    if (fuzzy.size() > 0) {
        CheckboxSearchField field = new CheckboxSearchField();
        field.setDisplayName(fuzzy.first().parent().parent().select(".longkey").first().text());
        field.setId("FUZZY");
        fields.add(field);
    }

    Elements mediatypes = doc.select("input[name=ADI_MAT]");
    if (mediatypes.size() > 0) {
        DropdownSearchField field = new DropdownSearchField();
        field.setDisplayName("Materialart");
        field.setId("ADI_MAT");

        // Empty value first, labelled "Alle".
        field.addDropdownValue("", "Alle");
        for (Element mt : mediatypes) {
            // The label of each input is the text of the element following the input's parent.
            field.addDropdownValue(mt.attr("value"),
                    mt.parent().nextElementSibling().text().replace("\u00a0", ""));
        }
        fields.add(field);
    }

    return fields;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Builds the list of search fields by posting to {@code /index.asp} and parsing every
 * {@code .suchfeldinhalt} element of the returned form. A fixed "order" dropdown is
 * appended at the end.
 *
 * @return the search fields found in the form, followed by the order dropdown
 * @throws IOException   declared by the signature; presumably raised when the request
 *                       fails - verify against {@code httpPost}
 * @throws JSONException if the additional JSON data of a field cannot be built
 */
@Override
public List<SearchField> getSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }

    // Read branches and media types
    List<NameValuePair> formParams = new ArrayList<>(2);
    formParams.add(new BasicNameValuePair("link_profis.x", "0"));
    formParams.add(new BasicNameValuePair("link_profis.y", "1"));
    UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(formParams);
    Document doc = Jsoup.parse(httpPost(opac_url + "/index.asp", formEntity, getDefaultEncoding()));

    List<SearchField> fields = new ArrayList<>();

    for (Element fieldElem : doc.select(".suchfeldinhalt")) {
        String name = fieldElem.select(".suchfeld_inhalt_titel label").text();

        // The hint is the first direct text node of the input container that is
        // non-empty once line breaks are removed.
        String hint = "";
        Element inputContainer = fieldElem.select(".suchfeld_inhalt_input").first();
        if (inputContainer != null) {
            for (TextNode textNode : inputContainer.textNodes()) {
                String candidate = textNode.getWholeText().replace("\n", "");
                if (!candidate.isEmpty()) {
                    hint = candidate;
                    break;
                }
            }
        }

        Elements inputs = fieldElem
                .select(".suchfeld_inhalt_input input[type=text], " + ".suchfeld_inhalt_input select");
        int inputCount = inputs.size();

        if (inputCount == 1) {
            fields.add(createSearchField(name, hint, inputs.get(0)));
            continue;
        }
        if (inputCount != 2) {
            // Other layouts are not supported.
            continue;
        }

        Element firstInput = inputs.get(0);
        Element secondInput = inputs.get(1);
        if (inputs.select("input[type=text]").size() == 2) {
            // Two text fields, e.g. year from/to or two keywords
            fields.add(createSearchField(name, hint, firstInput));
            TextSearchField secondField = (TextSearchField) createSearchField(name, hint, secondInput);
            secondField.setHalfWidth(true);
            fields.add(secondField);
        } else if ("select".equals(firstInput.tagName()) && "input".equals(secondInput.tagName())
                && "feld1".equals(firstInput.attr("name"))) {
            // A dropdown to select from different search field types.
            // Break it down into single text fields.
            String selectName = firstInput.attr("name");
            String inputName = secondInput.attr("name");
            for (Element option : firstInput.select("option")) {
                String optionValue = option.attr("value");

                TextSearchField field = new TextSearchField();
                field.setHint(hint);
                field.setDisplayName(option.text());
                field.setId(inputName + "$" + optionValue);

                // Remember which dropdown value has to be sent along with this field.
                JSONObject params = new JSONObject();
                params.put(selectName, optionValue);
                JSONObject data = new JSONObject();
                data.put("additional_params", params);
                field.setData(data);

                fields.add(field);
            }
        }
    }

    DropdownSearchField orderField = new DropdownSearchField("orderselect",
            stringProvider.getString(StringProvider.ORDER), false, null);
    orderField.addDropdownValue("1", stringProvider.getString(StringProvider.ORDER_DEFAULT));
    orderField.addDropdownValue("2:desc", stringProvider.getString(StringProvider.ORDER_YEAR_DESC));
    orderField.addDropdownValue("2:asc", stringProvider.getString(StringProvider.ORDER_YEAR_ASC));
    orderField.addDropdownValue("3:desc", stringProvider.getString(StringProvider.ORDER_CATEGORY_DESC));
    orderField.addDropdownValue("3:asc", stringProvider.getString(StringProvider.ORDER_CATEGORY_ASC));
    orderField.setMeaning(Meaning.ORDER);
    fields.add(orderField);

    return fields;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Converts a {@code <select>} element into a {@link DropdownSearchField} and appends it to
 * {@code fields}. Selects named "numberOfHits", "timeOut" or "rememberList" are skipped.
 *
 * @param dropdownElement the select element; its "name" attribute becomes the field id and the
 *                        label(s) found under its parent become the display name
 * @param fields          list the new field is appended to
 */
private void parseDropdown(Element dropdownElement, List<SearchField> fields) {
    DropdownSearchField dropdown = new DropdownSearchField();
    dropdown.setId(dropdownElement.attr("name"));

    // Some fields make no sense or are not supported in the app
    switch (dropdown.getId()) {
    case "numberOfHits":
    case "timeOut":
    case "rememberList":
        return;
    default:
        break;
    }

    for (Element option : dropdownElement.select("option")) {
        dropdown.addDropdownValue(option.attr("value"), option.text());
    }

    String label = dropdownElement.parent().select("label").text();
    dropdown.setDisplayName(label);
    fields.add(dropdown);
}