Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes.Element.select

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Collects the account overview: lent media, reservations (requested and ordered media
 * are merged into one list) and pending fees.
 *
 * @param acc the account to log in with
 * @return the collected account data, or {@code null} if the login was not successful
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    start();
    final LoginResponse session = login(acc);
    if (!session.success) {
        return null;
    }

    final AccountData summary = new AccountData(acc.getId());
    if (session.warning != null) {
        summary.setWarning(session.warning);
    }

    final String accountUrl = opac_url + "/userAccount.do?methodToCall=";

    // Lent media. The start page is requested first; its response is not used.
    httpGet(accountUrl + "start", ENCODING);
    Document current = Jsoup.parse(httpGet(accountUrl + "showAccount&accountTyp=loaned", ENCODING));
    current.setBaseUri(opac_url);
    final List<LentItem> borrowed = new ArrayList<>();
    List<LentItem> lentChunk = parse_medialist(current);
    if (lentChunk != null) {
        borrowed.addAll(lentChunk);
    }
    // Follow every pagination link of the first page and append what it lists.
    final Element lentPager = current.select(".pagination").first();
    if (lentPager != null) {
        for (Element link : lentPager.select("a")) {
            if (link.hasAttr("href")) {
                current = Jsoup.parse(httpGet(link.attr("abs:href"), ENCODING));
                current.setBaseUri(opac_url);
                lentChunk = parse_medialist(current);
                if (lentChunk != null) {
                    borrowed.addAll(lentChunk);
                }
            }
        }
    }
    summary.setLent(borrowed);

    // Requested media ("Vormerkungen")
    current = Jsoup.parse(httpGet(accountUrl + "showAccount&accountTyp=requested", ENCODING));
    current.setBaseUri(opac_url);
    final List<ReservedItem> reservations = new ArrayList<>();
    List<ReservedItem> reservedChunk = parse_reslist(current);
    if (reservedChunk != null) {
        reservations.addAll(reservedChunk);
    }
    final Element requestedPager = current.select(".pagination").first();
    if (requestedPager != null) {
        for (Element link : requestedPager.select("a")) {
            if (link.hasAttr("href")) {
                current = Jsoup.parse(httpGet(link.attr("abs:href"), ENCODING));
                current.setBaseUri(opac_url);
                reservedChunk = parse_reslist(current);
                if (reservedChunk != null) {
                    reservations.addAll(reservedChunk);
                }
            }
        }
    }

    // Ordered media ("Bestellungen") go into the same reservation list
    current = Jsoup.parse(httpGet(accountUrl + "showAccount&accountTyp=ordered", ENCODING));
    current.setBaseUri(opac_url);
    List<ReservedItem> orderedChunk = parse_reslist(current);
    if (orderedChunk != null) {
        reservations.addAll(orderedChunk);
    }
    final Element orderedPager = current.select(".pagination").first();
    if (orderedPager != null) {
        for (Element link : orderedPager.select("a")) {
            if (link.hasAttr("href")) {
                current = Jsoup.parse(httpGet(link.attr("abs:href"), ENCODING));
                current.setBaseUri(opac_url);
                orderedChunk = parse_reslist(current);
                if (orderedChunk != null) {
                    reservations.addAll(orderedChunk);
                }
            }
        }
    }
    summary.setReservations(reservations);

    // Fees are read from the page that was parsed last
    final Element feeBox = current.select("#fees").first();
    if (feeBox != null) {
        final String feeText = feeBox.text().trim();
        final String feePattern = "Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)";
        if (feeText.matches(feePattern)) {
            // keep only "<amount> <currency>"
            summary.setPendingFees(feeText.replaceAll(feePattern, "$1 $2"));
        }
    }

    return summary;
}

From source file:de.geeksfactory.opacclient.apis.IOpac.java

/**
 * Parses an IOPAC search result page.
 *
 * <p>The {@code h4} heading carries the hit count or an error text; the first table of the
 * page carries the hits. Columns are identified by the text of the header cells. If no
 * header cell is recognised (or the table has no header row at all), a fixed default
 * column layout is used. Columns that are missing from a recognised header are skipped
 * instead of causing a {@link NullPointerException}.
 *
 * @param html raw HTML of the result page; it is also stored in {@code reusehtml}
 * @param page result page number, used for the fallback numbering of the hits
 * @return the parsed results, or {@code null} if the page contains neither an {@code h4}
 *         nor an {@code h1} element
 * @throws OpacErrorException    if the heading is not a recognised hit-count text
 * @throws NotReachableException if the page reports an IOPAC "RUNTIME ERROR"
 */
protected SearchRequestResult parse_search(String html, int page)
        throws OpacErrorException, NotReachableException {
    Document doc = Jsoup.parse(html);

    if (doc.select("h4").size() > 0) {
        String heading = doc.select("h4").text().trim();
        if (heading.startsWith("0 gefundene Medien")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        } else if (!heading.contains("gefundene Medien") && !heading.contains("Es wurden mehr als")) {
            // error
            throw new OpacErrorException(heading);
        }
    } else if (doc.select("h1").size() > 0) {
        String heading = doc.select("h1").text().trim();
        if (heading.contains("RUNTIME ERROR")) {
            // Server Error
            throw new NotReachableException("IOPAC RUNTIME ERROR");
        } else {
            throw new OpacErrorException(stringProvider.getFormattedString(
                    StringProvider.UNKNOWN_ERROR_WITH_DESCRIPTION, heading));
        }
    } else {
        return null;
    }

    updateRechnr(doc);

    reusehtml = html;

    results_total = -1;

    if (doc.select("h4").text().trim().contains("Es wurden mehr als")) {
        results_total = 200;
    } else {
        // The heading starts with the number of hits, followed by a space
        String resultnumstr = doc.select("h4").first().text();
        resultnumstr = resultnumstr.substring(0, resultnumstr.indexOf(" ")).trim();
        results_total = Integer.parseInt(resultnumstr);
    }

    List<SearchResult> results = new ArrayList<>();

    Element resultTable = doc.select("table").first();
    Elements rows = resultTable.select("tr:has(td)");

    // Map logical column names to their index, based on the header texts
    Map<String, Integer> colmap = new HashMap<>();
    Element thead = resultTable.select("tr:has(th)").first();
    if (thead != null) {
        int j = 0;
        for (Element th : thead.select("th")) {
            String text = th.text().trim().toLowerCase(Locale.GERMAN);
            if (text.contains("cover")) {
                colmap.put("cover", j);
            } else if (text.contains("titel")) {
                colmap.put("title", j);
            } else if (text.contains("verfasser")) {
                colmap.put("author", j);
            } else if (text.contains("mtyp")) {
                colmap.put("category", j);
            } else if (text.contains("jahr")) {
                colmap.put("year", j);
            } else if (text.contains("signatur")) {
                colmap.put("shelfmark", j);
            } else if (text.contains("info")) {
                colmap.put("info", j);
            } else if (text.contains("abteilung")) {
                colmap.put("department", j);
            } else if (text.contains("verliehen") || text.contains("verl.")) {
                colmap.put("returndate", j);
            } else if (text.contains("anz.res")) {
                colmap.put("reservations", j);
            }
            j++;
        }
    }
    if (colmap.size() == 0) {
        // No usable header: fall back to the default column layout
        colmap.put("cover", 0);
        colmap.put("title", 1);
        colmap.put("author", 2);
        colmap.put("publisher", 3);
        colmap.put("year", 4);
        colmap.put("department", 5);
        colmap.put("shelfmark", 6);
        colmap.put("returndate", 7);
        colmap.put("category", 8);
    }

    // Created once per call instead of once per row
    SimpleDateFormat df = new SimpleDateFormat("dd.MM.yyyy", Locale.GERMAN);

    for (int i = 0; i < rows.size(); i++) {
        Element tr = rows.get(i);
        Elements cells = tr.select("td");
        SearchResult sr = new SearchResult();

        // Cover (the column may be absent when the header was recognised without it)
        Integer coverCol = colmap.get("cover");
        if (coverCol != null) {
            Elements images = cells.get(coverCol).select("img");
            if (images.size() > 0) {
                sr.setCover(images.first().attr("src"));
            }
        }

        // Media Type
        if (colmap.get("category") != null) {
            String mType = cells.get(colmap.get("category")).text().trim().replace("\u00a0", "");
            String mTypeKey = mType.toLowerCase(Locale.GERMAN);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(mTypeKey)));
                } catch (JSONException | IllegalArgumentException e) {
                    // not configured for this library: use the built-in mapping
                    sr.setType(defaulttypes.get(mTypeKey));
                }
            } else {
                sr.setType(defaulttypes.get(mTypeKey));
            }
        }

        // Title and additional info
        String title;
        String additionalInfo = "";
        if (colmap.get("info") != null) {
            Element info = cells.get(colmap.get("info"));
            title = info.select("a[title=Details-Info]").text().trim();
            // Everything in front of the title; empty if the title text cannot be located
            String infoText = info.text();
            int titlePos = infoText.indexOf(title);
            String authorIn = titlePos >= 0 ? infoText.substring(0, titlePos) : "";
            if (authorIn.contains(":")) {
                authorIn = authorIn.replaceFirst("^([^:]*):(.*)$", "$1");
                additionalInfo += " - " + authorIn;
            }
        } else {
            Integer titleCol = colmap.get("title");
            title = titleCol != null ? cells.get(titleCol).text().trim().replace("\u00a0", "") : "";
            if (title.contains("(") && title.indexOf("(") > 0) {
                // Move a trailing "(...)" part from the title to the additional info
                additionalInfo += title.substring(title.indexOf("("));
                title = title.substring(0, title.indexOf("(") - 1).trim();
            }

            // Author
            if (colmap.containsKey("author")) {
                String author = cells.get(colmap.get("author")).text().trim().replace("\u00a0", "");
                additionalInfo += " - " + author;
            }
        }

        // Publisher
        if (colmap.containsKey("publisher")) {
            String publisher = cells.get(colmap.get("publisher")).text().trim().replace("\u00a0", "");
            additionalInfo += " (" + publisher;
        }

        // Year
        if (colmap.containsKey("year")) {
            String year = cells.get(colmap.get("year")).text().trim().replace("\u00a0", "");
            additionalInfo += ", " + year + ")";
        }

        sr.setInnerhtml("<b>" + title + "</b><br>" + additionalInfo);

        // Status (left unset when the table has no return date column)
        Integer returnDateCol = colmap.get("returndate");
        if (returnDateCol != null) {
            String status = cells.get(returnDateCol).text().trim().replace("\u00a0", "");
            try {
                df.parse(status);
                // this is a return date
                sr.setStatus(Status.RED);
                sr.setInnerhtml(sr.getInnerhtml() + "<br><i>"
                        + stringProvider.getString(StringProvider.LENT_UNTIL) + " " + status + "</i>");
            } catch (ParseException e) {
                // this is a different status text
                String lc = status.toLowerCase(Locale.GERMAN);
                if ((lc.equals("") || lc.contains("onleihe") || lc.contains("verleihbar")
                        || lc.contains("entleihbar") || lc.contains("ausleihbar")) && !lc.contains("nicht")) {
                    sr.setStatus(Status.GREEN);
                } else {
                    sr.setStatus(Status.YELLOW);
                    sr.setInnerhtml(sr.getInnerhtml() + "<br><i>" + status + "</i>");
                }
            }
        }

        // In some libraries (for example search for "atelier" in Preetz)
        // the results are sorted differently than their numbers suggest, so
        // we need to detect the number ("recno") from the link
        String link = tr.select("a[href^=/cgi-bin/di.exe?page=]").attr("href");
        Map<String, String> params = getQueryParamsFirst(link);
        if (params.containsKey("recno")) {
            int recno = Integer.parseInt(params.get("recno"));
            sr.setNr(recno - 1);
        } else {
            // the above should work, but fall back to this if it doesn't
            sr.setNr(10 * (page - 1) + i);
        }

        // In some libraries (for example Preetz) we can detect the media ID
        // here using another link present in the search results
        Elements idLinks = tr.select("a[href^=/cgi-bin/di.exe?cMedNr]");
        if (idLinks.size() > 0) {
            Map<String, String> idParams = getQueryParamsFirst(idLinks.first().attr("href"));
            sr.setId(idParams.get("cMedNr"));
        } else {
            sr.setId(null);
        }

        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Loads lent items, reservations, pending fees and the card expiry date of an account.
 *
 * <p>The links to the lent and reserved lists are taken from the page returned by the
 * login, either from the account summary table or from the account menu.
 *
 * @param acc the account to log in with
 * @return the account data, or {@code null} if the login returned no page or no link to
 *         the lent items could be found
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    Document login = login(acc);
    if (login == null) {
        return null;
    }

    AccountData res = new AccountData(acc.getId());

    String lentLink = null;
    String resLink = null;
    // Summary table: the name cell holds the label (and link), the next cell holds the value
    for (Element td : login.select(".AccountSummaryCounterNameCell, .AccountSummaryCounterNameCellStripe, "
            + ".CAccountDetailFieldNameCellStripe, .CAccountDetailFieldNameCell")) {
        String section = td.text().trim();
        if (section.contains("Entliehene Medien")) {
            lentLink = td.select("a").attr("href");
        } else if (section.contains("Vormerkungen")) {
            resLink = td.select("a").attr("href");
        } else if (section.contains("Kontostand")) {
            res.setPendingFees(td.nextElementSibling().text().trim());
        } else if (section.matches("Ausweis g.ltig bis")) {
            res.setValidUntil(td.nextElementSibling().text().trim());
        }
    }
    // Links from the account menu take precedence over those from the summary table
    for (Element a : login.select("a.AccountMenuLink")) {
        if (a.text().contains("Ausleihen")) {
            lentLink = a.attr("href");
        } else if (a.text().contains("Vormerkungen")) {
            resLink = a.attr("href");
        }
    }
    if (lentLink == null) {
        return null;
    }

    List<LentItem> lentItems = new ArrayList<>();
    String lentUrl = opac_url + "/" + lentLink.replace("utf-8?Method", "utf-8&Method");
    String lentHtml = httpGet(lentUrl, getDefaultEncoding());
    Document lentDoc = Jsoup.parse(lentHtml);
    lentDoc.setBaseUri(lentUrl);
    loadMediaList(lentDoc, lentItems);
    res.setLent(lentItems);

    // In Koeln, the reservations link is not shown on the overview page,
    // so look for it in the menu of the lent items page
    if (resLink == null) {
        for (Element a : lentDoc.select("a.AccountMenuLink")) {
            if (a.text().contains("Vormerkungen")) {
                resLink = a.attr("href");
            }
        }
    }

    List<ReservedItem> reservedItems = new ArrayList<>();
    // Without a link there is nothing to load; do not request ".../null"
    if (resLink != null) {
        String resHtml = httpGet(opac_url + "/" + resLink, getDefaultEncoding());
        Document resDoc = Jsoup.parse(resHtml);
        loadResList(resDoc, reservedItems);
    }
    res.setReservations(reservedItems);

    return res;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses a TouchPoint hit list into a {@link SearchRequestResult}.
 *
 * <p>If the page contains {@code #RefineHitListForm}, the hit list is fetched from
 * {@code speedHitList.do} first. For every row the media type, title, cover and loan
 * status are extracted; cover and loan status require one additional HTTP request each.
 *
 * @param html raw HTML of the search response
 * @param page result page number, used to number the hits
 * @return the parsed hit list; an empty result if the page contains a {@code .nodata} element
 * @throws OpacErrorException with message {@code "is_a_redirect"} if the heading contains
 *                            "(1/1)" or " 1/1"; {@code reusehtml} then holds the page
 * @throws IOException        if one of the additional requests fails
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException {
    Document doc = Jsoup.parse(html);

    if (doc.select("#RefineHitListForm").size() > 0) {
        // the results are located on a different page loaded via AJAX
        html = httpGet(opac_url + "/speedHitList.do?_=" + (System.currentTimeMillis() / 1000)
                + "&hitlistindex=0&exclusionList=", ENCODING);
        doc = Jsoup.parse(html);
    }

    if (doc.select(".nodata").size() > 0) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    doc.setBaseUri(opac_url + "/searchfoo");

    int results_total = -1;

    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        reusehtml = html;
        throw new OpacErrorException("is_a_redirect");
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data > tbody > tr");
    identifier = null;

    // Take the "identifier" parameter from the first link to a single hit
    for (Element node : doc.select("table.data a")) {
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do")) {
            try {
                List<NameValuePair> anyurl = URLEncodedUtils
                        .parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                // best effort: the hit list is still usable without the identifier
                e.printStackTrace();
            }
            // only the first matching link is considered
            break;
        }
    }

    // File names can look like this: "20_DVD_Video.gif"
    Pattern numberedIcon = Pattern.compile("(\\d+)_.*");

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type, derived from the file name of the type icon
        if (tr.select(".icn, img[width=32]").size() > 0) {
            String[] fparts = tr.select(".icn, img[width=32]").first().attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "")
                    .replace(".png", "");

            Matcher matcher = numberedIcon.matcher(changedFname);
            if (matcher.find()) {
                changedFname = matcher.group(1);
            }

            MediaType defaulttype = defaulttypes.get(changedFname);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(defaulttype);
                }
            } else {
                sr.setType(defaulttype);
            }
        }

        String title;
        String text;
        if (tr.select(".results table").size() > 0) { // e.g. RWTH Aachen
            title = tr.select(".title a").text();
            text = tr.select(".title div").text();
        } else { // e.g. Schaffhausen, BSB München
            title = tr.select(".title, .hitlistTitle").text();
            // a row without a metadata element gets an empty description
            Element metadata = tr.select(".results, .hitlistMetadata").first();
            text = metadata != null ? metadata.ownText() : "";
        }

        // we need to do some evil javascript parsing here to get the cover
        // and loan status of the item

        // get cover
        if (tr.select(".cover script").size() > 0) {
            String js = tr.select(".cover script").first().html();
            String isbn = matchJSVariable(js, "isbn");
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(isbn) && !"".equals(ajaxUrl)) {
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=small", ENCODING);
                if (!"".equals(coverUrl)) {
                    sr.setCover(coverUrl.replace("\r\n", "").trim());
                }
            }
        }

        // get loan status and media ID
        if (tr.select("div[id^=loanstatus] + script").size() > 0) {
            String js = tr.select("div[id^=loanstatus] + script").first().html();
            String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier",
                    "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit",
                    "context" };
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(ajaxUrl)) {
                JSONObject id = new JSONObject();
                List<NameValuePair> map = new ArrayList<>();
                for (String variable : variables) {
                    String value = matchJSVariable(js, variable);
                    if (!"".equals(value)) {
                        map.add(new BasicNameValuePair(variable, value));
                    }
                    try {
                        if (variable.equals("itemIdentifier")) {
                            id.put("id", value);
                        } else if (variable.equals("loanstateDBId")) {
                            id.put("db", value);
                        }
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                }
                sr.setId(id.toString());
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING)
                        .replace("\r\n", "").trim();
                Document loanStatusDoc = Jsoup.parse(loanStatusHtml);
                String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim();

                // The umlauts are written as escapes so the literals survive any source encoding
                boolean lent = loanstatus.startsWith("entliehen");
                if ((lent && loanstatus.contains("keine Vormerkung m\u00f6glich"))
                        || loanstatus.contains("Keine Exemplare verf\u00fcgbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (lent || loanstatus.contains("andere Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar"))
                        || (loanstatus.startsWith("vorbestellbar")
                                && !loanstatus.contains("nicht vorbestellbar"))
                        || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar"))
                        || (loanstatus.contains("heute zur\u00fcckgebucht"))
                        || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        // Especially Onleihe.de ebooks are often marked
                        // green though they are not available.
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
        }

        sr.setInnerhtml(("<b>" + title + "</b><br/>") + text);

        sr.setNr(10 * (page - 1) + i + 1);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Reads the available search fields from the advanced search form.
 *
 * <p>Every option of {@code #formSelectTerm_1} becomes a text field; every filter block
 * becomes a dropdown whose first entry is the empty value labelled "Alle".
 *
 * @return text fields followed by dropdown filters, in document order
 */
@Override
public List<SearchField> getSearchFields() throws IOException {
    if (!initialised) {
        start();
    }

    String formHtml = httpGet(opac_url + "/APS_ZONES?fn=AdvancedSearch&Style=Portal3&SubStyle=&Lang=GER"
            + "&ResponseEncoding=utf-8", getDefaultEncoding());
    Document form = Jsoup.parse(formHtml);
    List<SearchField> found = new ArrayList<>();

    // text fields: one per option of the term selector
    for (Element option : form.select("#formSelectTerm_1 option")) {
        TextSearchField textField = new TextSearchField();
        textField.setId(option.attr("value"));
        textField.setHint("");
        textField.setDisplayName(option.text());
        found.add(textField);
    }

    // filters: the markup differs between the two supported layouts
    Elements filterBlocks;
    if (version18) {
        filterBlocks = form.select(".inSearchLimits .floatingBox");
    } else {
        filterBlocks = form.select(".TabRechAv .limitBlock");
    }
    for (int index = 0; index < filterBlocks.size(); index++) {
        Element block = filterBlocks.get(index);
        DropdownSearchField filterField = new DropdownSearchField();
        filterField.addDropdownValue("", "Alle");
        // All dropdowns use "q.limits.limit" as URL param, but they must not have the same ID
        filterField.setId("dropdown_" + index);

        if (version18) {
            // first row: caption, second row: nested table with one input per choice
            filterField.setDisplayName(block.select("tr").get(0).text().trim());
            for (Element choice : block.select("tr").get(1).select("table td:has(input)")) {
                String value = choice.select("input").attr("value");
                filterField.addDropdownValue(value, choice.text().trim());
            }
        } else {
            // the caption precedes the parent of the filter block
            filterField.setDisplayName(block.parent().previousElementSibling().text().trim());
            for (Element label : block.select(".limitChoice label")) {
                filterField.addDropdownValue(label.attr("for"), label.text().trim());
            }
        }
        found.add(filterField);
    }

    return found;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses a TouchPoint detail page into a {@link DetailledItem}.
 *
 * <p>Extracts cover, title, detail rows, copies, reservation info and volume search
 * parameters. Cover, copies and reservation info each need additional HTTP requests.
 *
 * @param html raw HTML of the detail page
 * @return the parsed item
 * @throws IOException if loading the cover or the copies fails
 */
protected DetailledItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    // Cover: the URL is obtained through an AJAX endpoint named in an inline script
    if (doc.select("#cover script").size() > 0) {
        String js = doc.select("#cover script").first().html();
        String isbn = matchJSVariable(js, "isbn");
        String ajaxUrl = matchJSVariable(js, "ajaxUrl");
        if (ajaxUrl == null) {
            ajaxUrl = matchJSParameter(js, "url");
        }
        if (ajaxUrl != null && !"".equals(ajaxUrl)) {
            String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
            if (!"".equals(isbn) && isbn != null) {
                String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=medium", ENCODING);
                if (!"".equals(coverUrl)) {
                    result.setCover(coverUrl.replace("\r\n", "").trim());
                }
            } else {
                String coverJs = httpGet(url, ENCODING);
                result.setCover(matchHTMLAttr(coverJs, "src"));
            }
        }
    }

    result.setTitle(doc.select("h1").first().text());
    for (Element tr : doc.select(".titleinfo tr")) {
        // Sometimes there is one th and one td, sometimes two tds
        Element nameCell = tr.select("th, td").first();
        Element valueCell = tr.select("td").last();
        if (nameCell == null || valueCell == null) {
            // row without a name or value cell, nothing to record
            continue;
        }
        String detailName = nameCell.text().trim();
        String detailValue = valueCell.text().trim();
        result.addDetail(new Detail(detailName, detailValue));
        if (detailName.contains("ID in diesem Katalog")) {
            result.setId(detailValue);
        }
    }
    if (result.getDetails().size() == 0 && doc.select("#details").size() > 0) {
        // e.g. Bayreuth_Uni
        // Flat list: <strong> nodes carry names, everything up to the next <strong> is the value
        String dname = "";
        String dval = "";
        boolean in_value = true;
        for (Node n : doc.select("#details").first().childNodes()) {
            if (n instanceof Element && ((Element) n).tagName().equals("strong")) {
                if (in_value) {
                    // a new name starts: store the pair collected so far
                    if (dname.length() > 0 && dval.length() > 0) {
                        result.addDetail(new Detail(dname, dval));
                    }
                    dname = ((Element) n).text();
                    in_value = false;
                } else {
                    dname += ((Element) n).text();
                }
            } else {
                String t = null;
                if (n instanceof TextNode) {
                    t = ((TextNode) n).text();
                } else if (n instanceof Element) {
                    t = ((Element) n).text();
                }
                if (t != null) {
                    if (in_value) {
                        dval += t;
                    } else {
                        in_value = true;
                        dval = t;
                    }
                }
            }
        }
        // Store the last pair as well; dval is only current if a value followed the last name
        if (in_value && dname.length() > 0 && dval.length() > 0) {
            result.addDetail(new Detail(dname, dval));
        }
    }

    // Copies
    // NOTE(review): "&amp;" is removed rather than replaced by "&" - confirm this is intended
    String copiesParameter = doc.select("div[id^=ajax_holdings_url]").attr("ajaxParameter").replace("&amp;", "");
    if (!"".equals(copiesParameter)) {
        String copiesHtml = httpGet(opac_url + "/" + copiesParameter, ENCODING);
        Document copiesDoc = Jsoup.parse(copiesHtml);
        // One entry per header cell; null marks a column that is not evaluated
        List<String> table_keys = new ArrayList<>();
        for (Element th : copiesDoc.select(".data tr th")) {
            if (th.text().contains("Zweigstelle")) {
                table_keys.add("branch");
            } else if (th.text().contains("Status")) {
                table_keys.add("status");
            } else if (th.text().contains("Signatur")) {
                table_keys.add("signature");
            } else {
                table_keys.add(null);
            }
        }
        for (Element tr : copiesDoc.select(".data tr:has(td)")) {
            Copy copy = new Copy();
            int i = 0;
            for (Element td : tr.select("td")) {
                // cells beyond the known header columns are ignored
                if (i < table_keys.size() && table_keys.get(i) != null) {
                    copy.set(table_keys.get(i), td.text().trim());
                }
                i++;
            }
            result.addCopy(copy);
        }
    }

    // Reservation Info, only works if the code above could find a URL
    if (!"".equals(copiesParameter)) {
        String reservationParameter = copiesParameter.replace("showHoldings", "showDocument");
        try {
            String reservationHtml = httpGet(opac_url + "/" + reservationParameter, ENCODING);
            Document reservationDoc = Jsoup.parse(reservationHtml);
            reservationDoc.setBaseUri(opac_url);
            if (reservationDoc.select("a").size() == 1) {
                result.setReservable(true);
                result.setReservation_info(reservationDoc.select("a").first().attr("abs:href"));
            }
        } catch (Exception e) {
            e.printStackTrace();
            // fail silently
        }
    }

    // Volumes: look for a link that triggers a volume search and remember its parameters
    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        // best effort: an unparsable link must not prevent returning the item
        e.printStackTrace();
    }

    return result;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Prolongs (renews) a lent item, optionally asking the user for confirmation first.
 *
 * <p>The connection is initialised via {@code start()} if necessary, and {@code account(account)}
 * is called again when the session is older than {@code SESSION_LIFETIME}, nobody is logged in,
 * or a different account is currently logged in.
 *
 * <p>Unless the user has already confirmed ({@code useraction ==
 * MultiStepResult.ACTION_CONFIRMATION}), the prolong page is fetched and inspected:
 * <ul>
 * <li>exactly one {@code .kontomeldung} element: its text is returned as an error;</li>
 * <li>no single {@code #verlaengern} element: a generic error is returned;</li>
 * <li>exactly one {@code .kontozeile_center table}: its label/value rows are returned as details
 * of a {@code CONFIRMATION_NEEDED} result;</li>
 * <li>otherwise the prolongation is submitted right away.</li>
 * </ul>
 *
 * @param a          relative URL of the prolong page; appended to {@code opac_url + "/"}
 * @param account    account the item belongs to
 * @param useraction {@code MultiStepResult.ACTION_CONFIRMATION} if the user already confirmed the
 *                   details shown by an earlier call
 * @param selection  not used by this implementation
 * @return the outcome of the (possibly multi-step) prolongation
 * @throws IOException if one of the HTTP requests fails
 */
@Override
public ProlongResult prolong(String a, Account account, int useraction, String selection) throws IOException {
    if (!initialised) {
        start();
    }

    // One combined check instead of two identical try/catch blocks. The null check comes before
    // the id comparison so logged_in_as is never dereferenced while null.
    boolean sessionExpired = System.currentTimeMillis() - logged_in > SESSION_LIFETIME;
    if (sessionExpired || logged_in_as == null || logged_in_as.getId() != account.getId()) {
        try {
            account(account);
        } catch (JSONException e) {
            e.printStackTrace();
            return new ProlongResult(MultiStepResult.Status.ERROR,
                    stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT));
        } catch (OpacErrorException e) {
            return new ProlongResult(MultiStepResult.Status.ERROR, e.getMessage());
        }
    }

    if (useraction != MultiStepResult.ACTION_CONFIRMATION) {
        String html = httpGet(opac_url + "/" + a, getDefaultEncoding());
        Document doc = Jsoup.parse(html);

        if (doc.getElementsByClass("kontomeldung").size() == 1) {
            // The OPAC reports a problem for this item.
            return new ProlongResult(MultiStepResult.Status.ERROR,
                    doc.getElementsByClass("kontomeldung").get(0).text());
        }
        if (doc.select("#verlaengern").size() != 1) {
            // Neither an error message nor a prolong button: the page is not understood.
            return new ProlongResult(MultiStepResult.Status.ERROR, "??");
        }
        if (doc.select(".kontozeile_center table").size() == 1) {
            // The page shows a details table: hand it to the user before submitting.
            Element table = doc.select(".kontozeile_center table").first();
            List<String[]> details = new ArrayList<>();
            for (Element row : table.select("tr")) {
                if (row.select(".konto_feld").size() == 1 && row.select(".konto_feldinhalt").size() == 1) {
                    details.add(new String[] { row.select(".konto_feld").text().trim(),
                            row.select(".konto_feldinhalt").text().trim() });
                }
            }
            ProlongResult res = new ProlongResult(MultiStepResult.Status.CONFIRMATION_NEEDED);
            res.setDetails(details);
            return res;
        }
    }

    // Reached when the user has confirmed, or when the page offers nothing to confirm.
    List<NameValuePair> nameValuePairs = new ArrayList<>(2);
    nameValuePairs.add(new BasicNameValuePair("target", "make_vl"));
    // Written as a Unicode escape so the umlaut cannot get lost when the file is re-encoded.
    nameValuePairs.add(new BasicNameValuePair("verlaengern", "Best\u00e4tigung"));
    httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding());

    return new ProlongResult(MultiStepResult.Status.OK);
}

From source file:de.geeksfactory.opacclient.apis.IOpac.java

/**
 * Places a reservation for {@code item} on behalf of {@code account}.
 *
 * <p>The flow is: fetch the page behind the item's reservation info, post the account
 * credentials together with the hidden fields of {@code form1} to {@code /cgi-bin/di.exe}, and,
 * if the answer contains another {@code form1}, submit that form as well. The final page is then
 * scanned for known failure phrases.
 *
 * <p>Pages without any {@code <table>} are tolerated: the error result then simply carries no
 * message.
 *
 * @param item       item to reserve; its reservation info is used as a relative URL
 * @param account    account whose name and password are sent to the OPAC
 * @param useraction not used by this implementation
 * @param selection  not used by this implementation
 * @return {@code Status.OK} on success, otherwise {@code Status.ERROR} (with the text of the
 *         first table as message when one is present)
 * @throws IOException if one of the HTTP requests fails
 */
@Override
public ReservationResult reservation(DetailledItem item, Account account, int useraction, String selection)
        throws IOException {
    String reservation_info = item.getReservation_info();

    // STEP 1: Login page
    String html = httpGet(opac_url + "/" + reservation_info, getDefaultEncoding());
    Document doc = Jsoup.parse(html);

    // first() returns null when the page has no table at all, so guard before dereferencing.
    Element firstTable = doc.select("table").first();
    if (firstTable != null && firstTable.text().contains("kann nicht")) {
        return new ReservationResult(MultiStepResult.Status.ERROR, firstTable.text().trim());
    }

    Element form = doc.select("form[name=form1]").first();
    if (form == null) {
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }

    List<BasicNameValuePair> params = new ArrayList<>();
    params.add(new BasicNameValuePair("sleKndNr", account.getName()));
    params.add(new BasicNameValuePair("slePw", account.getPassword()));
    params.add(new BasicNameValuePair("pshLogin", "Reservieren"));
    for (Element input : form.select("input[type=hidden]")) {
        params.add(new BasicNameValuePair(input.attr("name"), input.attr("value")));
    }

    // STEP 2: Confirmation page
    html = httpPost(opac_url + "/cgi-bin/di.exe", new UrlEncodedFormEntity(params), getDefaultEncoding());
    doc = Jsoup.parse(html);

    Element confirmationForm = doc.select("form[name=form1]").first();
    if (confirmationForm != null) {
        // STEP 3: There is another confirmation needed
        html = httpGet(opac_url + "/" + generateQuery(confirmationForm), getDefaultEncoding());
        doc = Jsoup.parse(html);
    }

    // Extract the page text once instead of three times.
    String pageText = doc.text();
    boolean failed = pageText.contains("fehlgeschlagen") || pageText.contains("Achtung")
            || pageText.contains("nicht m");
    if (!failed) {
        return new ReservationResult(MultiStepResult.Status.OK);
    }

    Element messageTable = doc.select("table").first();
    if (messageTable == null) {
        // Failure phrase found, but no table to take a message from.
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }
    return new ReservationResult(MultiStepResult.Status.ERROR, messageTable.text().trim());
}

From source file:us.colloquy.index.IndexHandler.java

/**
 * Collects pointers to diary documents from every {@code .ncx} file found under
 * {@code pathToLetters} (searched up to six directory levels deep).
 *
 * <p>For each file, the text of the last child of the {@code docTitle} elements is used as the
 * title. The {@code navPoint} entries are then walked in document order; while collection is
 * switched on, every entry whose label does not match the skip pattern is appended to
 * {@code documentPointers}. The pointer is built from the file's parent directory plus the
 * entry's {@code src} attribute with any {@code #fragment} removed.
 *
 * <p>Errors are printed to standard error and otherwise ignored; an exception while processing
 * one file ends the processing of all remaining files.
 *
 * <p>NOTE(review): the {@code "??"} literals below look like non-ASCII text that was lost in an
 * encoding round trip; as written, two of them do not appear to be valid regular expressions.
 * Confirm against the original source before relying on this method.
 *
 * @param documentPointers list that receives the pointers; modified in place
 * @param pathToLetters    root directory to search
 */
public void getURIForAllDiaries(List<DocumentPointer> documentPointers, Path pathToLetters) {
    final int searchDepth = 6;
    List<Path> ncxFiles = new ArrayList<>();

    try (Stream<Path> matches = Files.find(pathToLetters, searchDepth,
            (candidate, attributes) -> candidate.toString().endsWith(".ncx"))) {
        matches.forEachOrdered(ncxFiles::add);
    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println("files: " + ncxFiles.size());

    try {
        for (Path ncxFile : ncxFiles) {
            Path bookDir = ncxFile.getParent();
            Document toc = Jsoup.parse(ncxFile.toFile(), "UTF-8");

            // The last child of the last docTitle element wins.
            String bookTitle = "";
            for (Element docTitle : toc.getElementsByTag("docTitle")) {
                for (Element titlePart : docTitle.children()) {
                    bookTitle = titlePart.text();
                }
            }

            boolean collecting = false;
            boolean awaitingStart = true;

            for (Element navPoint : toc.getElementsByTag("navPoint")) {
                Element labelNode = navPoint.select("navLabel").first();
                Element contentNode = navPoint.select("content").first();

                String navLabel = labelNode == null ? "" : labelNode.text().replaceAll("\\*", "").trim();
                String src = contentNode == null ? "" : contentNode.attr("src");

                // Note: the label is the regex here and the literal is the input.
                if ("??".matches(navLabel)) {
                    collecting = false;
                }

                // Collection is switched on at most once per file.
                if (StringUtils.isNotEmpty(navLabel)
                        && navLabel.matches("??.*|?? ?.*") && awaitingStart) {
                    awaitingStart = false;
                    collecting = true;
                }

                if (!collecting || navLabel
                        .matches("(|??? ??)")) {
                    continue;
                }

                String location = bookDir.toString() + File.separator + src.replaceAll("#.*", "");
                documentPointers.add(new DocumentPointer(location, bookTitle));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    System.out.println("Size: " + documentPointers.size());
}

From source file:de.geeksfactory.opacclient.apis.BiBer1992.java

/**
 * Builds the list of search fields by scraping the OPAC's search form.
 *
 * <p>The form is loaded from {@code de/qsel.html.S} when {@code opacDir} contains
 * {@code "opax"}, otherwise from {@code de/qsel_main.S}. Three kinds of fields are extracted:
 * <ul>
 * <li>one text field per option of {@code select[name=REG1]};</li>
 * <li>a "Medientyp" dropdown built from the inputs whose name matches {@code (MT|MS)};</li>
 * <li>a branch dropdown built from the options of {@code select[name=ZW]}.</li>
 * </ul>
 *
 * @return the search fields found in the form, in the order listed above
 * @throws NotReachableException if the server answers with HTTP status 500
 * @throws IOException           if the request or reading the response fails
 */
@Override
public List<SearchField> getSearchFields() throws IOException {
    List<SearchField> fields = new ArrayList<>();

    String formPage = opacDir.contains("opax") ? "/de/qsel.html.S" : "/de/qsel_main.S";
    HttpGet httpget = new HttpGet(opacUrl + "/" + opacDir + formPage);

    HttpResponse response = http_client.execute(httpget);

    if (response.getStatusLine().getStatusCode() == 500) {
        String reason = response.getStatusLine().getReasonPhrase();
        // Consume the body before bailing out so the connection is released.
        if (response.getEntity() != null) {
            HttpUtils.consume(response.getEntity());
        }
        throw new NotReachableException(reason);
    }
    String html = convertStreamToString(response.getEntity().getContent());
    HttpUtils.consume(response.getEntity());

    Document doc = Jsoup.parse(html);

    // get text fields
    Elements text_opts = doc.select("form select[name=REG1] option");
    for (Element opt : text_opts) {
        TextSearchField field = new TextSearchField();
        field.setId(opt.attr("value"));
        field.setDisplayName(opt.text());
        field.setHint("");
        fields.add(field);
    }

    // get media types
    Elements mt_opts = doc.select("form input[name~=(MT|MS)]");
    if (mt_opts.size() > 0) {
        DropdownSearchField mtDropdown = new DropdownSearchField();
        mtDropdown.setId(mt_opts.get(0).attr("name"));
        mtDropdown.setDisplayName("Medientyp");
        for (Element opt : mt_opts) {
            if (!opt.val().equals("")) {
                mtDropdown.addDropdownValue(opt.val(), findMediaTypeLabel(opt));
            }
        }
        fields.add(mtDropdown);
    }

    // get branches
    Elements br_opts = doc.select("form select[name=ZW] option");
    if (br_opts.size() > 0) {
        Element select = br_opts.get(0).parent();
        String branchFieldId = select.attr("name");

        // The label is expected in the element preceding the select's container. If the
        // container is the first child, there is no such element; fall back to the field id.
        Element labelElement = select.parent().previousElementSibling();
        // NOTE(review): the "?" removed below may stand for a character lost in an encoding
        // round trip - confirm against the original source.
        String displayName = labelElement == null ? branchFieldId
                : labelElement.text().replace("\u00a0", "").replace("?", "").trim();

        DropdownSearchField brDropdown = new DropdownSearchField();
        brDropdown.setId(branchFieldId);
        brDropdown.setDisplayName(displayName);
        for (Element opt : br_opts) {
            brDropdown.addDropdownValue(opt.val(), opt.text());
        }
        fields.add(brDropdown);
    }

    return fields;
}

/**
 * Determines the human-readable label of a media type input, trying the known page layouts in
 * order until one yields a non-empty text.
 *
 * @param opt the media type {@code input} element
 * @return the label, or whatever {@code parse_option_regex} returns if no layout matched
 */
private String findMediaTypeLabel(Element opt) {
    String text = opt.text();
    if (text.length() == 0) {
        // text is empty, check layouts:
        // Essen: <input name="MT"><img title="mediatype">
        // Schaffenb: <input name="MT"><img alt="mediatype">
        Element img = opt.nextElementSibling();
        if (img != null && img.tagName().equals("img")) {
            text = img.attr("title");
            if (text.equals("")) {
                text = img.attr("alt");
            }
        }
    }
    if (text.length() == 0) {
        // text is still empty, check table layout, Example
        // Friedrichshafen
        // <td><input name="MT"></td> <td><img
        // title="mediatype"></td>
        Element nextCell = opt.parent().nextElementSibling();
        if (nextCell != null) {
            Elements cellImages = nextCell.select("img[title]");
            if (cellImages.size() > 0) {
                text = cellImages.get(0).attr("title");
            }
        }
    }
    if (text.length() == 0) {
        // text is still empty, check images in label layout:
        // <input type="radio" name="MT" id="MTYP1" value="MTYP1">
        // <label for="MTYP1"><img src="..." alt="..." title="..."></label>
        Element label = opt.nextElementSibling();
        if (label != null) {
            Elements labelImages = label.select("img[title]");
            if (labelImages.size() > 0) {
                text = labelImages.get(0).attr("title");
            }
        }
    }
    if (text.length() == 0) {
        // text is still empty: missing end tag like Offenburg
        text = parse_option_regex(opt);
    }
    return text;
}