Example usage for org.jsoup.nodes Document setBaseUri

List of usage examples for org.jsoup.nodes Document setBaseUri

Introduction

On this page you can find example usages of org.jsoup.nodes.Document#setBaseUri.

Prototype

public void setBaseUri(final String baseUri) 

Source Link

Document

Update the base URI of this node and all of its descendants.

Usage

From source file:de.geeksfactory.opacclient.apis.Heidi.java

/**
 * Loads the account overview: pending fees, lent items and reservations.
 *
 * <p>Logs in, fetches {@code konto.cgi} for the current session and parses the fee cell of
 * {@code table.konto} and the rows of {@code table.kontopos}. Reservations are collected from
 * two further {@code konto.cgi} pages ({@code konto=v} and {@code konto=b}) via
 * {@code parse_reservations}.
 *
 * <p>Rows of the lent table that do not have the expected three cells or a description
 * {@code label} are skipped instead of aborting the whole request.
 *
 * @param account the account to log in with
 * @return the collected account data
 * @throws IOException        declared by the overridden method; propagated from the HTTP helpers
 * @throws JSONException      declared by the overridden method
 * @throws OpacErrorException declared by the overridden method
 */
@Override
public AccountData account(Account account) throws IOException, JSONException, OpacErrorException {
    login(account);
    String html;
    Document doc;
    AccountData adata = new AccountData(account.getId());
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    html = httpGet(opac_url + "/konto.cgi?sess=" + sessid, getDefaultEncoding());
    doc = Jsoup.parse(html);
    // The base URI is needed so that absUrl("href") below can resolve relative links.
    doc.setBaseUri(opac_url + "/");

    for (Element td : doc.select("table.konto td")) {
        if (td.text().contains("Offene")) {
            // "Geb.+hren" matches the umlaut with a wildcard, independent of the page encoding.
            String text = td.text().trim().replaceAll(
                    "Offene[^0-9]+Geb.+hren:[^0-9]+([0-9.," + "]+)[^0-9A-Z]*(|EUR|CHF|Fr.)", "$1 $2");
            adata.setPendingFees(text);
        }
    }

    List<LentItem> lent = new ArrayList<>();
    for (Element tr : doc.select("table.kontopos tr")) {
        // Cells 0..2 are accessed below; rows without them cannot describe a lent item.
        if (tr.children().size() < 3) {
            continue;
        }
        Element desc = tr.child(1).select("label").first();
        if (desc == null) {
            continue;
        }

        LentItem item = new LentItem();
        String dates = tr.child(2).text().trim();
        Element firstLink = tr.child(1).select("a").first();
        if (firstLink != null) {
            String kk = getQueryParamsFirst(firstLink.absUrl("href")).get("katkey");
            item.setId(kk);
        }

        // The label's text nodes are, in order: author, title, then optional extra lines.
        int i = 0;
        for (Node node : desc.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (i == 0) {
                    item.setAuthor(text);
                } else if (i == 1) {
                    item.setTitle(text);
                } else if (text.contains("Mediennummer")) {
                    item.setBarcode(text.replace("Mediennummer: ", ""));
                }
                i++;
            }
        }

        if (tr.child(0).select("input").size() == 1) {
            item.setProlongData(tr.child(0).select("input").first().val());
            item.setRenewable(true);
        } else {
            // Without an input, the CSS class of the span is stored as prolong data.
            Element statusSpan = tr.child(0).select("span").first();
            if (statusSpan != null) {
                item.setProlongData("" + statusSpan.attr("class"));
            }
            item.setRenewable(false);
        }

        // The cell may contain a range "from - to"; the deadline is the part after the dash.
        String todate = dates;
        if (todate.contains("-")) {
            String[] datesplit = todate.split("-");
            if (datesplit.length > 1) {
                todate = datesplit[1].trim();
            }
        }
        // substring(0, 10) would throw an index exception (not caught below) on shorter text.
        if (todate.length() >= 10) {
            try {
                item.setDeadline(fmt.parseLocalDate(todate.substring(0, 10)));
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
            }
        }

        lent.add(item);
    }
    adata.setLent(lent);

    List<ReservedItem> reservations = new ArrayList<>();
    html = httpGet(opac_url + "/konto.cgi?konto=v&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    html = httpGet(opac_url + "/konto.cgi?konto=b&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));

    adata.setReservations(reservations);

    return adata;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Parses a Bibliotheca detail page into a {@link DetailledItem}.
 *
 * <p>Extracts cover, title, detail rows, centered detail links, the copies table, volumes and
 * the reservation link. The column layout of the copies table is taken from the
 * {@code copiestable} entry of {@code data} if present, otherwise it is derived from the table
 * headers.
 *
 * @param html HTML of the detail page
 * @return the parsed item; sections that fail to parse are left out
 */
protected DetailledItem parse_result(String html) {
    Document doc = Jsoup.parse(html);
    // The base URI is needed so that absUrl("href") below can resolve relative links.
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    Elements coverImages = doc.select(".detail_cover img");
    if (coverImages.size() == 1) {
        result.setCover(coverImages.get(0).attr("src"));
    }

    result.setTitle(doc.select(".detail_titel").text());

    Elements detailtrs = doc.select(".detailzeile table tr");
    for (int i = 0; i < detailtrs.size(); i++) {
        Element tr = detailtrs.get(i);
        if (tr.child(0).hasClass("detail_feld")) {
            String title = tr.child(0).text();
            String content = tr.child(1).text();
            if (title.equals("Gesamtwerk:") || title.equals("Erschienen in:")) {
                // These rows link to the parent work; only its "MedienNr" is kept.
                try {
                    if (tr.child(1).select("a").size() > 0) {
                        Element link = tr.child(1).select("a").first();
                        List<NameValuePair> query = URLEncodedUtils.parse(new URI(link.absUrl("href")),
                                "UTF-8");
                        for (NameValuePair q : query) {
                            if (q.getName().equals("MedienNr")) {
                                result.setCollectionId(q.getValue());
                            }
                        }
                    }
                } catch (URISyntaxException ignored) {
                    // A malformed link only means that no collection id can be extracted.
                }
            } else {

                if (content.contains("hier klicken") && tr.child(1).select("a").size() > 0) {
                    content += " " + tr.child(1).select("a").first().attr("href");
                }

                result.addDetail(new Detail(title, content));
            }
        }
    }

    Elements detailcenterlinks = doc.select(".detailzeile_center a.detail_link");
    for (int i = 0; i < detailcenterlinks.size(); i++) {
        Element a = detailcenterlinks.get(i);
        result.addDetail(new Detail(a.text().trim(), a.absUrl("href")));
    }

    try {
        // Maps a copy field name to the index of the table column holding it.
        JSONObject copymap = new JSONObject();
        if (data.has("copiestable")) {
            copymap = data.getJSONObject("copiestable");
        } else {
            Elements ths = doc.select(".exemplartab .exemplarmenubar th");
            for (int i = 0; i < ths.size(); i++) {
                Element th = ths.get(i);
                String head = th.text().trim();
                if (head.equals("Zweigstelle")) {
                    copymap.put("branch", i);
                } else if (head.equals("Abteilung")) {
                    copymap.put("department", i);
                } else if (head.equals("Bereich") || head.equals("Standort")) {
                    copymap.put("location", i);
                } else if (head.equals("Signatur")) {
                    copymap.put("signature", i);
                } else if (head.equals("Barcode") || head.equals("Medien-Nummer")) {
                    copymap.put("barcode", i);
                } else if (head.equals("Status")) {
                    copymap.put("status", i);
                } else if (head.equals("Frist") || head.matches("Verf.+gbar")) {
                    copymap.put("returndate", i);
                } else if (head.equals("Vorbestellungen") || head.equals("Reservierungen")) {
                    copymap.put("reservations", i);
                }
            }
        }
        Elements exemplartrs = doc.select(".exemplartab .tabExemplar, .exemplartab .tabExemplar_");
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        for (int i = 0; i < exemplartrs.size(); i++) {
            Element tr = exemplartrs.get(i);

            Copy copy = new Copy();

            Iterator<?> keys = copymap.keys();
            while (keys.hasNext()) {
                String key = (String) keys.next();
                int index;
                try {
                    index = copymap.getInt(key);
                } catch (JSONException e1) {
                    index = -1;
                }
                // Skip columns this row does not have, so one short row cannot abort all copies.
                if (index >= 0 && index < tr.children().size()) {
                    try {
                        copy.set(key, tr.child(index).text(), fmt);
                    } catch (IllegalArgumentException e) {
                        e.printStackTrace();
                    }
                }
            }

            result.addCopy(copy);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    try {
        Elements bandtrs = doc.select("table .tabBand a");
        for (int i = 0; i < bandtrs.size(); i++) {
            Element link = bandtrs.get(i);

            // The volume id is the part of the href after the first '='.
            String[] hrefParts = link.attr("href").split("=");
            if (hrefParts.length < 2) {
                // No id in this link; skip it instead of dropping all following volumes.
                continue;
            }

            Volume volume = new Volume();
            volume.setId(hrefParts[1]);
            volume.setTitle(link.text());
            result.addVolume(volume);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    Elements reservationLinks = doc.select(".detail_vorbest a");
    if (reservationLinks.size() == 1) {
        result.setReservable(true);
        result.setReservation_info(reservationLinks.attr("href"));
    }
    return result;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Loads lent items, reservations (requested and ordered media) and pending fees.
 *
 * <p>For each of the three account lists the first page is parsed and then every link with an
 * {@code href} inside the first {@code .pagination} element is fetched and parsed as well.
 *
 * @param acc the account to log in with
 * @return the collected account data, or {@code null} if the login was not successful
 * @throws IOException        propagated from the HTTP helpers
 * @throws JSONException      declared by the overridden method
 * @throws OpacErrorException declared by the overridden method
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    start();
    LoginResponse login = login(acc);
    if (!login.success) {
        return null;
    }
    AccountData adata = new AccountData(acc.getId());
    if (login.warning != null) {
        adata.setWarning(login.warning);
    }

    // Lent media
    httpGet(opac_url + "/userAccount.do?methodToCall=start", ENCODING);
    String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=loaned", ENCODING);
    List<LentItem> lent = new ArrayList<>();
    Document doc = Jsoup.parse(html);
    // The base URI is needed so that attr("abs:href") on the pagination links resolves.
    doc.setBaseUri(opac_url);
    List<LentItem> nextpageLent = parse_medialist(doc);
    if (nextpageLent != null) {
        lent.addAll(nextpageLent);
    }
    if (doc.select(".pagination").size() > 0) {
        Element pagination = doc.select(".pagination").first();
        Elements pages = pagination.select("a");
        for (Element page : pages) {
            if (!page.hasAttr("href")) {
                continue;
            }
            html = httpGet(page.attr("abs:href"), ENCODING);
            doc = Jsoup.parse(html);
            doc.setBaseUri(opac_url);
            nextpageLent = parse_medialist(doc);
            if (nextpageLent != null) {
                lent.addAll(nextpageLent);
            }
        }
    }
    adata.setLent(lent);

    // Requested media ("Vormerkungen")
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=requested", ENCODING);
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    List<ReservedItem> requested = new ArrayList<>();
    List<ReservedItem> nextpageRes = parse_reslist(doc);
    if (nextpageRes != null) {
        requested.addAll(nextpageRes);
    }
    if (doc.select(".pagination").size() > 0) {
        Element pagination = doc.select(".pagination").first();
        Elements pages = pagination.select("a");
        for (Element page : pages) {
            if (!page.hasAttr("href")) {
                continue;
            }
            html = httpGet(page.attr("abs:href"), ENCODING);
            doc = Jsoup.parse(html);
            doc.setBaseUri(opac_url);
            nextpageRes = parse_reslist(doc);
            if (nextpageRes != null) {
                requested.addAll(nextpageRes);
            }
        }
    }

    // Ordered media ("Bestellungen"); they are added to the same reservation list.
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=ordered", ENCODING);
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    List<ReservedItem> nextpageOrd = parse_reslist(doc);
    if (nextpageOrd != null) {
        requested.addAll(nextpageOrd);
    }
    if (doc.select(".pagination").size() > 0) {
        Element pagination = doc.select(".pagination").first();
        Elements pages = pagination.select("a");
        for (Element page : pages) {
            if (!page.hasAttr("href")) {
                continue;
            }
            html = httpGet(page.attr("abs:href"), ENCODING);
            doc = Jsoup.parse(html);
            doc.setBaseUri(opac_url);
            nextpageOrd = parse_reslist(doc);
            if (nextpageOrd != null) {
                requested.addAll(nextpageOrd);
            }
        }
    }
    adata.setReservations(requested);

    // Fees
    // NOTE(review): "doc" is whichever page was parsed last above (possibly a later page of
    // the ordered list); this presumably relies on #fees being present on every account page.
    if (doc.select("#fees").size() > 0) {
        // "Geb.+hren" matches the umlaut with a wildcard, independent of the page encoding.
        final String feeRegex = "Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)";
        String text = doc.select("#fees").first().text().trim();
        if (text.matches(feeRegex)) {
            text = text.replaceAll(feeRegex, "$1 $2");
            adata.setPendingFees(text);
        }
    }

    return adata;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses a TouchPoint detail page into a {@link DetailledItem}.
 *
 * <p>Besides parsing the given HTML, this method performs further HTTP requests: one for the
 * cover (if the page contains a cover script), one for the holdings table and one for the
 * reservation link (both only if a holdings URL parameter is found).
 *
 * @param html HTML of the detail page
 * @return the parsed item
 * @throws IOException if the cover or holdings request fails; failures of the reservation
 *                     request and of the volume link parsing are only printed
 */
protected DetailledItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    if (doc.select("#cover script").size() > 0) {
        String js = doc.select("#cover script").first().html();
        String isbn = matchJSVariable(js, "isbn");
        String ajaxUrl = matchJSVariable(js, "ajaxUrl");
        if (ajaxUrl == null) {
            ajaxUrl = matchJSParameter(js, "url");
        }
        if (ajaxUrl != null && !"".equals(ajaxUrl)) {
            String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
            if (isbn != null && !"".equals(isbn)) {
                // With an ISBN the service answers with the cover URL itself.
                String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=medium", ENCODING);
                if (!"".equals(coverUrl)) {
                    result.setCover(coverUrl.replace("\r\n", "").trim());
                }
            } else {
                // Without an ISBN the response is markup whose "src" attribute is the cover.
                String coverJs = httpGet(url, ENCODING);
                result.setCover(matchHTMLAttr(coverJs, "src"));
            }
        }
    }

    result.setTitle(doc.select("h1").first().text());
    for (Element tr : doc.select(".titleinfo tr")) {
        // Sometimes there is one th and one td, sometimes two tds
        String detailName = tr.select("th, td").first().text().trim();
        String detailValue = tr.select("td").last().text().trim();
        result.addDetail(new Detail(detailName, detailValue));
        if (detailName.contains("ID in diesem Katalog")) {
            result.setId(detailValue);
        }
    }
    if (result.getDetails().size() == 0 && doc.select("#details").size() > 0) {
        // e.g. Bayreuth_Uni
        // Fallback layout: <strong> elements are names, everything up to the next <strong>
        // is the value. Consecutive <strong> elements are joined into one name.
        String dname = "";
        String dval = "";
        boolean in_value = true;
        for (Node n : doc.select("#details").first().childNodes()) {
            if (n instanceof Element && ((Element) n).tagName().equals("strong")) {
                if (in_value) {
                    if (dname.length() > 0 && dval.length() > 0) {
                        result.addDetail(new Detail(dname, dval));
                    }
                    dname = ((Element) n).text();
                    in_value = false;
                } else {
                    dname += ((Element) n).text();
                }
            } else {
                String t = null;
                if (n instanceof TextNode) {
                    t = ((TextNode) n).text();
                } else if (n instanceof Element) {
                    t = ((Element) n).text();
                }
                if (t != null) {
                    if (in_value) {
                        dval += t;
                    } else {
                        in_value = true;
                        dval = t;
                    }
                }
            }
        }
        // Pairs are only flushed when the next <strong> starts, so the last pair must be
        // added here. "in_value" ensures the value really belongs to the current name.
        if (in_value && dname.length() > 0 && dval.length() > 0) {
            result.addDetail(new Detail(dname, dval));
        }
    }

    // Copies
    // The attribute selector needs its closing bracket to be a well-formed CSS query.
    String copiesParameter = doc.select("div[id^=ajax_holdings_url]").attr("ajaxParameter").replace("&amp;", "");
    if (!"".equals(copiesParameter)) {
        String copiesHtml = httpGet(opac_url + "/" + copiesParameter, ENCODING);
        Document copiesDoc = Jsoup.parse(copiesHtml);
        // One entry per header cell; null marks a column that is not used.
        List<String> table_keys = new ArrayList<>();
        for (Element th : copiesDoc.select(".data tr th")) {
            if (th.text().contains("Zweigstelle")) {
                table_keys.add("branch");
            } else if (th.text().contains("Status")) {
                table_keys.add("status");
            } else if (th.text().contains("Signatur")) {
                table_keys.add("signature");
            } else {
                table_keys.add(null);
            }
        }
        for (Element tr : copiesDoc.select(".data tr:has(td)")) {
            Copy copy = new Copy();
            int i = 0;
            for (Element td : tr.select("td")) {
                // Rows may have more cells than there are header cells.
                if (i < table_keys.size() && table_keys.get(i) != null) {
                    copy.set(table_keys.get(i), td.text().trim());
                }
                i++;
            }
            result.addCopy(copy);
        }
    }

    // Reservation Info, only works if the code above could find a URL
    if (!"".equals(copiesParameter)) {
        String reservationParameter = copiesParameter.replace("showHoldings", "showDocument");
        try {
            String reservationHtml = httpGet(opac_url + "/" + reservationParameter, ENCODING);
            Document reservationDoc = Jsoup.parse(reservationHtml);
            reservationDoc.setBaseUri(opac_url);
            if (reservationDoc.select("a").size() == 1) {
                result.setReservable(true);
                result.setReservation_info(reservationDoc.select("a").first().attr("abs:href"));
            }
        } catch (Exception e) {
            e.printStackTrace();
            // fail silently
        }
    }

    // Volumes: look for a link whose query has methodToCall=volumeSearch and remember the
    // catKey/dbIdentifier parameters seen up to and including that link.
    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return result;
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Reads the reservation rows of a {@code .data} table and appends them to the given list.
 *
 * <p>The first row is treated as header and skipped. Parsing stops as soon as a row contains
 * "keine Daten" or consists of a single cell.
 *
 * @param type         list type, stored as first component of the cancel data
 * @param reservations list that receives the parsed items
 * @param doc          account page; its base URI is set to {@code opac_url}
 * @param offset       stored as second component of the cancel data
 */
protected void parse_reslist(String type, List<ReservedItem> reservations, Document doc, int offset) {
    Elements rows = doc.select(".data tr");
    doc.setBaseUri(opac_url);
    final int rowCount = rows.size();
    if (rowCount == 1) {
        // Only the header row is present.
        return;
    }
    assert (rowCount > 0);
    for (int rowIdx = 1; rowIdx < rowCount; rowIdx++) {
        Element row = rows.get(rowIdx);
        ReservedItem item = new ReservedItem();

        boolean hasData = !row.text().contains("keine Daten") && row.children().size() != 1;
        if (!hasData) {
            return;
        }

        item.setTitle(row.child(1).select("strong").text().trim());
        try {
            // Both cells hold several values separated by <br> tags.
            String[] titleCellParts = row.child(1).html().split("<br[ /]*>");
            String[] statusCellParts = row.child(2).html().split("<br[ /]*>");
            if (titleCellParts.length > 1) {
                item.setAuthor(titleCellParts[1].trim());
            }
            // NOTE(review): the status (part 0) is only stored when at least three parts
            // exist, exactly like the branch (part 2) - confirm that this is intended.
            if (statusCellParts.length > 2) {
                item.setBranch(statusCellParts[2].trim());
                item.setStatus(statusCellParts[0].trim());
            }

            Elements anchors = row.select("a");
            if (anchors.size() == 1) {
                // Cancel data is "<type>$<offset>$<query string of the link>".
                String prefix = type + "$" + offset + "$";
                item.setCancelData(prefix + anchors.attr("abs:href").split("\\?")[1]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        reservations.add(item);
    }
    assert (reservations.size() == rowCount - 1);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Reads the lent-item rows of a {@code .data} table and appends them to the given list.
 *
 * <p>The first row is treated as header and skipped. Parsing stops as soon as a row contains
 * "keine Daten".
 *
 * @param media  list that receives the parsed items
 * @param doc    account page; its base URI is set to {@code opac_url}
 * @param offset stored as first component of the prolong data of renewable items
 */
protected void parse_medialist(List<LentItem> media, Document doc, int offset) {
    Elements copytrs = doc.select(".data tr");
    doc.setBaseUri(opac_url);

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    int trs = copytrs.size();
    if (trs == 1) {
        // Only the header row is present.
        return;
    }
    assert (trs > 0);
    for (int i = 1; i < trs; i++) {
        Element tr = copytrs.get(i);
        LentItem item = new LentItem();

        if (tr.text().contains("keine Daten")) {
            return;
        }

        item.setTitle(tr.child(1).select("strong").text().trim());
        try {
            // A missing author line must not prevent deadline and renewal data from being read.
            String[] col1split = tr.child(1).html().split("<br[ /]*>");
            if (col1split.length > 1) {
                item.setAuthor(col1split[1].trim());
            }

            String[] col2split = tr.child(2).html().split("<br[ /]*>");
            String deadline = col2split[0].trim();
            // The cell may contain a range "from - to"; the deadline is the part after the dash.
            if (deadline.contains("-")) {
                String[] range = deadline.split("-");
                if (range.length > 1) {
                    deadline = range[1].trim();
                }
            }
            try {
                item.setDeadline(fmt.parseLocalDate(deadline).toString());
            } catch (IllegalArgumentException e1) {
                e1.printStackTrace();
            }

            if (col2split.length > 1) {
                item.setHomeBranch(col2split[1].trim());
            }

            Elements links = tr.select("a");
            if (links.size() > 0) {
                for (Element link : links) {
                    String href = link.attr("abs:href");
                    Map<String, String> hrefq = getQueryParamsFirst(href);
                    // Links without a methodToCall parameter are skipped; previously they
                    // raised an exception that hid any renewal link following them.
                    if (hrefq == null || !"renewalPossible".equals(hrefq.get("methodToCall"))) {
                        continue;
                    }
                    item.setProlongData(offset + "$" + href.split("\\?")[1]);
                    item.setRenewable(true);
                    break;
                }
            } else if (tr.select(".textrot, .textgruen, .textdunkelblau").size() > 0) {
                // No link at all: the colored status text is stored as prolong data.
                item.setProlongData("" + tr.select(".textrot, .textgruen, .textdunkelblau").text());
                item.setRenewable(false);
            }

        } catch (Exception ex) {
            ex.printStackTrace();
        }

        media.add(item);
    }
    assert (media.size() == trs - 1);

}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Loads lent items, reservations (ordered and prebooked media), pending fees and the
 * validity text of the account.
 *
 * <p>For each of the three account lists ({@code typ=1}, {@code typ=6}, {@code typ=7}) the
 * first page is parsed and then every {@code methodToCall=pos} link in the first
 * {@code .box-right} element whose {@code anzPos} is not "1" is fetched and parsed as well.
 *
 * @param acc the account to log in with
 * @return the collected account data, or {@code null} if the login was not successful
 * @throws IOException        propagated from the HTTP helpers
 * @throws JSONException      declared by the overridden method
 * @throws OpacErrorException declared by the overridden method
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    start(); // TODO: Is this necessary?

    int resultNum;

    if (!login(acc)) {
        return null;
    }

    // Lent media
    String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=1", ENCODING);
    List<LentItem> medien = new ArrayList<>();
    Document doc = Jsoup.parse(html);
    // The base URI is needed so that attr("abs:href") on the paging links resolves.
    doc.setBaseUri(opac_url);
    parse_medialist(medien, doc, 1);
    if (doc.select(".box-right").size() > 0) {
        for (Element link : doc.select(".box-right").first().select("a")) {
            String href = link.attr("abs:href");
            Map<String, String> hrefq = getQueryParamsFirst(href);
            if (hrefq == null || hrefq.get("methodToCall") == null) {
                continue;
            }
            if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) {
                html = httpGet(href, ENCODING);
                parse_medialist(medien, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos")));
            }
        }
    }
    if (doc.select("#label1").size() > 0) {
        // The tab label contains the item count in parentheses; used as a sanity check only.
        resultNum = 0;
        String rNum = doc.select("#label1").first().text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1");
        if (rNum.length() > 0) {
            resultNum = Integer.parseInt(rNum);
        }

        assert (resultNum == medien.size());
    }

    // Ordered media ("Bestellungen")
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=6", ENCODING);
    List<ReservedItem> reserved = new ArrayList<>();
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    parse_reslist("6", reserved, doc, 1);
    Elements label6 = doc.select("#label6");
    if (doc.select(".box-right").size() > 0) {
        for (Element link : doc.select(".box-right").first().select("a")) {
            String href = link.attr("abs:href");
            Map<String, String> hrefq = getQueryParamsFirst(href);
            if (hrefq == null || hrefq.get("methodToCall") == null) {
                // Skip unrelated links like the lent-media loop does; stopping here would
                // drop every paging link that follows.
                continue;
            }
            if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) {
                html = httpGet(href, ENCODING);
                parse_reslist("6", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos")));
            }
        }
    }

    // Prebooked media ("Vormerkungen")
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=7", ENCODING);
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    parse_reslist("7", reserved, doc, 1);
    if (doc.select(".box-right").size() > 0) {
        for (Element link : doc.select(".box-right").first().select("a")) {
            String href = link.attr("abs:href");
            Map<String, String> hrefq = getQueryParamsFirst(href);
            if (hrefq == null || hrefq.get("methodToCall") == null) {
                // See above: skip, do not stop.
                continue;
            }
            if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) {
                html = httpGet(href, ENCODING);
                parse_reslist("7", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos")));
            }
        }
    }
    if (label6.size() > 0 && doc.select("#label7").size() > 0) {
        // Sanity check: both tab labels together should announce as many items as were parsed.
        resultNum = 0;
        String rNum = label6.text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1");
        if (rNum.length() > 0) {
            resultNum = Integer.parseInt(rNum);
        }
        rNum = doc.select("#label7").text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1");
        if (rNum.length() > 0) {
            resultNum += Integer.parseInt(rNum);
        }
        assert (resultNum == reserved.size());
    }

    AccountData res = new AccountData(acc.getId());

    // Fees and validity are read from the last page parsed above (typ=7).
    if (doc.select("#label8").size() > 0) {
        String text = doc.select("#label8").first().text().trim();
        // "Geb.+hren" matches the umlaut with a wildcard, independent of the page encoding.
        if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) {
            text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2");
            res.setPendingFees(text);
        }
    }
    Pattern p = Pattern.compile("[^0-9.]*", Pattern.MULTILINE);
    if (doc.select(".box3").size() > 0) {
        for (Element box : doc.select(".box3")) {
            if (box.select("strong").size() == 1) {
                String text = box.select("strong").text();
                // The umlaut is written as a Unicode escape so that the comparison does not
                // depend on the encoding of this source file.
                if (text.equals("Jahresgeb\u00fchren")) {
                    // Keep only digits and dots of the box text.
                    text = box.text();
                    text = p.matcher(text).replaceAll("");
                    res.setValidUntil(text);
                }
            }

        }
    }

    res.setLent(medien);
    res.setReservations(reserved);
    return res;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses a TouchPoint search result page into a {@link SearchRequestResult}.
 *
 * <p>If the page contains {@code #RefineHitListForm}, the hit list is fetched separately from
 * {@code speedHitList.do}. For every result row further HTTP requests may be made to resolve
 * the cover URL and the loan status.
 *
 * <p>Side effects: sets the fields {@code identifier} and {@code resultcount}, and
 * {@code reusehtml} when the page turns out to be a single hit.
 *
 * @param html HTML of the search result page
 * @param page number of this result page; results are numbered assuming 10 results per page
 * @return the parsed results; an empty result if the page contains a {@code .nodata} element
 * @throws OpacErrorException with message {@code "is_a_redirect"} if the result header shows
 *                            "1/1"
 * @throws IOException        if one of the additional HTTP requests fails
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException {
    Document doc = Jsoup.parse(html);

    if (doc.select("#RefineHitListForm").size() > 0) {
        // the results are located on a different page loaded via AJAX
        html = httpGet(opac_url + "/speedHitList.do?_=" + String.valueOf(System.currentTimeMillis() / 1000)
                + "&hitlistindex=0&exclusionList=", ENCODING);
        doc = Jsoup.parse(html);
    }

    if (doc.select(".nodata").size() > 0) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    doc.setBaseUri(opac_url + "/searchfoo");

    int results_total = -1;

    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        // Single hit: keep the HTML so that it can be reused.
        reusehtml = html;
        throw new OpacErrorException("is_a_redirect");
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data > tbody > tr");
    identifier = null;

    // Take the "identifier" query parameter from the first singleHit.do link.
    Elements links = doc.select("table.data a");
    boolean haslink = false;
    for (Element node : links) {
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do") && !haslink) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils
                        .parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }

        }
    }

    // File names can look like this: "20_DVD_Video.gif"
    Pattern numericPrefix = Pattern.compile("(\\d+)_.*");

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();
        if (tr.select(".icn, img[width=32]").size() > 0) {
            String[] fparts = tr.select(".icn, img[width=32]").first().attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "")
                    .replace(".png", "");

            // Reduce names with a numeric prefix to that number.
            Matcher matcher = numericPrefix.matcher(changedFname);
            if (matcher.find()) {
                changedFname = matcher.group(1);
            }

            // A mapping in "data" (keyed by the full file name) takes precedence over the
            // default mapping (keyed by the reduced name).
            MediaType defaulttype = defaulttypes.get(changedFname);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(defaulttype);
                }
            } else {
                sr.setType(defaulttype);
            }
        }
        String title;
        String text;
        if (tr.select(".results table").size() > 0) { // e.g. RWTH Aachen
            title = tr.select(".title a").text();
            text = tr.select(".title div").text();
        } else { // e.g. Schaffhausen, BSB Munich
            title = tr.select(".title, .hitlistTitle").text();
            text = tr.select(".results, .hitlistMetadata").first().ownText();
        }

        // we need to do some evil javascript parsing here to get the cover
        // and loan status of the item

        // get cover
        if (tr.select(".cover script").size() > 0) {
            String js = tr.select(".cover script").first().html();
            String isbn = matchJSVariable(js, "isbn");
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            // The cover is optional, so a missing variable just means "no cover".
            if (isbn != null && ajaxUrl != null && !"".equals(isbn) && !"".equals(ajaxUrl)) {
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=small", ENCODING);
                if (!"".equals(coverUrl)) {
                    sr.setCover(coverUrl.replace("\r\n", "").trim());
                }
            }
        }
        // get loan status and media ID
        if (tr.select("div[id^=loanstatus] + script").size() > 0) {
            String js = tr.select("div[id^=loanstatus] + script").first().html();
            String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier",
                    "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit",
                    "context" };
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(ajaxUrl)) {
                // The result id is a JSON object holding the item identifier and database id.
                JSONObject id = new JSONObject();
                List<NameValuePair> map = new ArrayList<>();
                for (String variable : variables) {
                    String value = matchJSVariable(js, variable);
                    if (!"".equals(value)) {
                        map.add(new BasicNameValuePair(variable, value));
                    }
                    try {
                        if (variable.equals("itemIdentifier")) {
                            id.put("id", value);
                        } else if (variable.equals("loanstateDBId")) {
                            id.put("db", value);
                        }
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                }
                sr.setId(id.toString());
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING)
                        .replace("\r\n", "").trim();
                Document loanStatusDoc = Jsoup.parse(loanStatusHtml);
                String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim();

                // Umlauts are written as Unicode escapes so that the comparisons do not
                // depend on the encoding of this source file.
                if ((loanstatus.startsWith("entliehen") && loanstatus.contains("keine Vormerkung m\u00f6glich"))
                        || loanstatus.contains("Keine Exemplare verf\u00fcgbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar"))
                        || (loanstatus.startsWith("vorbestellbar")
                                && !loanstatus.contains("nicht vorbestellbar"))
                        || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar"))
                        || (loanstatus.contains("heute zur\u00fcckgebucht"))
                        || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3))
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
        }

        sr.setInnerhtml(("<b>" + title + "</b><br/>") + text);

        sr.setNr(10 * (page - 1) + i + 1);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Parses the HTML of a result-list page into a {@link SearchRequestResult}.
 *
 * <p>Besides building the result list, this method updates several fields of the
 * enclosing object: {@code identifier} (taken from the first {@code singleHit.do}
 * link in the result table, or {@code null} if none is found), {@code reusehtml}
 * (set when the page turns out to be a single-hit page) and {@code resultcount}.
 *
 * @param html the HTML source of the result-list page
 * @param page the page number; used to compute each result's number as
 *             {@code 10 * (page - 1) + rowIndex} and passed on to the returned object
 * @return the parsed results together with the total hit count ({@code -1} if the
 *         count could not be read from the page heading)
 * @throws OpacErrorException with the text of the page's {@code .error} or
 *                            {@code .nohits} element if one is present, or with the
 *                            message {@code "is_a_redirect"} if the heading contains
 *                            {@code "(1/1)"} or {@code " 1/1"}
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");

    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    int results_total = -1;

    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        // Keep the HTML around before signalling the single-hit case to the caller.
        reusehtml = html;
        throw new OpacErrorException("is_a_redirect");
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data tbody tr");

    // Take the "identifier" query parameter from the first singleHit.do link only.
    identifier = null;
    for (Element link : doc.select("table.data a")) {
        if (link.hasAttr("href") && link.attr("href").contains("singleHit.do")) {
            try {
                List<NameValuePair> anyurl = URLEncodedUtils
                        .parse(new URI(link.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            // Only the first matching link is ever inspected, even if it failed to parse.
            break;
        }
    }

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // --- Media type: from the icon (configured mapping first, defaults second) ---
        if (tr.select("td img[title]").size() > 0) {
            Element icon = tr.select("td img").get(0);
            String title = icon.attr("title");
            String[] fparts = icon.attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                    .replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }

        // Keywords anywhere in the row text override the icon-based type.
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }

        // --- Cover ---
        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }

        // --- Locate the cell (and, if present, the wrapper inside it) holding the title data ---
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }

        List<Node> children = middlething.childNodes();
        Elements contentDivs = middlething.select("div").not("#hlrightblock,.bestellfunktionen");
        if (contentDivs.size() == 1) {
            Element indiv = contentDivs.first();
            if (indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }

        // Flatten the nodes into string tuples. Two shapes are produced:
        //   { "text", "", text }                                   for direct text nodes
        //   { parentTag, childTag|"text", text, class, style }     for content nested one level
        // Fragments of three characters or fewer are ignored.
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element el = (Element) node;
                for (Node subnode : el.childNodes()) {
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { el.tag().getName(), "text", text, el.className(),
                                    el.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        Element subel = (Element) subnode;
                        String text = subel.text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { el.tag().getName(), subel.tag().getName(), text,
                                    el.className(), el.attr("style") });
                        }
                    }
                }
            }
        }

        // --- Description, preferably from the machine-readable span.Z3988 metadata ---
        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                List<NameValuePair> z3988data = URLEncodedUtils
                        .parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() == null || nv.getValue().trim().equals("")) {
                        continue;
                    }
                    String name = nv.getName();
                    if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                        description.append("<b>").append(nv.getValue()).append("</b>");
                        hastitle = true;
                    } else if (name.equals("rft.au") || name.equals("rft.date")) {
                        description.append("<br />").append(nv.getValue());
                    }
                }
            } catch (URISyntaxException e) {
                // Unparseable metadata: fall back to the heuristic description below.
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }

        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        // Never set to true in this method, so the "!sigfound" guard below always passes.
        boolean sigfound = false;
        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) {
                    // k == 1 here, so a line break always precedes the value.
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && !sigfound && part[0].equals("text")
                        && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(part[2]);
                }
            }

            // NOTE(review): the tuples built above have length 3 or 5, so the
            // "length == 4" branch is never taken as written. Left unchanged because
            // enabling it would alter which status wins - confirm the intent.
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }

            if (sr.getStatus() == null) {
                // Umlauts are written as Unicode escapes so the literals survive
                // any source-file encoding.
                if ((part[2].contains("entliehen")
                        && part[2].startsWith("Vormerkung ist leider nicht m\u00f6glich"))
                        || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (part[2].startsWith("entliehen")
                        || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar"))
                        || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar"))
                        || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar"))
                        || (part[2].contains("heute zur\u00fcckgebucht"))
                        || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        // Especially Onleihe.de ebooks are often marked
                        // green though they are not available.
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }

        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Builds a {@link DetailledItem} from a single-hit page.
 *
 * <p>In addition to the supplied HTML, two further tabs of the same hit are
 * fetched via {@code httpGet}: {@code showTitleActive} (detail fields) and
 * {@code showAvailabilityActive} (reservation and download links). Copies and
 * the volume-search information are read from the supplied HTML itself.
 *
 * @param html the HTML source of the single-hit page
 * @return the item with id, title, cover, details, copies and, where found,
 *         reservation and volume-search information
 * @throws IOException if fetching one of the additional tabs fails
 */
protected DetailledItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    String html2 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showTitleActive", ENCODING);
    Document doc2 = Jsoup.parse(html2);
    doc2.setBaseUri(opac_url);

    String html3 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showAvailabilityActive",
            ENCODING);
    Document doc3 = Jsoup.parse(html3);
    doc3.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    try {
        result.setId(doc.select("#bibtip_id").text().trim());
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    // --- Reservation links (and katkey as a fallback id) ---
    List<String> reservationlinks = new ArrayList<>();
    for (Element link : doc3.select("#vormerkung a, #tab-content a")) {
        String href = link.absUrl("href");
        Map<String, String> hrefq = getQueryParamsFirst(href);
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }

        // Vormerken
        String method = hrefq.get("methodToCall");
        if (method != null && (method.equals("doVormerkung") || method.equals("doBestellung"))) {
            reservationlinks.add(href.split("\\?")[1]);
        }
    }
    if (reservationlinks.size() == 1) {
        result.setReservable(true);
        result.setReservation_info(reservationlinks.get(0));
    } else if (reservationlinks.isEmpty()) {
        result.setReservable(false);
    } else {
        // TODO: Multiple options - handle this case!
    }

    // --- Cover ---
    if (doc.select(".data td img").size() == 1) {
        result.setCover(doc.select(".data td img").first().attr("abs:src"));
        try {
            downloadCover(result);
        } catch (Exception ignored) {
            // Best effort: a failed cover download must not abort parsing the item.
        }
    }

    // --- Title ---
    if (doc.select(".aw_teaser_title").size() == 1) {
        result.setTitle(doc.select(".aw_teaser_title").first().text().trim());
    } else if (doc.select(".data td strong").size() > 0) {
        result.setTitle(doc.select(".data td strong").first().text().trim());
    } else {
        result.setTitle("");
    }
    if (doc.select(".aw_teaser_title_zusatz").size() > 0) {
        result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim()));
    }

    // --- Link detail from the box container ---
    // If a link is found here, its title/text pair is carried over into the
    // detail loop below; otherwise both are reset.
    String title = "";
    String text = "";
    boolean takeover = false;
    Element detailtrs = doc2.select(".box-container .data td").first();
    // first() returns null when nothing matches; treat that like "no link found".
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                Element el = (Element) node;
                if (el.tagName().equals("strong")) {
                    title = el.text().trim();
                    text = "";
                } else if (el.tagName().equals("a")
                        && (el.text().trim().contains("hier klicken") || title.equals("Link:"))) {
                    text = text + node.attr("href");
                    takeover = true;
                    break;
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    }
    if (!takeover) {
        text = "";
        title = "";
    }

    // --- Details: <strong> starts a new field, everything after it is its value ---
    detailtrs = doc2.select("#tab-content .data td").first();
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                Element el = (Element) node;
                if (el.tagName().equals("strong")) {
                    if (!text.equals("") && !title.equals("")) {
                        result.addDetail(new Detail(title.trim(), text.trim()));
                        if (title.equals("Titel:")) {
                            result.setTitle(text.trim());
                        }
                        text = "";
                    }

                    title = el.text().trim();
                } else if (el.tagName().equals("a")
                        && (el.text().trim().contains("hier klicken") || title.equals("Link:"))) {
                    text = text + node.attr("href");
                } else {
                    text = text + el.text();
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    } else {
        // Alternative layout: a two-column table of name/value rows.
        if (doc2.select("#tab-content .fulltitle tr").size() > 0) {
            Elements rows = doc2.select("#tab-content .fulltitle tr");
            for (Element tr : rows) {
                if (tr.children().size() == 2) {
                    Element valcell = tr.child(1);
                    String value = valcell.text().trim();
                    if (valcell.select("a").size() == 1) {
                        value = valcell.select("a").first().absUrl("href");
                    }
                    result.addDetail(new Detail(tr.child(0).text().trim(), value));
                }
            }
        } else {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR),
                    stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL)));
        }
    }
    // Flush the last pending field.
    if (!text.equals("") && !title.equals("")) {
        result.addDetail(new Detail(title.trim(), text.trim()));
        if (title.equals("Titel:")) {
            result.setTitle(text.trim());
        }
    }

    for (Element link : doc3.select("#tab-content a")) {
        Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href"));
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }
    }
    for (Element link : doc3.select(".box-container a")) {
        if (link.text().trim().equals("Download")) {
            result.addDetail(
                    new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href")));
        }
    }

    // --- Copies: map column names to indices, overriding defaults from the header row ---
    Map<String, Integer> copy_columnmap = new HashMap<>();
    // Default values
    copy_columnmap.put("barcode", 1);
    copy_columnmap.put("branch", 3);
    copy_columnmap.put("status", 4);
    Elements copy_columns = doc.select("#tab-content .data tr#bg2 th");
    for (int i = 0; i < copy_columns.size(); i++) {
        String head = copy_columns.get(i).text().trim();
        if (head.contains("Status")) {
            copy_columnmap.put("status", i);
        }
        if (head.contains("Zweigstelle")) {
            copy_columnmap.put("branch", i);
        }
        if (head.contains("Mediennummer")) {
            copy_columnmap.put("barcode", i);
        }
        if (head.contains("Standort")) {
            copy_columnmap.put("location", i);
        }
        if (head.contains("Signatur")) {
            copy_columnmap.put("signature", i);
        }
    }

    // The date separators are literal dots, matching the "dd.MM.yyyy" formatter below.
    // With unescaped dots any separator matched, the date then failed to parse and the
    // whole copy was dropped by the catch block.
    Pattern status_lent = Pattern.compile(
            "^(entliehen) bis ([0-9]{1,2}\\.[0-9]{1,2}\\.[0-9]{2,4}) \\(gesamte Vormerkungen: ([0-9]+)\\)$");
    Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$");

    Elements exemplartrs = doc.select("#tab-content .data tr").not("#bg2");
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    for (Element tr : exemplartrs) {
        // Any failure while reading a row (e.g. a missing cell) skips just that copy.
        try {
            Copy copy = new Copy();
            Element status = tr.child(copy_columnmap.get("status"));
            Element barcode = tr.child(copy_columnmap.get("barcode"));
            String barcodetext = barcode.text().trim().replace(" Wegweiser", "");

            // STATUS
            String statustext;
            if (status.getElementsByTag("b").size() > 0) {
                statustext = status.getElementsByTag("b").text().trim();
            } else {
                statustext = status.text().trim();
            }
            // Status and barcode share one cell: split "<status> <barcode>".
            if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) {
                Matcher matcher1 = status_and_barcode.matcher(statustext);
                if (matcher1.matches()) {
                    statustext = matcher1.group(1);
                    barcodetext = matcher1.group(2);
                }
            }

            Matcher matcher = status_lent.matcher(statustext);
            if (matcher.matches()) {
                copy.setStatus(matcher.group(1));
                copy.setReservations(matcher.group(3));
                copy.setReturnDate(fmt.parseLocalDate(matcher.group(2)));
            } else {
                copy.setStatus(statustext);
            }
            copy.setBarcode(barcodetext);
            if (status.select("a[href*=doVormerkung]").size() == 1) {
                copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]);
            }

            String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", "");
            copy.setBranch(branchtext);

            if (copy_columnmap.containsKey("location")) {
                copy.setLocation(
                        tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", ""));
            }

            if (copy_columnmap.containsKey("signature")) {
                copy.setShelfmark(
                        tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", ""));
            }

            result.addCopy(copy);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    // --- Volume search: look for a link whose query says methodToCall=volumeSearch ---
    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        for (Element link : doc.select(".data td a")) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(link.attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = link;
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return result;
}