Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes.Element.attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Builds the list of search fields offered by the OPAC's search page.
 *
 * <p>Fetches the search page, creates one {@code TextSearchField} per option of the
 * {@code searchCategories[0]} select box and then hands every select element found
 * below {@code .accordion-body} to {@code parseDropdown}.
 *
 * @return the collected fields, text fields first, followed by whatever
 *         {@code parseDropdown} appended
 * @throws IOException   declared by the signature (the page is fetched via {@code httpGet})
 * @throws JSONException declared by the signature
 */
public List<SearchField> getSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }

    final String searchPage = httpGet(opac_url + "/search.do?methodToCall=switchSearchPage&SearchType=2",
            ENCODING);
    final Document page = Jsoup.parse(searchPage);
    final List<SearchField> result = new ArrayList<>();

    // Free-text categories: option label -> display name, option value -> field id.
    for (Element category : page.select("select[name=searchCategories[0]] option")) {
        TextSearchField textField = new TextSearchField();
        textField.setDisplayName(category.text());
        textField.setId(category.attr("value"));
        textField.setHint("");
        result.add(textField);
    }

    // Every remaining select box is treated as a drop-down filter.
    for (Element select : page.select(".accordion-body select")) {
        parseDropdown(select, result);
    }

    return result;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Tries to place a reservation for {@code item} on behalf of {@code acc}.
 *
 * <p>The reservation page is loaded (or taken from {@code reusehtml_reservation} when a
 * previous call cached it). If the page offers a location select box and no
 * {@code selection} was passed, a {@code SELECTION_NEEDED} result listing the options is
 * returned and the page HTML is cached for the follow-up call. Otherwise the request is
 * posted to {@code /requestItem.do}.
 *
 * @param item       the item to reserve; its reservation info is used as the URL to load
 * @param acc        the account to reserve for
 * @param useraction not used by this method
 * @param selection  the chosen location key, or {@code null} on the first call
 * @return {@code OK} when the response contains {@code .message-confirm}, a
 *         {@code SELECTION_NEEDED} result when a location has to be chosen, otherwise
 *         an {@code ERROR} result (with a message where the page provides one)
 * @throws IOException declared by the signature (pages are loaded via {@code httpGet}/{@code httpPost})
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    // Log in again when the session timed out, nobody is logged in, or another account is active.
    boolean sessionStale = System.currentTimeMillis() - logged_in > SESSION_LIFETIME || logged_in_as == null;
    if (sessionStale || logged_in_as.getId() != acc.getId()) {
        try {
            login(acc);
        } catch (OpacErrorException e) {
            return new ReservationResult(MultiStepResult.Status.ERROR, e.getMessage());
        }
    }

    String html = reusehtml_reservation != null
            ? reusehtml_reservation
            : httpGet(item.getReservation_info(), ENCODING);
    Document doc = Jsoup.parse(html);

    Elements errors = doc.select(".message-error");
    if (!errors.isEmpty()) {
        return new ReservationResult(MultiStepResult.Status.ERROR, errors.first().text());
    }

    List<NameValuePair> params = new ArrayList<>();
    params.add(new BasicNameValuePair("methodToCall", "requestItem"));
    Elements needBefore = doc.select("#newNeedBeforeDate");
    if (!needBefore.isEmpty()) {
        params.add(new BasicNameValuePair("newNeedBeforeDate", needBefore.val()));
    }

    Elements locations = doc.select("select[name=location] option");
    if (selection == null && !locations.isEmpty()) {
        // The user has to pick a location first; hand the choices back to the caller.
        ReservationResult choice = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED);
        List<Map<String, String>> entries = new ArrayList<>();
        for (Element location : locations) {
            Map<String, String> entry = new HashMap<>();
            entry.put("key", location.attr("value"));
            entry.put("value", location.text());
            entries.add(entry);
        }
        choice.setSelection(entries);
        choice.setMessage(doc.select("label[for=location]").text());
        reusehtml_reservation = html;
        return choice;
    }
    if (selection != null) {
        params.add(new BasicNameValuePair("location", selection));
        reusehtml_reservation = null;
    }

    String responseHtml = httpPost(opac_url + "/requestItem.do", new UrlEncodedFormEntity(params), ENCODING);
    Document response = Jsoup.parse(responseHtml);
    if (!response.select(".message-confirm").isEmpty()) {
        return new ReservationResult(MultiStepResult.Status.OK);
    }
    Elements alerts = response.select(".alert");
    if (!alerts.isEmpty()) {
        return new ReservationResult(MultiStepResult.Status.ERROR, alerts.text());
    }
    return new ReservationResult(MultiStepResult.Status.ERROR);
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Parses the HTML of an item's detail page into a {@code DetailledItem}.
 *
 * <p>Collects, in this order: the title and key/value details from the detail table,
 * the reservation link ("Vormerken"), free-text blocks from {@code div.record-item-new},
 * title/author from a {@code span.z3988} title attribute, the cover image and finally the
 * copies listed in {@code div[id^=stock_]} elements.
 *
 * @param id   the id to store on the result
 * @param html the detail page markup
 * @return the populated item; its title is the empty string when none was found
 */
private DetailledItem parse_result(String id, String html) {
    Document doc = Jsoup.parse(html);

    DetailledItem result = new DetailledItem();
    result.setTitle("");
    boolean title_is_set = false;

    result.setId(id);

    String detailTrsQuery = version18 ? ".inRoundBox1 table table tr"
            : ".DetailDataCell table table:not(.inRecordHeader) tr";
    for (Element tr : doc.select(detailTrsQuery)) {
        int s = tr.children().size();
        if (s == 0) {
            // A row without cells carries no data, and tr.child(0) would throw on it.
            continue;
        }
        if (tr.child(0).text().trim().equals("Titel") && !title_is_set) {
            result.setTitle(tr.child(s - 1).text().trim());
            title_is_set = true;
        } else if (s > 1) {
            // First cell is the label, last cell the value; cells holding nested tables are skipped.
            Element valchild = tr.child(s - 1);
            if (valchild.select("table").isEmpty()) {
                String val = valchild.text().trim();
                if (val.length() > 0) {
                    result.addDetail(new Detail(tr.child(0).text().trim(), val));
                }
            }
        }
    }

    for (Element a : doc.select("a.SummaryActionLink")) {
        if (a.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(a.attr("href"));
        }
    }

    // Free-text detail blocks: text nodes are kept verbatim, <br> becomes a newline,
    // any other element contributes its trimmed text.
    for (Element dd : doc.select("div.record-item-new")) {
        StringBuilder text = new StringBuilder();
        for (Node node : dd.childNodes()) {
            if (node instanceof TextNode) {
                text.append(((TextNode) node).text());
            } else if (node instanceof Element) {
                Element child = (Element) node;
                if (child.tagName().equals("br")) {
                    text.append("\n");
                } else {
                    text.append(child.text().trim());
                }
            }
        }
        result.addDetail(new Detail("", text.toString()));
    }

    Elements z3988 = doc.select("span.z3988");
    if (z3988.size() > 0) {
        // Sometimes there is a <span class="Z3988"> item which provides
        // data in a standardized format (key=value pairs joined by '&').
        String z3988data = z3988.first().attr("title").trim();
        for (String pair : z3988data.split("&")) {
            String[] nv = pair.split("=", 2);
            if (nv.length != 2 || nv[1].trim().equals("")) {
                continue;
            }
            boolean isTitleKey = nv[0].equals("rft.btitle") || nv[0].equals("rft.atitle");
            if (isTitleKey && result.getTitle().length() == 0) {
                // Only used as a fallback when the table did not provide a title.
                result.setTitle(nv[1]);
            } else if (nv[0].equals("rft.au")) {
                result.addDetail(new Detail("Author", nv[1]));
            }
        }
    }

    // Cover
    Elements covers = doc.select(".BookCover, .LargeBookCover");
    if (covers.size() > 0) {
        result.setCover(covers.first().attr("src"));
    }

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    String pop = "";
    for (Element div : doc.select("div[id^=stock_]")) {
        if (div.attr("id").startsWith("stock_head")) {
            // Header div: its text is stored as the branch of the copies that follow.
            pop = div.text().trim();
            continue;
        }

        Copy copy = new Copy();

        // This is getting very ugly - check if it is valid for libraries which are not Hamburg.
        // Seems to also work in Kiel (Zones 1.8, checked 10.10.2015)
        // j counts the child nodes seen since the last <br>; the meaning of a node depends
        // purely on that position.
        int j = 0;
        for (Node node : div.childNodes()) {
            try {
                if (node instanceof Element) {
                    Element el = (Element) node;
                    String tag = el.tag().getName();
                    if (tag.equals("br")) {
                        copy.setBranch(pop);
                        result.addCopy(copy);
                        j = -1;
                    } else if (tag.equals("b") && j == 1) {
                        copy.setLocation(el.text());
                    } else if (tag.equals("b") && j > 1) {
                        copy.setStatus(el.text());
                    }
                    j++;
                } else if (node instanceof TextNode) {
                    TextNode textNode = (TextNode) node;
                    if (j == 0) {
                        copy.setDepartment(textNode.text());
                    }
                    if (j == 2) {
                        copy.setBarcode(textNode.getWholeText().trim().split("\n")[0].trim());
                    }
                    if (j == 6) {
                        // The last ten characters are expected to be a dd.MM.yyyy date.
                        // NOTE(review): a shorter text makes substring throw; that is caught
                        // below and skips the j++ for this node - kept as in the original.
                        String text = textNode.text().trim();
                        String date = text.substring(text.length() - 10);
                        try {
                            copy.setReturnDate(fmt.parseLocalDate(date));
                        } catch (IllegalArgumentException e) {
                            e.printStackTrace();
                        }
                    }
                    j++;
                }
            } catch (Exception e) {
                // Best effort: a malformed node must not abort parsing of the remaining copies.
                e.printStackTrace();
            }
        }
    }

    return result;
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Looks for the cluster of paragraphs that most likely forms the article body.
 *
 * <p>Every candidate node with more than two stop words and no high link density
 * contributes a score to its parent (full score) and grandparent (half score). Nodes that
 * appear early receive a positional boost; when there are more than 15 candidates, the
 * last quarter receives a negative boost because comments usually sit at the bottom.
 * The parent or grandparent with the highest score wins.
 *
 * @param doc the parsed document to inspect
 * @return the best scoring parent/grandparent element, or {@code null} when no candidate
 *         node qualified
 */
private Element calculateBestNodeBasedOnClustering(Document doc) {
    Element topNode = null;

    // grab all the paragraph elements on the page to start to inspect the likelihood of them being good peeps
    ArrayList<Element> nodesToCheck = getNodesToCheck(doc);

    double startingBoost = 1.0;
    int i = 0;

    // holds all the parents of the nodes we're checking
    Set<Element> parentNodes = new HashSet<Element>();

    ArrayList<Element> nodesWithText = new ArrayList<Element>();

    for (Element node : nodesToCheck) {
        String nodeText = node.text();
        WordStats wordStats = StopWords.getStopWordCount(nodeText);
        boolean highLinkDensity = isHighLinkDensity(node);

        if (wordStats.getStopWordCount() > 2 && !highLinkDensity) {
            nodesWithText.add(node);
        }
    }

    int numberOfNodes = nodesWithText.size();
    // we shouldn't give more negatives than positives
    // NOTE(review): this value is never updated in this method, so it always contributes 0.
    int negativeScoring = 0;
    // we want to give the last 25% of nodes negative scores in case they're comments
    double bottomNodesForNegativeScore = (float) numberOfNodes * 0.25;

    if (logger.isDebugEnabled()) {
        logger.debug("About to inspect num of nodes with text: " + numberOfNodes);
    }

    for (Element node : nodesWithText) {

        // add parents and grandparents to scoring
        // so basically what we're doing is giving boost scores to paragraphs that appear higher up in the dom
        // and giving lower, even negative scores to those who appear lower which could be commenty stuff

        float boostScore = 0;

        if (isOkToBoost(node)) {
            // Each boosted node gets a smaller boost than the previous one: 50, 25, 16.6, ...
            boostScore = (float) ((1.0 / startingBoost) * 50);
            startingBoost++;
        }

        // check for negative node values
        if (numberOfNodes > 15 && (numberOfNodes - i) <= bottomNodesForNegativeScore) {
            float booster = (float) bottomNodesForNegativeScore - (float) (numberOfNodes - i);
            boostScore = -(float) Math.pow(booster, (float) 2);

            // we don't want to score too highly on the negative side.
            float negscore = Math.abs(boostScore) + negativeScoring;
            if (negscore > 40) {
                boostScore = 5;
            }
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Location Boost Score: " + boostScore + " on interation: " + i + "' id='"
                    + node.parent().id() + "' class='" + node.parent().attr("class"));
        }
        String nodeText = node.text();
        WordStats wordStats = StopWords.getStopWordCount(nodeText);
        int upscore = (int) (wordStats.getStopWordCount() + boostScore);
        updateScore(node.parent(), upscore);
        updateScore(node.parent().parent(), upscore / 2);
        updateNodeCount(node.parent(), 1);
        updateNodeCount(node.parent().parent(), 1);

        // Set.add already ignores duplicates, no contains() check needed.
        parentNodes.add(node.parent());
        parentNodes.add(node.parent().parent());

        i++;
    }

    // now let's find the parent node who scored the highest

    int topNodeScore = 0;
    for (Element e : parentNodes) {

        if (logger.isDebugEnabled()) {
            logger.debug("ParentNode: score='" + e.attr("gravityScore") + "' nodeCount='"
                    + e.attr("gravityNodes") + "' id='" + e.id() + "' class='" + e.attr("class") + "' ");
        }
        int score = getScore(e);
        if (score > topNodeScore) {
            topNode = e;
            topNodeScore = score;
        }

        // Fall back to the first candidate so that a result exists even if no score is positive.
        if (topNode == null) {
            topNode = e;
        }
    }

    if (logger.isDebugEnabled()) {
        if (topNode == null) {
            logger.debug("ARTICLE NOT ABLE TO BE EXTRACTED!, WE HAZ FAILED YOU LORD VADAR");
        } else {
            // Log a short preview: the first paragraph if there is one, else the node's own text.
            Element topPara = topNode.getElementsByTag("p").first();
            String targetText = topPara == null ? topNode.text() : topPara.text();

            String logText;
            if (targetText.length() >= 51) {
                logText = targetText.substring(0, 50);
            } else {
                logText = targetText;
            }
            logger.debug("TOPNODE TEXT: " + logText.trim());
            logger.debug("Our TOPNODE: score='" + topNode.attr("gravityScore") + "' nodeCount='"
                    + topNode.attr("gravityNodes") + "' id='" + topNode.id() + "' class='"
                    + topNode.attr("class") + "' ");
        }
    }

    return topNode;

}

From source file:com.esprit.lyricsplus.DAO.SongDAO.java

/**
 * Runs a Google search for {@code query} and extracts an azlyrics.com link from the
 * first result.
 *
 * @param query the search terms, appended to the request URL as-is
 *              (NOTE(review): the value is not URL-encoded here - confirm that callers
 *              pass an already encoded string)
 * @return the link cut down to the part from "http" up to and including "html";
 *         {@code "NOT FOUND"} when there is no result link or it does not look like an
 *         azlyrics.com page; the empty string when the request itself failed
 */
private String getDataFromGoogle(String query) {

    String result = "";
    String request = "https://www.google.com/search?q=" + query + "&num=20";
    System.out.println("Sending request..." + request);

    try {

        // need http protocol, set this as a Google bot agent :)
        Document doc = Jsoup.connect(request)
                .userAgent("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
                .timeout(5000).get();

        // first() returns null when the selector matched nothing; treat that as "no link".
        Element link = doc.select("h3.r > a").first();
        String linke = link == null ? "" : link.attr("href");

        // Look for "html" only after "http" so that substring() always gets start <= end.
        int start = linke.indexOf("http");
        int end = start < 0 ? -1 : linke.indexOf("html", start);
        if (end >= 0 && linke.contains("azlyrics.com")) {
            linke = linke.substring(start, end) + "html";
            System.out.println("linkv2: " + linke);
            result = linke;
        } else {
            result = "NOT FOUND";
        }

    } catch (IOException e) {
        e.printStackTrace();
    } catch (NegativeArraySizeException e) {
        // Kept from the original implementation; the visible code does not show what raises it.
        e.printStackTrace();
    }

    return result;
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * remove any divs that looks like non-content, clusters of links, or paras with no gusto
 *
 * @param node/*w  w w  . j a  va  2  s .  co  m*/
 * @return
 */
private Element cleanupNode(Element node) {
    if (logger.isDebugEnabled()) {
        logger.debug("Starting cleanup Node");
    }

    node = addSiblings(node);

    Elements nodes = node.children();
    for (Element e : nodes) {
        if (e.tagName().equals("p")) {
            continue;
        }
        if (logger.isDebugEnabled()) {
            logger.debug("CLEANUP  NODE: " + e.id() + " class: " + e.attr("class"));
        }
        boolean highLinkDensity = isHighLinkDensity(e);
        if (highLinkDensity) {
            if (logger.isDebugEnabled()) {
                logger.debug("REMOVING  NODE FOR LINK DENSITY: " + e.id() + " class: " + e.attr("class"));
            }
            e.remove();
            continue;
        }
        // now check for word density
        // grab all the paragraphs in the children and remove ones that are too small to matter
        Elements subParagraphs = e.getElementsByTag("p");

        for (Element p : subParagraphs) {
            if (p.text().length() < 25) {
                p.remove();
            }
        }

        // now that we've removed shorty paragraphs let's make sure to exclude any first paragraphs that don't have paras as
        // their next siblings to avoid getting img bylines
        // first let's remove any element that now doesn't have any p tags at all
        Elements subParagraphs2 = e.getElementsByTag("p");
        if (subParagraphs2.size() == 0 && !e.tagName().equals("td")) {
            if (logger.isDebugEnabled()) {
                logger.debug("Removing node because it doesn't have any paragraphs");
            }
            e.remove();
            continue;
        }

        //if this node has a decent enough gravityScore we should keep it as well, might be content
        int topNodeScore = getScore(node);
        int currentNodeScore = getScore(e);
        float thresholdScore = (float) (topNodeScore * .08);
        if (logger.isDebugEnabled()) {
            logger.debug("topNodeScore: " + topNodeScore + " currentNodeScore: " + currentNodeScore
                    + " threshold: " + thresholdScore);
        }
        if (currentNodeScore < thresholdScore) {
            if (!e.tagName().equals("td")) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Removing node due to low threshold score");
                }
                e.remove();
            } else {
                if (logger.isDebugEnabled()) {
                    logger.debug("Not removing TD node");
                }
            }

            continue;
        }

    }

    return node;

}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Converts a select element into a {@code DropdownSearchField} and appends it to
 * {@code fields}.
 *
 * <p>The select's {@code name} attribute becomes the field id, each option becomes a
 * drop-down value and the text of the label(s) found in the select's parent becomes the
 * display name. Selects named {@code numberOfHits}, {@code timeOut} or
 * {@code rememberList} are ignored.
 *
 * @param dropdownElement the select element to convert
 * @param fields          the list the new field is added to
 */
private void parseDropdown(Element dropdownElement, List<SearchField> fields) {
    DropdownSearchField field = new DropdownSearchField();
    field.setId(dropdownElement.attr("name"));

    // Some fields make no sense or are not supported in the app
    String fieldId = field.getId();
    boolean unsupported = fieldId.equals("numberOfHits") || fieldId.equals("timeOut")
            || fieldId.equals("rememberList");
    if (unsupported) {
        return;
    }

    for (Element choice : dropdownElement.select("option")) {
        field.addDropdownValue(choice.attr("value"), choice.text());
    }
    field.setDisplayName(dropdownElement.parent().select("label").text());
    fields.add(field);
}

From source file:de.geeksfactory.opacclient.apis.BiBer1992.java

/**
 * Reserves {@code item} in two steps.
 *
 * <p>Step 1 ({@code selection} is {@code null} or {@code "confirmed"}): loads the
 * reservation page and reads the branch options from {@code select[name=ID1]}. With more
 * than one real option a {@code SELECTION_NEEDED} result is returned; with exactly one the
 * method calls itself with that option; with none an {@code ERROR} result is returned.
 *
 * <p>Step 2 (any other {@code selection}): posts the reservation. {@code selection} is
 * expected in the form {@code "<option value>:<option text>"} as produced by step 1.
 *
 * @param item       the item to reserve
 * @param account    the account whose name and password are sent with the request
 * @param useraction passed through to the recursive call, otherwise unused here
 * @param selection  {@code null}/{@code "confirmed"} for step 1, the chosen key for step 2
 * @return the outcome of the step that was executed
 * @throws IOException declared by the signature (pages are loaded via {@code httpGet}/{@code httpPost})
 */
@Override
public ReservationResult reservation(DetailledItem item, Account account, int useraction, String selection)
        throws IOException {
    String resinfo = item.getReservation_info();
    if (selection == null || selection.equals("confirmed")) {
        // STEP 1: Check if reservable and select branch ("ID1")

        // Differences between opax and opac
        boolean opax = opacDir.contains("opax");
        String func = opax ? "sigl" : "resF";
        String id = opax ? (resinfo.contains("resF") ? resinfo.substring(5) + "=" + resinfo
                : resinfo + "=resF_" + resinfo) : "ID=" + resinfo;

        String html = httpGet(
                opacUrl + "/" + opacDir + "/reserv" + opacSuffix + "?LANG=de&FUNC=" + func + "&" + id,
                getDefaultEncoding());
        Document doc = Jsoup.parse(html);
        newStyleReservations = doc.select("input[name=" + resinfo.replace("resF_", "") + "]").val()
                .length() > 4;

        List<Map<String, String>> options = new ArrayList<>();
        for (Element option : doc.select("select[name=ID1] option")) {
            // Options with value "0" are skipped and never offered as a branch.
            if ("0".equals(option.attr("value"))) {
                continue;
            }
            Map<String, String> selopt = new HashMap<>();
            // The key carries both value and label because step 2 may need both (ID1 / ID11).
            selopt.put("key", option.attr("value") + ":" + option.text());
            selopt.put("value", option.text());
            options.add(selopt);
        }

        if (options.isEmpty()) {
            // Covers both "no select box at all" and "only skipped options": previously the
            // latter ran into options.get(0) on an empty list.
            ReservationResult res = new ReservationResult(MultiStepResult.Status.ERROR);
            res.setMessage("Dieses Medium ist nicht reservierbar.");
            return res;
        }
        if (options.size() > 1) {
            ReservationResult res = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED);
            res.setActionIdentifier(ReservationResult.ACTION_BRANCH);
            res.setSelection(options);
            return res;
        }
        // Exactly one branch: no need to ask, continue with step 2 right away.
        return reservation(item, account, useraction, options.get(0).get("key"));
    } else {
        // STEP 2: Reserve
        // split() drops trailing empty strings, so "5:" yields a single element;
        // fall back to "" instead of indexing past the end.
        String[] selectionParts = selection.split(":");
        String branchId = selectionParts.length > 0 ? selectionParts[0] : "";
        String branchName = selectionParts.length > 1 ? selectionParts[1] : "";

        List<NameValuePair> nameValuePairs = new ArrayList<>();
        nameValuePairs.add(new BasicNameValuePair("LANG", "de"));
        nameValuePairs.add(new BasicNameValuePair("BENUTZER", account.getName()));
        nameValuePairs.add(new BasicNameValuePair("PASSWORD", account.getPassword()));
        nameValuePairs.add(new BasicNameValuePair("FUNC", "vors"));
        if (opacDir.contains("opax")) {
            nameValuePairs.add(new BasicNameValuePair(resinfo.replace("resF_", ""),
                    "vors" + (newStyleReservations ? resinfo.replace("resF_", "") : "")));
        }
        if (newStyleReservations) {
            nameValuePairs.add(new BasicNameValuePair("ID11", branchName));
        }
        nameValuePairs.add(new BasicNameValuePair("ID1", branchId));

        String html = httpPost(opacUrl + "/" + opacDir + "/setreserv" + opacSuffix,
                new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding());

        Document doc = Jsoup.parse(html);
        if (doc.select(".tab21 .p44b, .p2").text().contains("eingetragen")) {
            return new ReservationResult(MultiStepResult.Status.OK);
        } else {
            ReservationResult res = new ReservationResult(MultiStepResult.Status.ERROR);
            Elements messages = doc.select(".p1, .p22b");
            if (messages.size() > 0) {
                res.setMessage(messages.text());
            }
            return res;
        }
    }
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses a hit list page into a {@code SearchRequestResult}.
 *
 * <p>Besides reading the result rows this method performs additional HTTP requests: it
 * loads the AJAX hit list when the page only contains the refine form, and for every row
 * it may fetch the cover URL and the loan status through the URLs found in the row's
 * inline scripts.
 *
 * <p>Side effects: sets {@code identifier} (from the first {@code singleHit.do} link),
 * {@code resultcount}, and - when the page is a single-hit redirect - {@code reusehtml}.
 *
 * @param html the hit list markup
 * @param page the 1-based page number, used to number the results
 * @return the parsed results; an empty result when the page contains {@code .nodata}
 * @throws OpacErrorException with message {@code "is_a_redirect"} when the heading shows "1/1"
 * @throws IOException        declared by the signature (further pages are loaded via {@code httpGet})
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException {
    Document doc = Jsoup.parse(html);

    if (doc.select("#RefineHitListForm").size() > 0) {
        // the results are located on a different page loaded via AJAX
        html = httpGet(opac_url + "/speedHitList.do?_=" + String.valueOf(System.currentTimeMillis() / 1000)
                + "&hitlistindex=0&exclusionList=", ENCODING);
        doc = Jsoup.parse(html);
    }

    if (doc.select(".nodata").size() > 0) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    doc.setBaseUri(opac_url + "/searchfoo");

    int results_total = -1;

    // first() returns null when there is no heading; treat that as "count unknown".
    Element heading = doc.select(".box-header h2").first();
    String resultnumstr = heading != null ? heading.text() : "";
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        reusehtml = html;
        throw new OpacErrorException("is_a_redirect");
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data > tbody > tr");
    identifier = null;

    // Only the first singleHit.do link is inspected for the "identifier" parameter.
    for (Element node : doc.select("table.data a")) {
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do")) {
            try {
                List<NameValuePair> anyurl = URLEncodedUtils
                        .parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            break;
        }
    }

    // File names can look like this: "20_DVD_Video.gif"
    Pattern numericPrefix = Pattern.compile("(\\d+)_.*");

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        Elements icons = tr.select(".icn, img[width=32]");
        if (icons.size() > 0) {
            String[] fparts = icons.first().attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "")
                    .replace(".png", "");

            Matcher matcher = numericPrefix.matcher(changedFname);
            if (matcher.find()) {
                changedFname = matcher.group(1);
            }

            // Library specific mapping (keyed by the original file name) wins over the defaults.
            MediaType defaulttype = defaulttypes.get(changedFname);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(defaulttype);
                }
            } else {
                sr.setType(defaulttype);
            }
        }

        String title;
        String text;
        if (tr.select(".results table").size() > 0) { // e.g. RWTH Aachen
            title = tr.select(".title a").text();
            text = tr.select(".title div").text();
        } else { // e.g. Schaffhausen, BSB Munich
            title = tr.select(".title, .hitlistTitle").text();
            Element metadata = tr.select(".results, .hitlistMetadata").first();
            text = metadata != null ? metadata.ownText() : "";
        }

        // we need to do some evil javascript parsing here to get the cover
        // and loan status of the item

        // get cover
        Elements coverScripts = tr.select(".cover script");
        if (coverScripts.size() > 0) {
            String js = coverScripts.first().html();
            String isbn = matchJSVariable(js, "isbn");
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(isbn) && !"".equals(ajaxUrl)) {
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=small", ENCODING);
                if (!"".equals(coverUrl)) {
                    sr.setCover(coverUrl.replace("\r\n", "").trim());
                }
            }
        }

        // get loan status and media ID
        Elements loanScripts = tr.select("div[id^=loanstatus] + script");
        if (loanScripts.size() > 0) {
            String js = loanScripts.first().html();
            String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier",
                    "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit",
                    "context" };
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(ajaxUrl)) {
                JSONObject id = new JSONObject();
                List<NameValuePair> map = new ArrayList<>();
                for (String variable : variables) {
                    String value = matchJSVariable(js, variable);
                    if (!"".equals(value)) {
                        map.add(new BasicNameValuePair(variable, value));
                    }
                    try {
                        if (variable.equals("itemIdentifier")) {
                            id.put("id", value);
                        } else if (variable.equals("loanstateDBId")) {
                            id.put("db", value);
                        }
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                }
                sr.setId(id.toString());
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING)
                        .replace("\r\n", "").trim();
                Document loanStatusDoc = Jsoup.parse(loanStatusHtml);
                String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim();

                // The umlauts are written as unicode escapes so the literals survive any
                // source encoding ("moeglich", "verfuegbar", "zurueckgebucht" with real umlauts).
                if ((loanstatus.startsWith("entliehen") && loanstatus.contains("keine Vormerkung m\u00f6glich"))
                        || loanstatus.contains("Keine Exemplare verf\u00fcgbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar"))
                        || (loanstatus.startsWith("vorbestellbar")
                                && !loanstatus.contains("nicht vorbestellbar"))
                        || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar"))
                        || (loanstatus.contains("heute zur\u00fcckgebucht"))
                        || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        // Especially Onleihe.de ebooks are often marked
                        // green though they are not available.
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
        }

        sr.setInnerhtml(("<b>" + title + "</b><br/>") + text);

        sr.setNr(10 * (page - 1) + i + 1);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Loads the account overview: lent items, reservations (requested and ordered media
 * combined) and pending fees.
 *
 * <p>For each of the three lists the first page is parsed and then every link with an
 * {@code href} inside the first {@code .pagination} element is fetched and parsed as well.
 * Fees are read from the {@code #fees} element of the last page that was loaded.
 *
 * @param acc the account to log in with
 * @return the collected account data, or {@code null} when the login was not successful
 * @throws IOException        declared by the signature (pages are loaded via {@code httpGet})
 * @throws JSONException      declared by the signature
 * @throws OpacErrorException declared by the signature
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    start();
    LoginResponse login = login(acc);
    if (!login.success) {
        return null;
    }
    AccountData adata = new AccountData(acc.getId());
    if (login.warning != null) {
        adata.setWarning(login.warning);
    }

    // Lent media
    httpGet(opac_url + "/userAccount.do?methodToCall=start", ENCODING);
    Document doc = Jsoup
            .parse(httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=loaned", ENCODING));
    doc.setBaseUri(opac_url);
    List<LentItem> lent = new ArrayList<>();
    List<LentItem> lentPage = parse_medialist(doc);
    if (lentPage != null) {
        lent.addAll(lentPage);
    }
    Elements lentPager = doc.select(".pagination");
    if (!lentPager.isEmpty()) {
        for (Element pageLink : lentPager.first().select("a")) {
            if (pageLink.hasAttr("href")) {
                doc = Jsoup.parse(httpGet(pageLink.attr("abs:href"), ENCODING));
                doc.setBaseUri(opac_url);
                lentPage = parse_medialist(doc);
                if (lentPage != null) {
                    lent.addAll(lentPage);
                }
            }
        }
    }
    adata.setLent(lent);

    // Requested media ("Vormerkungen")
    doc = Jsoup
            .parse(httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=requested", ENCODING));
    doc.setBaseUri(opac_url);

    List<ReservedItem> reservations = new ArrayList<>();
    List<ReservedItem> requestedPage = parse_reslist(doc);
    if (requestedPage != null) {
        reservations.addAll(requestedPage);
    }
    Elements requestedPager = doc.select(".pagination");
    if (!requestedPager.isEmpty()) {
        for (Element pageLink : requestedPager.first().select("a")) {
            if (pageLink.hasAttr("href")) {
                doc = Jsoup.parse(httpGet(pageLink.attr("abs:href"), ENCODING));
                doc.setBaseUri(opac_url);
                requestedPage = parse_reslist(doc);
                if (requestedPage != null) {
                    reservations.addAll(requestedPage);
                }
            }
        }
    }

    // Ordered media ("Bestellungen") go into the same list as the requested media.
    doc = Jsoup.parse(httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=ordered", ENCODING));
    doc.setBaseUri(opac_url);
    List<ReservedItem> orderedPage = parse_reslist(doc);
    if (orderedPage != null) {
        reservations.addAll(orderedPage);
    }
    Elements orderedPager = doc.select(".pagination");
    if (!orderedPager.isEmpty()) {
        for (Element pageLink : orderedPager.first().select("a")) {
            if (pageLink.hasAttr("href")) {
                doc = Jsoup.parse(httpGet(pageLink.attr("abs:href"), ENCODING));
                doc.setBaseUri(opac_url);
                orderedPage = parse_reslist(doc);
                if (orderedPage != null) {
                    reservations.addAll(orderedPage);
                }
            }
        }
    }
    adata.setReservations(reservations);

    // Fees: only stored when the text has the expected "<label> (<amount> <currency>)" shape.
    Elements fees = doc.select("#fees");
    if (!fees.isEmpty()) {
        String feePattern = "Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)";
        String feeText = fees.first().text().trim();
        if (feeText.matches(feePattern)) {
            adata.setPendingFees(feeText.replaceAll(feePattern, "$1 $2"));
        }
    }

    return adata;
}