Example usage for org.jsoup.nodes Element hasAttr

List of usage examples for org.jsoup.nodes Element hasAttr

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element method hasAttr.

Prototype

public boolean hasAttr(String attributeKey) 

Source Link

Document

Test if this element has an attribute.

Usage

From source file:gov.medicaid.screening.dao.impl.NursingLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * <p>For a name search the form is first switched to the search-by-name screen, the criteria are
 * submitted, and the result grid is walked page by page; the detail page of every grid row is
 * fetched and parsed. For a license-number search the form is posted once and the response is
 * parsed only when its form title is "License Details".
 *
 * @param criteria The search criteria.
 * @param byName flag indicating it is a name search
 * @return the search result for licenses
 *
 * @throws URISyntaxException if an error occurs while building the URL.
 * @throws ClientProtocolException if client does not support protocol used.
 * @throws IOException if an error occurs while parsing response.
 * @throws ParseException if an error occurs while parsing response.
 * @throws ServiceException for any other problems encountered
 */
private SearchResult<License> getAllResults(NursingLicenseSearchCriteria criteria, boolean byName)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager());
    client.setRedirectStrategy(new LaxRedirectStrategy());
    client.setCookieStore(loginAsPublicUser());

    HttpGet getSearch = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse response = client.execute(getSearch);
    verifyAndAuditCall(getSearchURL(), response);

    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build());

    List<License> allLicenses = new ArrayList<License>();

    // switch to search by name screen
    if (byName) {
        HttpEntity entity = postForm(getSearchURL(), client, search,
                new String[][] { { "__EVENTTARGET", "_ctl7_rbtnSearch_1" }, { "__EVENTARGUMENT", "" },
                        { "_ctl7:ddlbLicenseType", "R" }, { "_ctl7:rbtnSearch", "2" },
                        { "_ctl7:txtCheckDigit", "" }, { "_ctl7:txtLicenseNumber", "" },
                        { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));
        entity = getResultPage(criteria, client, page, search, "_ctl7:cmdSearch", getSearchURL());
        page = Jsoup.parse(EntityUtils.toString(entity));

        // get the data grid entries
        if (page.select("table#_ctl7_grdSearchResults").size() < 1) {
            throw new ParsingException(ErrorCode.MITA50002.getDesc());
        }

        Elements rows = page.select(GRID_ROW_SELECTOR);
        while (rows.size() > 0) {
            for (Element row : rows) {
                String url = row.select("a").first().attr("href");
                String licenseNo = row.select("td:eq(4)").text();
                HttpGet getDetail = new HttpGet(Util.replaceLastURLPart(getSearchURL(), url));
                response = client.execute(getDetail);
                verifyAndAuditCall(getSearchURL(), response);
                Document licenseDetails = Jsoup.parse(EntityUtils.toString(response.getEntity()));
                // the first character of the license number is handed to parseLicense as the type
                allLicenses.add(parseLicense(licenseDetails, licenseNo.substring(0, 1)));
            }
            rows.clear();

            // check for next page
            Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first();
            if (currentPage == null) {
                // No pager span in the grid: there is no "current page" marker to continue from,
                // so stop paging instead of failing with a NullPointerException.
                break;
            }
            if (getLog() != null) {
                getLog().log(Level.DEBUG, "Current page is: " + currentPage.text());
            }
            // the element following the current-page marker is the next page link, if it has an href
            Element pageLink = currentPage.nextElementSibling();
            if (pageLink != null && pageLink.hasAttr("href")) {
                if (getLog() != null) {
                    getLog().log(Level.DEBUG, "There are more results, getting the next page.");
                }

                String target = parseEventTarget(pageLink.attr("href"));
                entity = getResultPage(criteria, client, page, search, target, getSearchURL());
                page = Jsoup.parse(EntityUtils.toString(entity));
                rows = page.select(GRID_ROW_SELECTOR);
            }
            // otherwise rows stays empty and the loop terminates
        }

    } else { // search by license number (site supports only exact match)

        HttpEntity entity = postForm(getSearchURL(), client, search,
                new String[][] { { "__EVENTTARGET", "_ctl7:cmdSearch" }, { "__EVENTARGUMENT", "" },
                        { "_ctl7:ddlbLicenseType", Util.defaultString(criteria.getLicenseType().getName()) },
                        { "_ctl7:rbtnSearch", "1" },
                        { "_ctl7:txtCheckDigit", Util.defaultString(criteria.getCheckDigit()) },
                        { "_ctl7:txtLicenseNumber", Util.defaultString(criteria.getIdentifier()) },
                        { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));
        if (page.select("span#lblFormTitle").text().equals("License Details")) {
            String prefLicenseType = criteria.getLicenseType().getName();
            allLicenses.add(parseLicense(page, prefLicenseType));
        }
    }

    SearchResult<License> searchResult = new SearchResult<License>();
    searchResult.setItems(allLicenses);
    return searchResult;
}

From source file:io.apiman.tools.i18n.TemplateScanner.java

/**
 * Scan the given html template using jsoup and find all strings that require translation.  This is
 * done by finding all elements with a "apiman-i18n-key" attribute.
 * @param file/*  w w w. j a v a 2  s  .c o  m*/
 * @param strings
 * @throws IOException
 */
private static void scanFile(File file, TreeMap<String, String> strings) throws IOException {
    Document doc = Jsoup.parse(file, "UTF-8");

    // First, scan for elements with the 'apiman-i18n-key' attribute.  These require translating.
    Elements elements = doc.select("*[apiman-i18n-key]");
    for (Element element : elements) {
        String i18nKey = element.attr("apiman-i18n-key");
        boolean isNecessary = false;

        // Process the element text (if the element has no children)
        if (strings.containsKey(i18nKey)) {
            if (hasNoChildren(element)) {
                isNecessary = true;
                String elementVal = element.text();
                if (elementVal.trim().length() > 0 && !elementVal.contains("{{")) {
                    String currentValue = strings.get(i18nKey);
                    if (!currentValue.equals(elementVal)) {
                        throw new IOException("Duplicate i18n key found with different default values.  Key="
                                + i18nKey + "  Value1=" + elementVal + "  Value2=" + currentValue);
                    }
                }
            }
        } else {
            if (hasNoChildren(element)) {
                String elementVal = element.text();
                if (elementVal.trim().length() > 0 && !elementVal.contains("{{")) {
                    isNecessary = true;
                    strings.put(i18nKey, elementVal);
                }
            }
        }

        // Process the translatable attributes
        for (String tattr : TRANSLATABLE_ATTRIBUTES) {
            if (element.hasAttr(tattr)) {
                String attrValue = element.attr(tattr);
                if (attrValue.contains("{{")) {
                    continue;
                }
                String attrI18nKey = i18nKey + '.' + tattr;
                String currentAttrValue = strings.get(attrI18nKey);
                if (currentAttrValue == null) {
                    isNecessary = true;
                    strings.put(attrI18nKey, attrValue);
                } else if (!currentAttrValue.equals(attrValue)) {
                    throw new IOException(
                            "Duplicate i18n key found with different default values (for attribute '" + tattr
                                    + "').  Key=" + attrI18nKey + "  Value1=" + attrValue + "  Value2="
                                    + currentAttrValue);
                } else {
                    isNecessary = true;
                }
            }
        }

        if (!isNecessary) {
            throw new IOException("Detected an unnecessary apiman-i18n-key attribute in file '" + file.getName()
                    + "' on element: " + element);
        }
    }

    // Next, scan all elements to see if the element *should* be marked for translation
    elements = doc.select("*");
    for (Element element : elements) {
        if (element.hasAttr("apiman-i18n-key") || element.hasAttr("apiman-i18n-skip")) {
            continue;
        }
        if (hasNoChildren(element)) {
            String value = element.text();
            if (value != null && value.trim().length() > 0) {
                if (!value.contains("{{")) {
                    throw new IOException("Found an element in '" + file.getName()
                            + "' that should be translated:  " + element);
                }
            }
        }
    }

    // Next scan elements with a translatable attribute and fail if any of those elements
    // are missing the apiman-i18n-key attribute.
    for (String tattr : TRANSLATABLE_ATTRIBUTES) {
        elements = doc.select("*[" + tattr + "]");
        for (Element element : elements) {
            if (element.hasAttr("apiman-i18n-key") || element.hasAttr("apiman-i18n-skip")
                    || element.attr(tattr).contains("{{")) {
                continue;
            } else {
                throw new IOException("In template '" + file.getName() + "', found an element with a '" + tattr
                        + "' attribute but missing 'apiman-i18n-key': " + element);
            }
        }
    }

}

From source file:com.lumata.lib.lupa.extractor.internal.HtmlBiggestImageExtractor.java

/**
 * Picks the image with the largest pixel area from the given elements.
 *
 * <p>At most {@code requirements.getMaxImagesToExplore()} distinct images are considered. Width and
 * height are taken from the element attributes when they can be parsed; images with a missing
 * dimension are handed to download tasks before the comparison.
 *
 * @param sourceUrl the URL passed to {@code getThreadFactory} for the download threads
 * @param htmlSection the elements to inspect; their {@code src} attribute is read as the image URL
 * @param requirements limits for the number of explored images and the minimum accepted size
 * @return the biggest image, or {@code null} if there is no candidate, its dimensions are unknown,
 *         or its width or height is below {@code requirements.getMinImageSize()}
 */
@Override
public Image extractBestImage(URL sourceUrl, Elements htmlSection, ImageExtractionRequirements requirements) {
    Map<String, Image> imagesToExplore = new HashMap<String, Image>();
    Set<ImageDownloadTask> imagesToDownload = new HashSet<ImageDownloadTask>();
    Iterator<org.jsoup.nodes.Element> it = htmlSection.iterator();

    // collect valid images
    while (it.hasNext() && imagesToExplore.size() < requirements.getMaxImagesToExplore()) {
        Element imageElement = it.next();
        String imageUrl = imageElement.absUrl("src");

        // Do not process empty img tags, duplicated images or tracking
        // pixels and other assorted ads. absUrl yields an empty string (not null) when the
        // attribute is missing or cannot be resolved, so both cases are checked.
        if (imageUrl == null || imageUrl.isEmpty() || imagesToExplore.containsKey(imageUrl)
                || isTrackingPixelOrAd(imageUrl)) {
            continue;
        }

        // remember this image
        Image imageContent = new Image(imageUrl);
        if (imageElement.hasAttr(WIDTH_ATTRIBUTE)) {
            // TODO: We need to convert other picture size units supported by html (there must be a lib for this)
            Integer width = parsePixelDimension(imageElement.attr(WIDTH_ATTRIBUTE));
            if (width != null) {
                imageContent.setWidth(width);
            }
        }
        if (imageElement.hasAttr(HEIGHT_ATTRIBUTE)) {
            Integer height = parsePixelDimension(imageElement.attr(HEIGHT_ATTRIBUTE));
            if (height != null) {
                imageContent.setHeight(height);
            }
        }
        if (imageContent.getWidth() == null || imageContent.getHeight() == null) {// mark image to download
            imagesToDownload.add(new ImageDownloadTask(imageContent));
        }
        imagesToExplore.put(imageUrl, imageContent);
    }

    // if dimensions are empty -> download image
    if (CollectionUtils.isNotEmpty(imagesToDownload)) {
        ExecutorService pool = Executors.newFixedThreadPool(imagesToDownload.size(),
                getThreadFactory(sourceUrl));
        try {
            pool.invokeAll(imagesToDownload);
        } catch (InterruptedException e) {
            LOG.error("InterruptedException while downloading images", e);
            // keep the interrupt visible to the caller
            Thread.currentThread().interrupt();
        } finally {
            // always release the worker threads, even when interrupted
            pool.shutdown();
        }
    }

    if (imagesToExplore.isEmpty()) {
        return null;
    }

    // select biggest image
    Image biggestImage = Collections.max(imagesToExplore.values(), new Comparator<Image>() {
        @Override
        public int compare(Image o1, Image o2) {
            // explicit comparison instead of subtraction, which can overflow for large areas
            int first = getSquarePixels(o1);
            int second = getSquarePixels(o2);
            return first < second ? -1 : (first == second ? 0 : 1);
        }
    });

    // dimensions may still be unknown here (unparseable attribute values); such an image
    // cannot be size-checked, so it is discarded
    if (biggestImage.getWidth() == null || biggestImage.getHeight() == null) {
        return null;
    }

    // if image is too small, discard
    return (biggestImage.getWidth() < requirements.getMinImageSize()
            || biggestImage.getHeight() < requirements.getMinImageSize()) ? null : biggestImage;
}

/**
 * Parses an html width/height attribute value given in pixels, with or without a "px" suffix.
 *
 * @param rawValue the attribute value
 * @return the pixel count, or {@code null} when the value is not a plain integer (e.g. "100%")
 */
private static Integer parsePixelDimension(String rawValue) {
    try {
        return Integer.valueOf(rawValue.replace("px", "").trim());
    } catch (NumberFormatException ignored) {
        // other units are not supported yet; treat the dimension as unknown
        return null;
    }
}

From source file:com.zacwolf.commons.email.Email.java

/**
 * Normalizes the html document of the message before sending.
 *
 * <p>Comments are removed, the first {@code title} element is set to the subject, every element
 * with class "date" is filled with the current date, and every table gets a
 * {@code border-spacing} style if it does not declare one.
 *
 * @param doc the message document; it is modified in place
 */
private void prepare(final org.jsoup.nodes.Document doc) {
    removeComments(doc);//Remove any comments from the html of the message to reduce the size
    //Change the title to match the subject of the email
    final org.jsoup.nodes.Element title = doc.getElementsByTag("title").first();
    if (title != null)
        title.html(getSubject());
    //Replace the contents of any tags with class="date" with the current date
    for (org.jsoup.nodes.Element datelem : doc.getElementsByClass("date")) {
        SimpleDateFormat df = new SimpleDateFormat("MMMMMMMMMM d, yyyy");
        if (datelem.hasAttr("format")) {
            try {
                df = new SimpleDateFormat(datelem.attr("format"));
            } catch (IllegalArgumentException ignored) {
                //invalid pattern in the template: keep the default format
            }
        }
        //written for every date element, not only for those that carry a format attribute
        datelem.html(df.format(TimeUtils.getGMTtime()));
    }
    //tables need the border-spacing: style attribute; added for GMail compatiblity
    for (org.jsoup.nodes.Element tbl : doc.getElementsByTag("table")) {
        final String style = tbl.attr("style");
        if (!style.contains("border-spacing:")) {
            //only separate from an existing declaration; an empty style needs no leading ';'
            final String separator = (style.isEmpty() || style.endsWith(";")) ? "" : ";";
            tbl.attr("style", style + separator + "border-spacing:0;");
        }
    }
}

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * Parses a code block element.
 *
 * <p>When the element has text, a {@code Range} named after its {@code class} attribute (or "pre"
 * when that is empty) is registered at the current end of the text buffer, the element's content
 * is appended, and the range length is then updated to the number of characters added. Elements
 * with a {@code class} attribute are appended child by child; others as plain text.
 *
 * @param elem the element to parse
 * @throws JSONException if one of the invoked helpers reports a JSON error
 */
private void parsePre(Element elem) throws JSONException {
    if (elem.hasText()) {
        final int start = sb.length();
        final String cssClass = elem.attr("class");
        final String rangeName = (cssClass == null || cssClass.length() == 0) ? "pre" : cssClass;
        final Range range = new Range(rangeName, start, 0);
        stil.add(range);
        if (!elem.hasAttr("class")) {
            // no class attribute: take the flattened text of the element
            sb.append(elem.text());
        } else {
            // walk the direct children so nested elements go through their own parsers
            for (Node kid : elem.childNodes()) {
                if (kid instanceof TextNode) {
                    sb.append(((TextNode) kid).getWholeText());
                } else if (kid instanceof Element) {
                    final Element nested = (Element) kid;
                    if ("span".equals(kid.nodeName())) {
                        parseSpan(nested);
                    } else {
                        parseOtherElement(nested);
                    }
                }
            }
        }
        // the range covers exactly what was appended for this element
        this.stil.updateLen(range, sb.length() - start);
    }
    prevWasMilestone = false;
    ensure(1, false);
}

From source file:de.geeksfactory.opacclient.apis.Pica.java

/**
 * Parses a search result page into a {@code SearchRequestResult}.
 *
 * <p>A page with an {@code .error} element either yields an empty result (for the known
 * "nothing found" messages) or an {@code OpacErrorException} carrying the error text. Otherwise the
 * total hit count is read from the {@code .pages} element and one {@code SearchResult} is built per
 * row of the hit list table. When the total is 1, the page is additionally parsed as a single
 * detail result.
 *
 * @param html the html of the result page; also stored in {@code reusehtml}
 * @param page the page number, used to compute the running number of each result
 * @return the parsed results together with the total count and page number
 * @throws OpacErrorException if the page shows an error other than "nothing found"
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);

    updateSearchSetValue(doc);

    if (doc.select(".error").size() > 0) {
        String error = doc.select(".error").first().text().trim();
        // The French message is written with unicode escapes so the accented characters
        // survive any source-file encoding.
        if (error.equals("Es wurde nichts gefunden.") || error.equals("Nothing has been found")
                || error.equals("Er is niets gevonden.")
                || error.equals("Rien n'a \u00e9t\u00e9 trouv\u00e9.")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        } else {
            // error
            throw new OpacErrorException(error);
        }
    }

    reusehtml = html;

    int results_total;

    String resultnumstr = doc.select(".pages").first().text();
    // prefer a trailing number; otherwise fall back to the "(n)" or ": n" forms below
    Pattern p = Pattern.compile("[0-9]+$");
    Matcher m = p.matcher(resultnumstr);
    if (m.find()) {
        resultnumstr = m.group();
    }
    if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    } else {
        results_total = Integer.parseInt(resultnumstr);
    }

    List<SearchResult> results = new ArrayList<>();

    if (results_total == 1) {
        // Only one result
        DetailledItem singleResult = parse_result(html);
        SearchResult sr = new SearchResult();
        sr.setType(getMediaTypeInSingleResult(html));
        sr.setInnerhtml(
                "<b>" + singleResult.getTitle() + "</b><br>" + singleResult.getDetails().get(0).getContent());
        results.add(sr);
    }

    Elements table = doc.select("table[summary=hitlist] tbody tr[valign=top]");
    // identifier = null;

    // NOTE: with the identifier assignment commented out, this loop only parses the first
    // matching link and has no other visible effect.
    Elements links = doc.select("table[summary=hitlist] a");
    boolean haslink = false;
    for (int i = 0; i < links.size(); i++) {
        Element node = links.get(i);
        if (node.hasAttr("href") && node.attr("href").contains("SHW?") && !haslink) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href")),
                        getDefaultEncoding());
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        // identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }

        }
    }

    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();
        if (tr.select("td.hit img").size() > 0) {
            // the media type is derived from the file name of the row's icon
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            // lookup key for the built-in defaults: lower-cased file name without image extension
            String typeKey = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "")
                    .replace(".png", "");
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // no (valid) configured mapping for this icon: use the default
                    sr.setType(defaulttypes.get(typeKey));
                }
            } else {
                sr.setType(defaulttypes.get(typeKey));
            }
        }
        Element middlething = tr.child(2);

        List<Node> children = middlething.childNodes();
        int childrennum = children.size();

        // Each entry: { tag of the child ("text" for bare text), tag of the sub-node, text
        // [, class of the child, style of the child] }. Texts of 3 characters or less are dropped.
        List<String[]> strings = new ArrayList<>();
        for (int ch = 0; ch < childrennum; ch++) {
            Node node = children.get(ch);
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {

                List<Node> subchildren = node.childNodes();
                for (int j = 0; j < subchildren.size(); j++) {
                    Node subnode = subchildren.get(j);
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(), "text", text,
                                    ((Element) node).className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(),
                                    ((Element) subnode).tag().getName(), text, ((Element) node).className(),
                                    node.attr("style") });
                        }
                    }
                }
            }
        }

        StringBuilder description = new StringBuilder();

        // a leading link text is rendered bold; only the first three parts are used
        int k = 0;
        for (String[] part : strings) {
            if (part[0].equals("a") && k == 0) {
                description.append("<b>").append(part[2]).append("</b>");
            } else if (k < 3) {
                description.append("<br />").append(part[2]);
            }
            k++;
        }
        sr.setInnerhtml(description.toString());

        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java

/**
 * Checks an element against one xpath step and returns it when it passes.
 *
 * <p>The element must match the step's tag name ("*" matches any). Without a non-blank predicate
 * that is sufficient. A predicate without operator is read as a numeric index, a filter function
 * call ("name()") or an attribute-existence test ("@attr"). A predicate with operator applies the
 * operator to the result of a function call, an attribute value, or a nested expression evaluated
 * on the element.
 *
 * @param e the element to test
 * @param node the xpath step holding tag name and optional predicate
 * @return {@code e} if it satisfies the step, otherwise {@code null}
 * @throws NoSuchFunctionException declared for the filter-function / nested evaluation calls
 * @throws NoSuchAxisException declared for the filter-function / nested evaluation calls
 */
public Element filter(Element e, Node node) throws NoSuchFunctionException, NoSuchAxisException {
    boolean tagMatches = node.getTagName().equals("*") || node.getTagName().equals(e.nodeName());
    if (!tagMatches) {
        return null;
    }
    // no usable predicate: the tag match alone decides
    if (node.getPredicate() == null || !StringUtils.isNotBlank(node.getPredicate().getValue())) {
        return e;
    }

    Predicate predicate = node.getPredicate();

    if (predicate.getOpEm() == null) {
        String expr = predicate.getValue();
        boolean accepted =
                // positional predicate, e.g. [2]
                (expr.matches("\\d+") && getElIndex(e) == Integer.parseInt(expr))
                // function predicate, e.g. [name()]
                || (expr.endsWith("()")
                        && (Boolean) callFilterFunc(expr.substring(0, expr.length() - 2), e))
                // attribute-existence predicate, e.g. [@href]
                || (expr.startsWith("@") && e.hasAttr(StringUtils.substringAfter(expr, "@")));
        //todo p.value ~= contains(./@href,'renren.com')
        return accepted ? e : null;
    }

    String left = predicate.getLeft();

    if (left.matches("[^/]+\\(\\)")) {
        // left side is a function call: apply the operator to its stringified result
        Object outcome = predicate.getOpEm().excute(
                callFilterFunc(left.substring(0, left.length() - 2), e).toString(),
                predicate.getRight());
        if (outcome instanceof Boolean && (Boolean) outcome) {
            return e;
        }
        if (outcome instanceof Integer && e.siblingIndex() == Integer.parseInt(outcome.toString())) {
            return e;
        }
        return null;
    }

    if (left.startsWith("@")) {
        // left side is an attribute: apply the operator to the attribute value
        String attrValue = e.attr(left.substring(1));
        Object outcome = predicate.getOpEm().excute(attrValue, predicate.getRight());
        return ((Boolean) outcome) ? e : null;
    }

    // anything else is evaluated as an expression against this single element
    List<Element> single = new LinkedList<Element>();
    single.add(e);
    List<JXNode> evaluated = evaluate(left, new Elements(single));
    boolean matched = (Boolean) predicate.getOpEm().excute(StringUtils.join(evaluated, ""),
            predicate.getRight());
    return matched ? e : null;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Logs in and collects the account's lent items, reservations and pending fees.
 *
 * <p>Lent media, requested media and ordered media are each loaded from their account page; when
 * that page has a {@code .pagination} element, every link in it that has an {@code href} is
 * fetched and parsed as well. Requested and ordered media are reported together as reservations.
 * Fees are read from the {@code #fees} element of the document fetched last.
 *
 * @param acc the account to load
 * @return the account data, or {@code null} when the login was not successful
 * @throws IOException declared for the http requests
 * @throws JSONException declared by this method; the raising call is not visible here
 * @throws OpacErrorException declared by this method; the raising call is not visible here
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    start();
    LoginResponse login = login(acc);
    if (!login.success) {
        return null;
    }
    AccountData adata = new AccountData(acc.getId());
    if (login.warning != null) {
        adata.setWarning(login.warning);
    }

    // Lent media
    httpGet(opac_url + "/userAccount.do?methodToCall=start", ENCODING);
    String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=loaned", ENCODING);
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    List<LentItem> lent = new ArrayList<>();
    List<LentItem> lentChunk = parse_medialist(doc);
    if (lentChunk != null) {
        lent.addAll(lentChunk);
    }
    Elements lentPagers = doc.select(".pagination");
    if (!lentPagers.isEmpty()) {
        // the links are taken from the first page; doc is replaced while following them
        for (Element link : lentPagers.first().select("a")) {
            if (link.hasAttr("href")) {
                html = httpGet(link.attr("abs:href"), ENCODING);
                doc = Jsoup.parse(html);
                doc.setBaseUri(opac_url);
                lentChunk = parse_medialist(doc);
                if (lentChunk != null) {
                    lent.addAll(lentChunk);
                }
            }
        }
    }
    adata.setLent(lent);

    // Requested media ("Vormerkungen")
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=requested", ENCODING);
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    List<ReservedItem> requested = new ArrayList<>();
    List<ReservedItem> requestedChunk = parse_reslist(doc);
    if (requestedChunk != null) {
        requested.addAll(requestedChunk);
    }
    Elements requestedPagers = doc.select(".pagination");
    if (!requestedPagers.isEmpty()) {
        for (Element link : requestedPagers.first().select("a")) {
            if (link.hasAttr("href")) {
                html = httpGet(link.attr("abs:href"), ENCODING);
                doc = Jsoup.parse(html);
                doc.setBaseUri(opac_url);
                requestedChunk = parse_reslist(doc);
                if (requestedChunk != null) {
                    requested.addAll(requestedChunk);
                }
            }
        }
    }

    // Ordered media ("Bestellungen") are appended to the same reservation list
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=ordered", ENCODING);
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    List<ReservedItem> orderedChunk = parse_reslist(doc);
    if (orderedChunk != null) {
        requested.addAll(orderedChunk);
    }
    Elements orderedPagers = doc.select(".pagination");
    if (!orderedPagers.isEmpty()) {
        for (Element link : orderedPagers.first().select("a")) {
            if (link.hasAttr("href")) {
                html = httpGet(link.attr("abs:href"), ENCODING);
                doc = Jsoup.parse(html);
                doc.setBaseUri(opac_url);
                orderedChunk = parse_reslist(doc);
                if (orderedChunk != null) {
                    requested.addAll(orderedChunk);
                }
            }
        }
    }
    adata.setReservations(requested);

    // Fees
    Elements feeElements = doc.select("#fees");
    if (feeElements.size() > 0) {
        String feeText = feeElements.first().text().trim();
        // group 1 is the amount, group 2 the (possibly empty) currency
        String feePattern = "Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)";
        if (feeText.matches(feePattern)) {
            adata.setPendingFees(feeText.replaceAll(feePattern, "$1 $2"));
        }
    }

    return adata;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses a search result page into a {@code SearchRequestResult}.
 *
 * <p>When the page contains {@code #RefineHitListForm}, the hit list is loaded with a separate
 * request first. A {@code .nodata} element yields an empty result. A header reading "1/1" is
 * reported as {@code OpacErrorException("is_a_redirect")} after storing the html in
 * {@code reusehtml}. Otherwise one {@code SearchResult} is built per row of {@code table.data},
 * including media type, cover and loan status where the row provides the scripts for them.
 *
 * @param html the html of the result page
 * @param page the page number, used to compute the running number of each result
 * @return the parsed results; the total is -1 when it cannot be read from the header
 * @throws OpacErrorException with message "is_a_redirect" when the header indicates a single hit
 * @throws IOException declared for the additional http requests
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException {
    Document doc = Jsoup.parse(html);

    if (doc.select("#RefineHitListForm").size() > 0) {
        // the results are located on a different page loaded via AJAX
        html = httpGet(opac_url + "/speedHitList.do?_=" + String.valueOf(System.currentTimeMillis() / 1000)
                + "&hitlistindex=0&exclusionList=", ENCODING);
        doc = Jsoup.parse(html);
    }

    if (doc.select(".nodata").size() > 0) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    doc.setBaseUri(opac_url + "/searchfoo");

    int results_total = -1;

    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        reusehtml = html;
        throw new OpacErrorException("is_a_redirect");
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data > tbody > tr");
    identifier = null;

    // take the "identifier" query parameter from the first single-hit link in the table
    Elements links = doc.select("table.data a");
    boolean haslink = false;
    for (Element node : links) {
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do") && !haslink) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils
                        .parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }

        }
    }

    // File names can look like this: "20_DVD_Video.gif"; compiled once for all rows
    final Pattern numberedIconPattern = Pattern.compile("(\\d+)_.*");

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();
        if (tr.select(".icn, img[width=32]").size() > 0) {
            String[] fparts = tr.select(".icn, img[width=32]").first().attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "")
                    .replace(".png", "");

            // for numbered icon names only the leading number is used as the lookup key
            Matcher matcher = numberedIconPattern.matcher(changedFname);
            if (matcher.find()) {
                changedFname = matcher.group(1);
            }

            MediaType defaulttype = defaulttypes.get(changedFname);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // no (valid) configured mapping for this icon: use the default
                    sr.setType(defaulttype);
                }
            } else {
                sr.setType(defaulttype);
            }
        }
        String title;
        String text;
        if (tr.select(".results table").size() > 0) { // e.g. RWTH Aachen
            title = tr.select(".title a").text();
            text = tr.select(".title div").text();
        } else { // e.g. Schaffhausen, BSB Munich
            title = tr.select(".title, .hitlistTitle").text();
            text = tr.select(".results, .hitlistMetadata").first().ownText();
        }

        // we need to do some evil javascript parsing here to get the cover
        // and loan status of the item

        // get cover
        if (tr.select(".cover script").size() > 0) {
            String js = tr.select(".cover script").first().html();
            String isbn = matchJSVariable(js, "isbn");
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(isbn) && !"".equals(ajaxUrl)) {
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=small", ENCODING);
                if (!"".equals(coverUrl)) {
                    sr.setCover(coverUrl.replace("\r\n", "").trim());
                }
            }
        }
        // get loan status and media ID
        if (tr.select("div[id^=loanstatus] + script").size() > 0) {
            String js = tr.select("div[id^=loanstatus] + script").first().html();
            String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier",
                    "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit",
                    "context" };
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(ajaxUrl)) {
                JSONObject id = new JSONObject();
                List<NameValuePair> map = new ArrayList<>();
                for (String variable : variables) {
                    String value = matchJSVariable(js, variable);
                    if (!"".equals(value)) {
                        map.add(new BasicNameValuePair(variable, value));
                    }
                    try {
                        if (variable.equals("itemIdentifier")) {
                            id.put("id", value);
                        } else if (variable.equals("loanstateDBId")) {
                            id.put("db", value);
                        }
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                }
                sr.setId(id.toString());
                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING)
                        .replace("\r\n", "").trim();
                Document loanStatusDoc = Jsoup.parse(loanStatusHtml);
                String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim();

                // Umlauts in the status phrases are written as unicode escapes so they survive
                // any source-file encoding.
                if ((loanstatus.startsWith("entliehen")
                        && loanstatus.contains("keine Vormerkung m\u00f6glich")
                        || loanstatus.contains("Keine Exemplare verf\u00fcgbar"))) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar"))
                        || (loanstatus.startsWith("vorbestellbar")
                                && !loanstatus.contains("nicht vorbestellbar"))
                        || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar"))
                        || (loanstatus.contains("heute zur\u00fcckgebucht"))
                        || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3))
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
        }

        sr.setInnerhtml(("<b>" + title + "</b><br/>") + text);

        sr.setNr(10 * (page - 1) + i + 1);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Performs one step of the multi-step reservation dialogue for an item.
 *
 * <p>The step taken depends on {@code useraction} and {@code selection}:
 * <ul>
 * <li>{@code MultiStepResult.ACTION_CONFIRMATION}: posts the confirmation form to
 * {@code index.asp} and returns a result with status {@code OK}.</li>
 * <li>{@code selection == null} or {@code useraction == 0}: loads the page addressed by
 * {@code item.getReservation_info()}, submits the login form if the page contains an
 * {@code AUSWEIS} input, and returns {@code SELECTION_NEEDED} with the available options
 * if the page contains a branch drop-down.</li>
 * <li>{@code ReservationResult.ACTION_BRANCH}: posts {@code selection} as the chosen
 * branch together with the target remembered in {@code _res_target}.</li>
 * <li>Any other combination: returns a result with status {@code ERROR}.</li>
 * </ul>
 *
 * <p>If the page obtained above carries a {@code target} input with the value
 * {@code makevorbest}, a {@code CONFIRMATION_NEEDED} result with the collected detail rows
 * is returned. Otherwise the result is {@code ERROR}, carrying the text of the single
 * {@code kontomeldung} element if there is exactly one, or the generic unknown-error string.
 *
 * <p>Side effects visible here: {@code branch_inputfield} may be switched to {@code "VZST"}
 * and {@code _res_target} is updated when a branch selection is requested.
 *
 * @param item       the item to reserve; only its reservation info is read here
 * @param acc        account whose name and password are sent if a login form is shown
 * @param useraction identifier of the dialogue step the caller is answering
 * @param selection  value chosen by the user in the previous step, may be {@code null}
 * @return the outcome of this dialogue step
 * @throws IOException propagated from the HTTP helpers or from form encoding
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    final String reservationPath = item.getReservation_info();
    Document page;

    if (useraction == MultiStepResult.ACTION_CONFIRMATION) {
        // The user accepted the confirmation screen: submit it and finish.
        List<NameValuePair> confirmForm = new ArrayList<>(2);
        confirmForm.add(new BasicNameValuePair("make_allvl", "Bestaetigung"));
        confirmForm.add(new BasicNameValuePair("target", "makevorbest"));
        httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(confirmForm), getDefaultEncoding());
        return new ReservationResult(MultiStepResult.Status.OK);
    }

    if (selection == null || useraction == 0) {
        // First step of the dialogue: open the reservation page of the item.
        page = Jsoup.parse(httpGet(opac_url + "/" + reservationPath, getDefaultEncoding()));

        if (!page.select("input[name=AUSWEIS]").isEmpty()) {
            // A login form was served instead; submit the account credentials.
            List<NameValuePair> loginForm = new ArrayList<>(2);
            loginForm.add(new BasicNameValuePair("AUSWEIS", acc.getName()));
            loginForm.add(new BasicNameValuePair("PWD", acc.getPassword()));
            if (data.has("db")) {
                try {
                    loginForm.add(new BasicNameValuePair("vkontodb", data.getString("db")));
                } catch (JSONException e) {
                    // Best effort: the login is attempted without the "vkontodb" field.
                    e.printStackTrace();
                }
            }
            loginForm.add(new BasicNameValuePair("B1", "weiter"));
            loginForm.add(new BasicNameValuePair("target", page.select("input[name=target]").val()));
            loginForm.add(new BasicNameValuePair("type", "VT2"));
            String loginResponse = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(loginForm),
                    getDefaultEncoding());
            page = Jsoup.parse(loginResponse);
        }

        // Fall back to the alternative drop-down name if the configured one is absent.
        if (page.select("select[name=" + branch_inputfield + "]").isEmpty()
                && !page.select("select[name=VZST]").isEmpty()) {
            branch_inputfield = "VZST";
        }

        Element branchSelect = page.select("select[name=" + branch_inputfield + "]").first();
        if (branchSelect != null) {
            // The user has to pick a branch: hand the options back to the caller.
            List<Map<String, String>> branchChoices = new ArrayList<>();
            for (Element option : branchSelect.children()) {
                String label = option.text().trim();
                Map<String, String> choice = new HashMap<>();
                // Options without a value attribute are identified by their label.
                choice.put("key", option.hasAttr("value") ? option.attr("value") : label);
                choice.put("value", label);
                branchChoices.add(choice);
            }
            _res_target = page.select("input[name=target]").attr("value");

            ReservationResult needsBranch = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED);
            needsBranch.setActionIdentifier(ReservationResult.ACTION_BRANCH);
            needsBranch.setSelection(branchChoices);
            return needsBranch;
        }
    } else if (useraction == ReservationResult.ACTION_BRANCH) {
        // The user picked a branch: submit it together with the remembered target.
        List<NameValuePair> branchForm = new ArrayList<>(2);
        branchForm.add(new BasicNameValuePair(branch_inputfield, selection));
        branchForm.add(new BasicNameValuePair("button2", "weiter"));
        branchForm.add(new BasicNameValuePair("target", _res_target));
        String branchResponse = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(branchForm),
                getDefaultEncoding());
        page = Jsoup.parse(branchResponse);
    } else {
        // No request was made for this combination of arguments.
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }

    // attr() yields "" when no target input exists, so this also covers the "absent" case.
    if ("makevorbest".equals(page.select("input[name=target]").attr("value"))) {
        List<String[]> details = new ArrayList<>();

        if (page.getElementsByClass("kontomeldung").size() == 1) {
            String notice = page.getElementsByClass("kontomeldung").get(0).text().trim();
            details.add(new String[] { notice });
        }

        Pattern feePattern = Pattern.compile("geb.hr", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
        for (Element block : page.select(".kontozeile_center")) {
            // Replace <br> tags by a marker so the visible text can be split into lines.
            String marked = block.html().replaceAll("(?i)<br[^>]*>", "br2n");
            String[] lines = Jsoup.parse(marked).text().split("br2n");
            for (String line : lines) {
                if (!feePattern.matcher(line).find()) {
                    continue;
                }
                if (line.contains("usstehend") || !line.contains("orbestellung")) {
                    continue;
                }
                details.add(new String[] { line.trim() });
            }
        }

        // Erlangen uses "Kostenpflichtige Vorbestellung (1 Euro)"
        // as the label of its reservation button; val() is "" if the button is missing.
        String buttonLabel = page.select("#vorbest").val();
        if (buttonLabel.contains("(")) {
            details.add(new String[] { buttonLabel.trim() });
        }

        for (Element row : page.select(".kontozeile_center table tr")) {
            boolean singleLabel = row.select(".konto_feld").size() == 1;
            boolean singleContent = row.select(".konto_feldinhalt").size() == 1;
            if (singleLabel && singleContent) {
                String fieldName = row.select(".konto_feld").text().trim();
                String fieldValue = row.select(".konto_feldinhalt").text().trim();
                details.add(new String[] { fieldName, fieldValue });
            }
        }

        ReservationResult needsConfirmation = new ReservationResult(
                MultiStepResult.Status.CONFIRMATION_NEEDED);
        needsConfirmation.setDetails(details);
        return needsConfirmation;
    }

    // Not a confirmation page: report the message shown by the OPAC, if there is exactly one.
    if (page.getElementsByClass("kontomeldung").size() == 1) {
        String message = page.getElementsByClass("kontomeldung").get(0).text();
        return new ReservationResult(MultiStepResult.Status.ERROR, message);
    }

    return new ReservationResult(MultiStepResult.Status.ERROR,
            stringProvider.getString(StringProvider.UNKNOWN_ERROR));
}