Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses an Untis substitution table ({@link UntisSubstitutionParser}).
 *
 * @param v/*from w  w w . j  av  a2 s . c  o  m*/
 * @param lastChange
 * @param doc
 * @throws JSONException
 * @throws CredentialInvalidException
 */
protected void parseSubstitutionTable(SubstitutionSchedule v, String lastChange, Document doc)
        throws JSONException, CredentialInvalidException {
    JSONObject data = scheduleData.getData();

    LocalDateTime lastChangeDate = ParserUtils.parseDateTime(lastChange);
    Pattern dayPattern = Pattern.compile("\\d\\d?.\\d\\d?. / \\w+");

    int dateColumn = -1;
    JSONArray columns = data.getJSONArray("columns");
    for (int i = 0; i < columns.length(); i++) {
        if (columns.getString(i).equals("date")) {
            dateColumn = i;
            break;
        }
    }

    Element table = doc.select("table[rules=all], table:has(tr:has(td[align=center]))").first();
    if (table.text().replace("\u00a0", "").trim().equals("Keine Vertretungen"))
        return;

    if (dateColumn == -1) {
        SubstitutionScheduleDay day = new SubstitutionScheduleDay();
        day.setLastChangeString(lastChange);
        day.setLastChange(lastChangeDate);
        String title = doc.select("font[size=5], font[size=4], font[size=3] b").text();
        Matcher matcher = dayPattern.matcher(title);
        if (matcher.find()) {
            String date = matcher.group();
            day.setDateString(date);
            day.setDate(ParserUtils.parseDate(date));
        }
        parseSubstitutionScheduleTable(table, data, day);
        v.addDay(day);
    } else {
        for (Element line : table.select("tr.list.odd:not(:has(td.inline_header)), "
                + "tr.list.even:not(:has(td.inline_header)), " + "tr:has(td[align=center]):gt(0)")) {
            SubstitutionScheduleDay day = null;
            String date = line.select("td").get(dateColumn).text().trim();

            if (date.indexOf("-") > 0) {
                date = date.substring(0, date.indexOf("-") - 1).trim();
            }

            LocalDate parsedDate = ParserUtils.parseDate(date);
            for (SubstitutionScheduleDay search : v.getDays()) {
                if (Objects.equals(search.getDate(), parsedDate)
                        || Objects.equals(search.getDateString(), date)) {
                    day = search;
                    break;
                }
            }
            if (day == null) {
                day = new SubstitutionScheduleDay();
                day.setDateString(date);
                day.setDate(parsedDate);
                day.setLastChangeString(lastChange);
                day.setLastChange(lastChangeDate);
                v.addDay(day);
            }
            parseSubstitutionScheduleTable(line, data, day);
        }
    }
}

From source file:com.vaushell.shaarlijavaapi.ShaarliClient.java

/**
 * Extracts a string from {@code source} according to the named template.
 *
 * <p>The template's CSS path (if any) picks the first matching descendant, its attribute name (if
 * any) picks an attribute instead of the element text, and its regex (if any) narrows the trimmed
 * value to the first match.
 *
 * @param source       element to extract from; must not be {@code null}
 * @param templateName key of the template to apply
 * @return the extracted, trimmed value, or {@code null} if nothing matched or the value is empty
 * @throws IllegalArgumentException if {@code source} is {@code null} or the template is unknown
 */
private String extract(final Element source, final String templateName) {
    if (source == null) {
        throw new IllegalArgumentException();
    }

    final ShaarliTemplates.Template tpl = templates.get(templateName);
    if (tpl == null) {
        throw new IllegalArgumentException("template '" + templateName + "' not found");
    }

    // Resolve the element the value is read from.
    Element target = source;
    if (!tpl.cssPath.isEmpty()) {
        final Elements matches = source.select(tpl.cssPath);
        if (matches.isEmpty()) {
            return null;
        }
        target = matches.first();
    }

    final String raw = tpl.attribut.isEmpty() ? target.text() : target.attr(tpl.attribut);
    if (raw == null) {
        return null;
    }

    String value = raw.trim();
    if (!tpl.regex.isEmpty()) {
        final Matcher matcher = Pattern.compile(tpl.regex).matcher(value);
        if (matcher.find()) {
            value = matcher.group().trim();
        }
    }

    return value.isEmpty() ? null : value;
}

From source file:info.dolezel.fatrat.plugins.UloztoDownload.java

/**
 * Starts processing an uloz.to link: normalizes the host, logs in, fetches the page and then either
 * starts a premium download or walks through the captcha-protected free download.
 *
 * <p>All outcomes are reported through {@code setFailed}, {@code setMessage},
 * {@code reportFileName} and {@code startDownload}; the method itself returns nothing.
 *
 * @param link the link to process
 */
@Override
public void processLink(String link) {

    // String.replace() works on literal text, so the former "https?://..." argument never matched.
    // A regex replacement is required to rewrite both the bare and the mobile ("m.") host.
    link = link.replaceFirst("^https?://(m\\.)?uloz\\.to", "https://www.uloz.to");

    if (!logIn(link)) {
        return;
    }

    final String downloadLink = link; // 'link' is reassigned above, so it cannot be final itself

    fetchPage(link, new PageFetchListener() {

        @Override
        public void onCompleted(ByteBuffer buf, Map<String, String> headers) {
            try {
                // Follow redirects manually; some targets indicate a removed file.
                if (headers.containsKey("location")) {
                    String location = headers.get("location");
                    if (location.contains("smazano") || location.contains("nenalezeno")) {
                        setFailed("The file has been removed");
                    } else {
                        processLink(location);
                    }
                    return;
                }

                final String page = charsetUtf8.decode(buf).toString();

                if (page.contains("?disclaimer=1")) {
                    processLink(downloadLink + "?disclaimer=1");
                    return;
                }

                final Document doc = Jsoup.parse(page);
                final Element freeForm = doc.getElementById("frm-download-freeDownloadTab-freeDownloadForm");
                // getElementById() expects the bare id; a leading '#' is CSS selector syntax and
                // made this lookup always return null.
                final Element premiumLink = doc.getElementById("quickDownloadButton");

                boolean usePremium = usePremium(downloadLink);

                // NOTE(review): this literal looks like it lost non-ASCII characters - confirm
                // against the text the site actually shows.
                if (page.contains("Nem dostatek kreditu")) {
                    setMessage("Credit depleted, using FREE download");
                } else if (usePremium && premiumLink != null) {
                    String msg = "Using premium download";

                    Elements aCredits = doc.getElementsByAttributeValue("href", "/kredit");

                    if (!aCredits.isEmpty()) {
                        msg += " (" + aCredits.get(0).ownText() + " left)";
                    }

                    setMessage(msg);

                    startDownload("http://www.uloz.to" + premiumLink.attr("href"));
                    return;

                } else if (loggedIn) {
                    setMessage("Login failed, using FREE download");
                }

                Elements aNames = doc.getElementsByClass("jsShowDownload");
                if (!aNames.isEmpty()) {
                    reportFileName(aNames.get(0).ownText());
                }

                // Without the form there is nothing to submit; fail with a clear message instead
                // of a bare NullPointerException.
                if (freeForm == null) {
                    setFailed("Free download form not found");
                    return;
                }

                final PostQuery pq = new PostQuery();
                final Map<String, String> hdr = new HashMap<String, String>();
                Elements eHiddens = freeForm.select("input[type=hidden]");

                hdr.put("X-Requested-With", "XMLHttpRequest");
                hdr.put("Referer", downloadLink);
                hdr.put("Accept", "application/json, text/javascript, */*; q=0.01");

                // Carry over all hidden form fields into the POST body.
                for (Element e : eHiddens) {
                    pq.add(e.attr("name"), e.attr("value"));
                }

                // nextInt(bound) is never negative, unlike Math.abs(nextInt()) which stays
                // negative for Integer.MIN_VALUE.
                fetchPage("https://uloz.to/reloadXapca.php?rnd=" + new Random().nextInt(Integer.MAX_VALUE),
                        new PageFetchListener() {

                            @Override
                            public void onCompleted(ByteBuffer buf, Map<String, String> headers) {
                                CharBuffer cb = charsetUtf8.decode(buf);
                                String captchaUrl;

                                try {
                                    JSONObject json = new JSONObject(cb.toString());
                                    captchaUrl = "https:" + json.getString("image");
                                    pq.add("hash", json.getString("hash"));
                                    pq.add("timestamp", "" + json.getInt("timestamp"));
                                    pq.add("salt", "" + json.getInt("salt"));
                                } catch (JSONException e) {
                                    setFailed("Error parsing captcha JSON");
                                    return;
                                }

                                solveCaptcha(captchaUrl, new CaptchaListener() {

                                    @Override
                                    public void onFailed() {
                                        setFailed("Failed to decode the captcha code");
                                    }

                                    @Override
                                    public void onSolved(String text) {

                                        String action = freeForm.attr("action");
                                        pq.add("captcha_value", text);

                                        // Submit the form; the JSON answer carries the file URL.
                                        fetchPage("https://www.uloz.to" + action, new PageFetchListener() {

                                            @Override
                                            public void onCompleted(ByteBuffer buf,
                                                    Map<String, String> headers) {
                                                try {
                                                    CharBuffer cb = charsetUtf8.decode(buf);
                                                    JSONObject obj = new JSONObject(cb.toString());

                                                    startDownload(obj.getString("url"));
                                                } catch (Exception e) {
                                                    setFailed("" + e);
                                                }
                                            }

                                            @Override
                                            public void onFailed(String error) {
                                                setFailed(error);
                                            }

                                        }, pq.toString(), hdr);

                                    }
                                });
                            }

                            @Override
                            public void onFailed(String error) {
                                setFailed("Failed to load captcha AJAX page");
                            }

                        });

            } catch (Exception e) {
                e.printStackTrace();
                setFailed(e.toString());
            }
        }

        @Override
        public void onFailed(String error) {
            setFailed("Failed to load the initial page");
        }
    }, null);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Performs one step of the multi-step reservation (or order) dialogue for an item.
 *
 * <p>Which request is sent depends on {@code useraction} and {@code selection}: a confirmation
 * POST, the initial availability GET (optionally followed by a login and an express-order POST),
 * or a branch-selection POST. The resulting page is then mapped to a {@link ReservationResult}:
 * <ul>
 * <li>{@code SELECTION_NEEDED} with the list of branches when the page contains inputs named
 * {@code issuepoint},</li>
 * <li>{@code CONFIRMATION_NEEDED} with detail rows when {@code #CirculationForm p} exists and at
 * least two {@code input[type=button]} elements are present,</li>
 * <li>{@code ERROR} when no request was made, or when an element with class {@code error} or
 * {@code #CirculationForm .textrot} is present,</li>
 * <li>{@code OK} otherwise.</li>
 * </ul>
 *
 * @param item       item to reserve; its reservation info string is appended to the availability
 *                   request and may be replaced by that of another copy before a retry
 * @param acc        account whose name and password are sent if the page asks for a login
 * @param useraction identifier of the step being answered (compared against
 *                   {@code MultiStepResult.ACTION_CONFIRMATION}, {@code 0} and
 *                   {@code ReservationResult.ACTION_BRANCH})
 * @param selection  value chosen by the user in the previous step, or {@code null}
 * @return the outcome of this step
 * @throws IOException presumably raised by the HTTP helpers - confirm against their declarations
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    String reservation_info = item.getReservation_info();
    final String branch_inputfield = "issuepoint";

    Document doc = null;

    // The same flow serves two server actions; "doBestellung" in the info string selects "order".
    String action = "reservation";
    if (reservation_info.contains("doBestellung")) {
        action = "order";
    }

    if (useraction == MultiStepResult.ACTION_CONFIRMATION) {
        // The user confirmed the details shown earlier: submit the action itself.
        List<NameValuePair> nameValuePairs = new ArrayList<>(2);
        nameValuePairs.add(new BasicNameValuePair("methodToCall", action));
        nameValuePairs.add(new BasicNameValuePair("CSId", CSId));
        String html = httpPost(opac_url + "/" + action + ".do", new UrlEncodedFormEntity(nameValuePairs),
                ENCODING);
        doc = Jsoup.parse(html);
    } else if (selection == null || useraction == 0) {
        // First step: request the availability page for this item.
        String html = httpGet(opac_url + "/availability.do?" + reservation_info, ENCODING);
        doc = Jsoup.parse(html);

        if (doc.select("input[name=username]").size() > 0) {
            // Login required
            List<NameValuePair> nameValuePairs = new ArrayList<>(2);
            nameValuePairs.add(new BasicNameValuePair("username", acc.getName()));
            nameValuePairs.add(new BasicNameValuePair("password", acc.getPassword()));
            nameValuePairs.add(new BasicNameValuePair("methodToCall", "submit"));
            nameValuePairs.add(new BasicNameValuePair("CSId", CSId));
            nameValuePairs.add(new BasicNameValuePair("login_action", "Login"));

            html = handleLoginMessage(
                    httpPost(opac_url + "/login.do", new UrlEncodedFormEntity(nameValuePairs), ENCODING));
            doc = Jsoup.parse(html);

            // Remember the login only if the response shows no error element.
            if (doc.getElementsByClass("error").size() == 0) {
                logged_in = System.currentTimeMillis();
                logged_in_as = acc;
            }
        }
        if (doc.select("input[name=expressorder]").size() > 0) {
            // The page offers an express-order field: post the action with a blank value for it.
            List<NameValuePair> nameValuePairs = new ArrayList<>(2);
            nameValuePairs.add(new BasicNameValuePair(branch_inputfield, selection));
            nameValuePairs.add(new BasicNameValuePair("methodToCall", action));
            nameValuePairs.add(new BasicNameValuePair("CSId", CSId));
            nameValuePairs.add(new BasicNameValuePair("expressorder", " "));
            html = httpPost(opac_url + "/" + action + ".do", new UrlEncodedFormEntity(nameValuePairs),
                    ENCODING);
            doc = Jsoup.parse(html);
        }
        if (doc.select("input[name=" + branch_inputfield + "]").size() > 0) {
            // A branch has to be chosen: collect every table cell that holds exactly one input
            // as a key (input value) / value (cell text) pair.
            List<Map<String, String>> branches = new ArrayList<>();
            for (Element option : doc.select("input[name=" + branch_inputfield + "]").first().parent().parent()
                    .parent().select("td")) {
                if (option.select("input").size() != 1) {
                    continue;
                }
                String value = option.text().trim();
                String key = option.select("input").val();
                Map<String, String> selopt = new HashMap<>();
                selopt.put("key", key);
                selopt.put("value", value);
                branches.add(selopt);
            }
            ReservationResult result = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED);
            result.setActionIdentifier(ReservationResult.ACTION_BRANCH);
            result.setSelection(branches);
            return result;
        }
    } else if (useraction == ReservationResult.ACTION_BRANCH) {
        // The user picked a branch: submit it together with the action.
        List<NameValuePair> nameValuePairs = new ArrayList<>(2);
        nameValuePairs.add(new BasicNameValuePair(branch_inputfield, selection));
        nameValuePairs.add(new BasicNameValuePair("methodToCall", action));
        nameValuePairs.add(new BasicNameValuePair("CSId", CSId));

        String html = httpPost(opac_url + "/" + action + ".do", new UrlEncodedFormEntity(nameValuePairs),
                ENCODING);
        doc = Jsoup.parse(html);
    }

    // None of the branches above issued a request.
    if (doc == null) {
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }

    if (doc.getElementsByClass("error").size() >= 1) {
        return new ReservationResult(MultiStepResult.Status.ERROR,
                doc.getElementsByClass("error").get(0).text());
    }

    if (doc.select("#CirculationForm p").size() > 0 && doc.select("input[type=button]").size() >= 2) {
        // Confirmation page: split the paragraph at <br> and turn "label: value" rows into pairs.
        List<String[]> details = new ArrayList<>();
        for (String row : doc.select("#CirculationForm p").first().html().split("<br>")) {
            Document frag = Jsoup.parseBodyFragment(row);
            if (frag.text().contains(":")) {
                String[] split = frag.text().split(":");
                if (split.length >= 2) {
                    details.add(new String[] { split[0].trim() + ":", split[1].trim() });
                }
            } else {
                details.add(new String[] { "", frag.text().trim() });
            }
        }
        ReservationResult result = new ReservationResult(Status.CONFIRMATION_NEEDED);
        result.setDetails(details);
        return result;
    }

    if (doc.select("#CirculationForm .textrot").size() >= 1) {
        String errmsg = doc.select("#CirculationForm .textrot").get(0).text();
        if (errmsg.contains("Dieses oder andere Exemplare in anderer Zweigstelle ausleihbar")) {
            // The message says copies are available in another branch: pick the copy with the
            // fewest reservations (ties broken by the earlier return date) and retry with it.
            Copy best = null;
            for (Copy copy : item.getCopies()) {
                if (copy.getResInfo() == null) {
                    continue;
                }
                if (best == null) {
                    best = copy;
                    continue;
                }
                try {
                    if (Integer.parseInt(copy.getReservations()) < Long.parseLong(best.getReservations())) {
                        best = copy;
                    } else if (Integer.parseInt(copy.getReservations()) == Long
                            .parseLong(best.getReservations())) {
                        if (copy.getReturnDate().isBefore(best.getReturnDate())) {
                            best = copy;
                        }
                    }
                } catch (NumberFormatException e) {
                    // A reservation count that is not a number: this copy is not compared and the
                    // current best candidate is kept.
                }
            }
            if (best != null) {
                item.setReservation_info(best.getResInfo());
                return reservation(item, acc, 0, null);
            }
        }
        return new ReservationResult(MultiStepResult.Status.ERROR, errmsg);
    }

    // Success page with a message in the form's two-column cell.
    if (doc.select("#CirculationForm td[colspan=2] strong").size() >= 1) {
        return new ReservationResult(MultiStepResult.Status.OK,
                doc.select("#CirculationForm td[colspan=2] strong").get(0).text());
    }
    return new ReservationResult(Status.OK);
}

From source file:ExtractorContentTest.java

/**
 * Extracts one catalog per table found in {@code section} and appends it to {@code catalogs}.
 *
 * <p>The first {@code h2}, first {@code h3}, a leading {@code ";"}-prefixed paragraph and the table
 * caption are passed on as structural information for each catalog. Afterwards every product of
 * every catalog in {@code catalogs} is named after the value of its first header.
 *
 * @param section  element whose headings, first paragraph and tables are inspected
 * @param catalogs list that receives the extracted catalogs
 */
private void treatSection(Element section, List<Catalog> catalogs) {

    // Section titles. TODO: headings may be missing, and only the first of each kind is used.
    Elements h2Elements = section.getElementsByTag("h2");
    String s2 = h2Elements.isEmpty() ? null : h2Elements.first().text();

    Elements h3Elements = section.getElementsByTag("h3");
    String s3 = h3Elements.isEmpty() ? null : h3Elements.first().text();

    // A first paragraph starting with ";" contributes its text with every ";" removed.
    String dt = null;
    Elements paragraphs = section.getElementsByTag("p");
    if (!paragraphs.isEmpty()) {
        String firstParagraph = paragraphs.first().text();
        if (firstParagraph.startsWith(";")) {
            dt = firstParagraph.replaceAll(";", "");
        }
    }

    // TODO: subsections and free-text comments are not handled.

    for (Element table : section.getElementsByTag("table")) {

        // The caption, when present, is handed to the structural information.
        Elements captions = table.select("caption");
        String captionName = captions.isEmpty() ? null : captions.first().text();

        List<Header> rHeaders = collectHeaders(table);

        boolean sortable = !(table.select("[class=sortable wikitable]").isEmpty()
                && table.select("[class=wikitable sortable]").isEmpty());

        // Sortable tables with a thead take their headers from it. TODO: other cases.
        Elements heads = table.select("thead");
        if (sortable && !heads.isEmpty()) {
            rHeaders = collectHeaders(heads.first());
        }

        Tree<String> structuralInformation = mkStructuralInformation(s2, s3, dt, captionName);

        // For sortable tables only the rows of the first tbody are treated.
        Element rowContainer = sortable ? table.select("tbody").first() : table;
        Catalog product = treatRows(rowContainer, structuralInformation, rHeaders, sortable);
        catalogs.add(product);
    }

    // Name every product after the value stored under its first header.
    for (Catalog catalog : catalogs) {
        for (Product p : catalog) {
            Header primaryHeader = p.getHeaders().get(0);
            String primaryValue = p.getValue(primaryHeader.getName());
            p.setName(primaryValue);
        }
    }

}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Parses a search result page into a {@link SearchRequestResult}.
 *
 * <p>Besides building the result list, this method stores the {@code identifier} query parameter
 * of the first {@code singleHit.do} link and the number of parsed rows in fields of this object.
 *
 * @param html markup of the result page
 * @param page page number of the result page; used to compute each result's number
 * @return the parsed results together with the total hit count ({@code -1} if it could not be
 *         read) and the page number; an empty result if the page reports "keine Treffer"
 * @throws OpacErrorException if the page contains an {@code .error} or {@code .nohits} element, or
 *                            with the message {@code "is_a_redirect"} when the header reads "1/1"
 *                            (the markup is then kept in {@code reusehtml})
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");

    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    int results_total = -1;

    // The header either shows "(1/1)" for a direct hit or carries the total number of results.
    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        reusehtml = html;
        throw new OpacErrorException("is_a_redirect");
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data tbody tr");
    identifier = null;

    // Only the first link to singleHit.do is inspected for the "identifier" parameter.
    for (Element node : doc.select("table.data a")) {
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do")) {
            try {
                List<NameValuePair> anyurl = URLEncodedUtils
                        .parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            break;
        }
    }

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type: configured mapping by icon file name first, then defaults by icon title
        // and by file name without extension.
        if (tr.select("td img[title]").size() > 0) {
            String title = tr.select("td img").get(0).attr("title");
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                    .replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }

        // Keywords in the row text override the icon-based type.
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }

        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }

        // The cell holding the title data is the third one if it contains a link, else the second.
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }

        List<Node> children = middlething.childNodes();
        if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) {
            Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first();
            if (indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }

        // Flatten the cell into text parts. Direct text nodes yield {"text", "", text}; text below
        // an element yields {parent tag, child tag or "text", text, parent class, parent style}.
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element parent = (Element) node;
                for (Node subnode : node.childNodes()) {
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(), "text", text,
                                    parent.className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(),
                                    ((Element) subnode).tag().getName(), text, parent.className(),
                                    node.attr("style") });
                        }
                    }
                }
            }
        }

        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            List<NameValuePair> z3988data;
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                z3988data = URLEncodedUtils
                        .parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() == null || nv.getValue().trim().equals("")) {
                        continue;
                    }
                    String name = nv.getName();
                    if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                        description.append("<b>").append(nv.getValue()).append("</b>");
                        hastitle = true;
                    } else if (name.equals("rft.au") || name.equals("rft.date")) {
                        description.append("<br />").append(nv.getValue());
                    }
                }
            } catch (URISyntaxException e) {
                // Unusable metadata: fall back to the description built from the text parts.
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }
        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) {
                    // k is 1 here, so a line break always precedes the text. (This branch used
                    // to be followed by an identical, unreachable copy.)
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && part[0].equals("text")
                        && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    // A "signature found" flag used to guard this branch but was never set, so
                    // it had no effect and was removed.
                    description.append("<br />");
                    description.append(part[2]);
                }
            }
            // NOTE(review): the parts built above have 3 or 5 entries, so the length == 4 case
            // never applies as written - confirm whether the class check should also cover 5.
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }
            if (sr.getStatus() == null) {
                // The German phrases below had lost their umlauts ("mglich", "zurckgebucht") and
                // could never match; they are written with Unicode escapes to stay
                // encoding-independent.
                if ((part[2].contains("entliehen")
                        && part[2].startsWith("Vormerkung ist leider nicht m\u00f6glich"))
                        || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (part[2].startsWith("entliehen")
                        || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar"))
                        || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar"))
                        || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar"))
                        || (part[2].contains("heute zur\u00fcckgebucht"))
                        || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        // Especially Onleihe.de ebooks are often marked
                        // green though they are not available.
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }

        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:ExtractorContentTest.java

/**
 * Builds a {@code Catalog} from the rows of an HTML table.
 *
 * <p>Every {@code tr} of the table is visited. For a sortable table the {@code th} cells of the
 * row are taken first, followed by its {@code td} cells; otherwise only the {@code td} cells are
 * used. The text of each cell is stored in a {@code Product} at the cell's position. Rows without
 * any cell are ignored and do not consume a product number. In a sortable table the first row
 * that has cells is treated as the header row and is not added to the catalog.
 *
 * @param table                 the table element whose rows are read
 * @param structuralInformation passed through to the created {@code Catalog} and {@code Product}s
 * @param rHeaders              passed through to the created {@code Catalog} and {@code Product}s
 * @param sortable              whether the table is a sortable table (header cells inside rows)
 * @return the catalog holding one product per accepted row
 */
private Catalog treatRows(Element table, Tree<String> structuralInformation, List<Header> rHeaders,
        boolean sortable) {
    Catalog catalog = new Catalog(structuralInformation, rHeaders);
    int rowIndex = 0;

    for (Element row : table.select("tr")) {
        // In a sortable table the first entry of a row is a header cell.
        Elements cells = sortable ? row.select("th") : row.select("td");
        if (sortable) {
            cells.addAll(row.select("td"));
        }

        Product candidate = new Product("product_" + rowIndex, structuralInformation, rHeaders);
        for (int column = 0; column < cells.size(); column++) {
            candidate.add(column, cells.get(column).text());
        }

        // Only rows that actually carry cells count as entries.
        if (cells.isEmpty()) {
            continue;
        }

        // The first counted row of a sortable table is the header, not a product.
        boolean isHeaderRow = sortable && rowIndex == 0;
        if (!isHeaderRow) {
            catalog.add(candidate);
        }
        rowIndex++;
    }
    return catalog;
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Parses the detail view of a single search hit into a {@link DetailledItem}.
 *
 * <p>Three documents are used: {@code doc} is parsed from the given HTML, {@code doc2} is
 * fetched from the {@code showTitleActive} tab and {@code doc3} from the
 * {@code showAvailabilityActive} tab. From them the method fills, in this order: the item id,
 * reservation information, cover, title and title addition, the detail list, a download link,
 * the list of copies and the volume-search parameters.
 *
 * <p>Most sub-steps are best effort: failures while reading the id, downloading the cover,
 * parsing a single copy row or detecting the volume search are caught and do not abort the
 * parse.
 *
 * @param html the HTML of the detail page that is currently shown
 * @return the populated item; never {@code null}
 * @throws IOException declared by this method; presumably raised by {@code httpGet} when one of
 *                     the two tab requests fails (verify against {@code httpGet})
 */
protected DetailledItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    String html2 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showTitleActive", ENCODING);

    Document doc2 = Jsoup.parse(html2);
    doc2.setBaseUri(opac_url);

    String html3 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showAvailabilityActive",
            ENCODING);

    Document doc3 = Jsoup.parse(html3);
    doc3.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    try {
        result.setId(doc.select("#bibtip_id").text().trim());
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    // Collect the query strings of all reservation / order links on the availability tab.
    List<String> reservationlinks = new ArrayList<>();
    for (Element link : doc3.select("#vormerkung a, #tab-content a")) {
        String href = link.absUrl("href");
        Map<String, String> hrefq = getQueryParamsFirst(href);
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                // NOTE(review): this break also stops collecting reservation links, including
                // the link that supplied the katkey. Kept as in the original - confirm intent.
                break;
            }
        }

        // Vormerken
        if (hrefq.get("methodToCall") != null) {
            if (hrefq.get("methodToCall").equals("doVormerkung")
                    || hrefq.get("methodToCall").equals("doBestellung")) {
                reservationlinks.add(href.split("\\?")[1]);
            }
        }
    }
    if (reservationlinks.size() == 1) {
        result.setReservable(true);
        result.setReservation_info(reservationlinks.get(0));
    } else if (reservationlinks.size() == 0) {
        result.setReservable(false);
    } else {
        // TODO: Multiple options - handle this case!
    }

    // Cover: only used when exactly one image is present in the data cell.
    if (doc.select(".data td img").size() == 1) {
        result.setCover(doc.select(".data td img").first().attr("abs:src"));
        try {
            downloadCover(result);
        } catch (Exception e) {
            // Deliberately ignored: a failed cover download must not abort the detail parse.
        }
    }

    if (doc.select(".aw_teaser_title").size() == 1) {
        result.setTitle(doc.select(".aw_teaser_title").first().text().trim());
    } else if (doc.select(".data td strong").size() > 0) {
        result.setTitle(doc.select(".data td strong").first().text().trim());
    } else {
        result.setTitle("");
    }
    if (doc.select(".aw_teaser_title_zusatz").size() > 0) {
        result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim()));
    }

    // Look for a link in the first data cell of the box container. If one is found, the
    // title/text pair collected so far is carried over ("takeover") into the detail parsing
    // below; otherwise both are reset.
    String title = "";
    String text = "";
    boolean takeover = false;
    Element detailtrs = doc2.select(".box-container .data td").first();
    // first() returns null when nothing matches; without this guard a missing box would raise a
    // NullPointerException and the fallbacks further down could never run.
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                if (((Element) node).tagName().equals("strong")) {
                    title = ((Element) node).text().trim();
                    text = "";
                } else {
                    if (((Element) node).tagName().equals("a")
                            && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) {
                        text = text + node.attr("href");
                        takeover = true;
                        break;
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    }
    if (!takeover) {
        text = "";
        title = "";
    }

    // Details: a <strong> element starts a new label, everything up to the next <strong> is
    // accumulated as its value.
    detailtrs = doc2.select("#tab-content .data td").first();
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                if (((Element) node).tagName().equals("strong")) {
                    if (!text.equals("") && !title.equals("")) {
                        result.addDetail(new Detail(title.trim(), text.trim()));
                        if (title.equals("Titel:")) {
                            result.setTitle(text.trim());
                        }
                        text = "";
                    }

                    title = ((Element) node).text().trim();
                } else {
                    if (((Element) node).tagName().equals("a")
                            && (((Element) node).text().trim().contains("hier klicken")
                                    || title.equals("Link:"))) {
                        text = text + node.attr("href");
                    } else {
                        text = text + ((Element) node).text();
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    } else {
        // Alternative layout: a two-column "fulltitle" table with label and value cells.
        if (doc2.select("#tab-content .fulltitle tr").size() > 0) {
            Elements rows = doc2.select("#tab-content .fulltitle tr");
            for (Element tr : rows) {
                if (tr.children().size() == 2) {
                    Element valcell = tr.child(1);
                    String value = valcell.text().trim();
                    if (valcell.select("a").size() == 1) {
                        value = valcell.select("a").first().absUrl("href");
                    }
                    result.addDetail(new Detail(tr.child(0).text().trim(), value));
                }
            }
        } else {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR),
                    stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL)));
        }
    }
    // Flush the last label/value pair that is still pending after the loops above.
    if (!text.equals("") && !title.equals("")) {
        result.addDetail(new Detail(title.trim(), text.trim()));
        if (title.equals("Titel:")) {
            result.setTitle(text.trim());
        }
    }
    for (Element link : doc3.select("#tab-content a")) {
        Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href"));
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }
    }
    for (Element link : doc3.select(".box-container a")) {
        if (link.text().trim().equals("Download")) {
            result.addDetail(
                    new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href")));
        }
    }

    // Column positions of the copies table; the defaults are overridden by the header row
    // (tr#bg2) when its captions are recognised.
    Map<String, Integer> copy_columnmap = new HashMap<>();
    // Default values
    copy_columnmap.put("barcode", 1);
    copy_columnmap.put("branch", 3);
    copy_columnmap.put("status", 4);
    Elements copy_columns = doc.select("#tab-content .data tr#bg2 th");
    for (int i = 0; i < copy_columns.size(); i++) {
        Element th = copy_columns.get(i);
        String head = th.text().trim();
        if (head.contains("Status")) {
            copy_columnmap.put("status", i);
        }
        if (head.contains("Zweigstelle")) {
            copy_columnmap.put("branch", i);
        }
        if (head.contains("Mediennummer")) {
            copy_columnmap.put("barcode", i);
        }
        if (head.contains("Standort")) {
            copy_columnmap.put("location", i);
        }
        if (head.contains("Signatur")) {
            copy_columnmap.put("signature", i);
        }
    }

    // group 1: status word, group 2: return date, group 3: number of reservations
    Pattern status_lent = Pattern.compile(
            "^(entliehen) bis ([0-9]{1,2}.[0-9]{1,2}.[0-9]{2," + "4}) \\(gesamte Vormerkungen: ([0-9]+)\\)$");
    // Used when status and barcode share one column: the last word is taken as the barcode.
    Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$");

    Elements exemplartrs = doc.select("#tab-content .data tr").not("#bg2");
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    for (Element tr : exemplartrs) {
        // Any failure in a row (e.g. too few cells, unparsable date) drops only that copy.
        try {
            Copy copy = new Copy();
            Element status = tr.child(copy_columnmap.get("status"));
            Element barcode = tr.child(copy_columnmap.get("barcode"));
            String barcodetext = barcode.text().trim().replace(" Wegweiser", "");

            // STATUS
            String statustext;
            if (status.getElementsByTag("b").size() > 0) {
                statustext = status.getElementsByTag("b").text().trim();
            } else {
                statustext = status.text().trim();
            }
            if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) {
                Matcher matcher1 = status_and_barcode.matcher(statustext);
                if (matcher1.matches()) {
                    statustext = matcher1.group(1);
                    barcodetext = matcher1.group(2);
                }
            }

            Matcher matcher = status_lent.matcher(statustext);
            if (matcher.matches()) {
                copy.setStatus(matcher.group(1));
                copy.setReservations(matcher.group(3));
                copy.setReturnDate(fmt.parseLocalDate(matcher.group(2)));
            } else {
                copy.setStatus(statustext);
            }
            copy.setBarcode(barcodetext);
            if (status.select("a[href*=doVormerkung]").size() == 1) {
                copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]);
            }

            String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", "");
            copy.setBranch(branchtext);

            if (copy_columnmap.containsKey("location")) {
                copy.setLocation(
                        tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", ""));
            }

            if (copy_columnmap.containsKey("signature")) {
                copy.setShelfmark(
                        tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", ""));
            }

            result.addCopy(copy);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    // Volume search: scan the links of the data cells until one carries
    // methodToCall=volumeSearch; catKey / dbIdentifier values seen up to that point are kept.
    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return result;
}

From source file:ExtractorContentTest.java

/**
 * Collects the column headers of an HTML table and flattens one level of nested headers.
 *
 * <p>The first non-empty row supplies the top-level headers. A {@code th} for which
 * {@code getElementsByAttribute("colspan")} finds an element is treated as a grouping header
 * whose {@code colspan} value is the number of sub-headers it spans. The second non-empty row
 * supplies the sub-headers; further rows are ignored.
 *
 * <p>In the returned list every grouping header is replaced, at its own position, by the
 * sub-headers it spans, and each of those sub-headers is linked to it in both directions.
 * Sub-headers are consumed left to right, so the n-th grouping header receives the slice that
 * follows the slices of the grouping headers before it. If the sub-header row is shorter than
 * the colspans announce, only the available sub-headers are assigned.
 *
 * @param table the table element whose {@code tr} rows are inspected
 * @return the flattened list of column headers, in column order
 * @throws NumberFormatException if a {@code colspan} attribute is not a valid integer
 */
private List<Header> collectHeaders(Element table) {
    List<Header> headers = new ArrayList<Header>();
    List<Header> headersWithNestedHeaders = new ArrayList<Header>();
    List<List<Header>> nestedHeaders = new ArrayList<List<Header>>();

    int levelHeader = 0; // FIXME nested header > 1

    for (Element row : table.select("tr")) {

        // sometimes the first row, especially in sortable table, is empty
        // (the second row is relevant for headers)
        if (isEmpty(row))
            continue;

        if (levelHeader == 0) {
            for (Element header : row.select("th")) {
                String hName = header.text();
                Header headerV = new Header(hName);
                Elements colspan = header.getElementsByAttribute("colspan");
                if (!colspan.isEmpty()) {
                    headersWithNestedHeaders.add(headerV);
                    int v = Integer.parseInt(colspan.first().attr("colspan"));
                    headerV.setNumbersOfNestedHeaders(v);
                }

                headers.add(headerV);

            }
            levelHeader++;
        }

        else if (levelHeader == 1) {
            // nested header
            List<Header> nHeaders = new ArrayList<Header>();
            for (Element header : row.select("th")) {
                String hName = header.text();
                Header headerV = new Header(hName);
                nHeaders.add(headerV);
            }
            nestedHeaders.add(nHeaders);
            levelHeader++;
        }

    }

    // FIXME table.select("thead");

    // FIXME assign a "number" of appearance for headers
    // especially important for nested headers (colspan="3")
    List<Header> rHeaders = new ArrayList<Header>();
    List<Header> nHeaders = new ArrayList<Header>();
    if (nestedHeaders.size() > 0)
        nHeaders = nestedHeaders.get(0); // FIXME 0 at the moment but normally it can be refined

    int lastIndex = 0;
    for (Header header : headers) {
        if (headersWithNestedHeaders.contains(header)) { // header has nested headers

            int nNestedHeaders = header.getNumbersOfNestedHeaders(); // number of nested headers

            // Associate the header with nHeaders[lastIndex .. lastIndex + nNestedHeaders).
            // Only this slice is emitted: appending everything up to the end of the row would
            // repeat the sub-headers of later groups once per preceding grouping header.
            int end = Math.min(lastIndex + nNestedHeaders, nHeaders.size());
            for (int idx = Math.max(lastIndex, 0); idx < end; idx++) {
                Header nH = nHeaders.get(idx);
                rHeaders.add(nH);
                header.addNestedHeader(nH);
                nH.addParentHeader(header);
            }
            lastIndex += nNestedHeaders;

        } else {
            rHeaders.add(header);
        }

    }
    return rHeaders;
}

From source file:de.geeksfactory.opacclient.apis.BiBer1992.java

/**
 * Parses the list of lent media from an account page and records account-level information.
 *
 * <p>Cells with class {@code td01x09n} are scanned first: a text matching the card-validity
 * pattern sets {@code res.setValidUntil}, a text matching the fees pattern sets
 * {@code res.setPendingFees}. Then the rows of {@code form[name=medkl] table} are read. The
 * first row and every row whose first cell is a {@code th} are skipped. For each remaining row
 * the {@code accounttable} object of {@code data} maps a field name to the index of the cell
 * holding its value.
 *
 * <p>Rows without cells, and mapped columns that the row does not have, are skipped instead of
 * raising an {@link IndexOutOfBoundsException}.
 *
 * @param res  account data that receives the validity date and pending fees
 * @param doc  the parsed account page; when {@code null} an empty list is returned
 * @param data configuration object that must contain the {@code accounttable} mapping
 * @return the lent items, one per accepted table row; never {@code null}
 * @throws JSONException if {@code data} has no {@code accounttable} object
 */
static List<LentItem> parseMediaList(AccountData res, Document doc, JSONObject data) throws JSONException {
    List<LentItem> media = new ArrayList<>();
    if (doc == null) {
        return media;
    }

    // parse result list
    JSONObject copymap = data.getJSONObject("accounttable");

    // The '.' stands in for the umlaut so the match does not depend on the page encoding.
    Pattern expire = Pattern.compile("Ausweisg.ltigkeit: ([0-9.]+)");
    // An amount followed by a blank and one more character (presumably the currency sign).
    Pattern fees = Pattern.compile("([0-9,.]+) .");
    // Compiled once here instead of once per table row.
    Pattern itemIdPat = Pattern.compile("javascript:smAcc\\('[a-z]+','[a-z]+','([A-Za-z0-9]+)'\\)");

    for (Element td : doc.select(".td01x09n")) {
        String text = td.text().trim();
        Matcher expireMatcher = expire.matcher(text);
        if (expireMatcher.matches()) {
            res.setValidUntil(expireMatcher.group(1));
        } else if (fees.matcher(text).matches()) {
            res.setPendingFees(text);
        }
    }
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    Elements rowElements = doc.select("form[name=medkl] table tr");

    // rows: skip 1st row -> title row
    for (int i = 1; i < rowElements.size(); i++) {
        Element tr = rowElements.get(i);
        int cellCount = tr.children().size();
        // Rows without cells carry no item; rows starting with a th are header rows.
        if (cellCount == 0 || tr.child(0).tagName().equals("th")) {
            continue;
        }
        LentItem item = new LentItem();

        // columns: all elements of one media
        Iterator<?> keys = copymap.keys();
        while (keys.hasNext()) {
            String key = (String) keys.next();
            int index;
            try {
                index = copymap.has(key) ? copymap.getInt(key) : -1;
            } catch (JSONException e1) {
                index = -1;
            }
            // Negative means "not configured"; an index beyond the row would make child() throw.
            if (index < 0 || index >= cellCount) {
                continue;
            }

            Element cell = tr.child(index);
            String value = cell.text().trim().replace("\u00A0", "");

            switch (key) {
            case "author":
                value = findTitleAndAuthor(value)[1];
                break;
            case "title":
                value = findTitleAndAuthor(value)[0];
                break;
            case "returndate":
                try {
                    value = fmt.parseLocalDate(value).toString();
                } catch (IllegalArgumentException e1) {
                    // Keep the raw text when it is not a dd.MM.yyyy date.
                    e1.printStackTrace();
                }
                break;
            }

            // A single link in the cell may carry the item id inside its javascript href.
            Elements cellLinks = cell.select("a");
            if (cellLinks.size() == 1) {
                Matcher matcher = itemIdPat.matcher(cellLinks.attr("href"));
                if (matcher.find()) {
                    item.setId(matcher.group(1));
                }
            }

            if (value != null && value.length() != 0) {
                item.set(key, value);
            }
        }

        // The name of the renewal checkbox is stored as the prolong data of the item.
        Elements prolongBoxes = tr.select("input[type=checkbox][value=YES]");
        if (prolongBoxes.size() > 0) {
            item.setProlongData(prolongBoxes.attr("name"));
        }

        media.add(item);
    }
    return media;
}