Example usage for org.jsoup.nodes Element child

List of usage examples for org.jsoup.nodes Element child

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.child(int).

Prototype

public Element child(int index) 

Source Link

Documentation

Get a child element of this element, by its 0-based index number.

Usage

From source file:de.geeksfactory.opacclient.apis.Pica.java

/**
 * Parses a Pica search result page into a {@link SearchRequestResult}.
 *
 * <p>If the page contains an element with class {@code error}, one of the known
 * "nothing found" messages yields an empty result and any other text is thrown as an
 * {@link OpacErrorException}. Otherwise the total hit count is taken from the first
 * {@code .pages} element and one {@link SearchResult} is created per row of the hit list
 * table. As a side effect the raw HTML is stored in {@code reusehtml} and the number of
 * parsed results in {@code resultcount}.
 *
 * @param html raw HTML of the search result page
 * @param page number of the result page; used to number the results
 * @return the parsed results and the total number of hits
 * @throws OpacErrorException if the page reports an error other than "nothing found"
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);

    updateSearchSetValue(doc);

    if (doc.select(".error").size() > 0) {
        String error = doc.select(".error").first().text().trim();
        // The accented characters are written as Unicode escapes so the comparison does
        // not depend on the encoding of this source file.
        if (error.equals("Es wurde nichts gefunden.") || error.equals("Nothing has been found")
                || error.equals("Er is niets gevonden.")
                || error.equals("Rien n'a \u00e9t\u00e9 trouv\u00e9.")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        } else {
            // error
            throw new OpacErrorException(error);
        }
    }

    reusehtml = html;

    // Prefer the trailing number of the ".pages" text; if there is none, fall back to the
    // "(n)" and ": n" notations.
    String resultnumstr = doc.select(".pages").first().text();
    Matcher m = Pattern.compile("[0-9]+$").matcher(resultnumstr);
    if (m.find()) {
        resultnumstr = m.group();
    }
    int results_total;
    if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    } else {
        results_total = Integer.parseInt(resultnumstr);
    }

    List<SearchResult> results = new ArrayList<>();

    if (results_total == 1) {
        // Only one result: build the entry from the detail view parsed out of the same HTML.
        DetailledItem singleResult = parse_result(html);
        SearchResult sr = new SearchResult();
        sr.setType(getMediaTypeInSingleResult(html));
        sr.setInnerhtml(
                "<b>" + singleResult.getTitle() + "</b><br>" + singleResult.getDetails().get(0).getContent());
        results.add(sr);
    }

    Elements table = doc.select("table[summary=hitlist] tbody tr[valign=top]");
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        if (tr.select("td.hit img").size() > 0) {
            // The media type is derived from the file name of the row's icon.
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            boolean typed = false;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                    typed = true;
                } catch (JSONException | IllegalArgumentException e) {
                    // No usable mapping configured for this icon; use the default below.
                }
            }
            if (!typed) {
                sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                        .replace(".gif", "").replace(".png", "")));
            }
        }

        Element middlething = tr.child(2);

        // Collect {tag, text} pairs for every text fragment longer than three characters,
        // looking at the cell's direct children and one level below them.
        List<String[]> strings = new ArrayList<>();
        for (Node node : middlething.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", text });
                }
            } else if (node instanceof Element) {
                String tagName = ((Element) node).tag().getName();
                for (Node subnode : node.childNodes()) {
                    String text;
                    if (subnode instanceof TextNode) {
                        text = ((TextNode) subnode).text().trim();
                    } else if (subnode instanceof Element) {
                        text = ((Element) subnode).text().trim();
                    } else {
                        continue;
                    }
                    if (text.length() > 3) {
                        strings.add(new String[] { tagName, text });
                    }
                }
            }
        }

        // Only the first three fragments are shown; a leading link text is rendered bold.
        StringBuilder description = new StringBuilder();
        int k = 0;
        for (String[] part : strings) {
            if (part[0].equals("a") && k == 0) {
                description.append("<b>").append(part[1]).append("</b>");
            } else if (k < 3) {
                description.append("<br />").append(part[1]);
            }
            k++;
        }
        sr.setInnerhtml(description.toString());

        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Parses a Bibliotheca search result page into a {@link SearchRequestResult}.
 *
 * <p>Each matching row of the {@code .resulttab} table becomes one {@link SearchResult}.
 * The result id is taken from the {@code detmediennr} parameter of the row's first link
 * and, if the second cell starts with an HTML comment of the form {@code "...: id"},
 * overridden by the text after the colon.
 *
 * @param html raw HTML of the search result page
 * @param page page number, passed through unchanged to the returned result
 * @return the parsed rows and the total hit count; the total is {@code -1} when it cannot
 *         be read from the page
 */
protected SearchRequestResult parse_search(String html, int page) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    Elements table = doc.select(".resulttab tr.result_trefferX, .resulttab tr.result_treffer");
    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();
        int contentindex = 1;
        if (tr.select("td a img").size() > 0) {
            // The media type is derived from the file name of the row's icon.
            String[] fparts = tr.select("td a img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            boolean typed = false;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                    typed = true;
                } catch (JSONException | IllegalArgumentException e) {
                    // No usable mapping configured for this icon; use the default below.
                }
            }
            if (!typed) {
                sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                        .replace(".gif", "").replace(".png", "")));
            }
        } else if (tr.children().size() == 3) {
            // Rows without an icon and with exactly three cells keep their content in the third.
            contentindex = 2;
        }
        Element contentCell = tr.child(contentindex);
        sr.setInnerhtml(contentCell.child(0).html());

        sr.setNr(i);
        Element link = contentCell.select("a").first();
        try {
            if (link != null && link.attr("href").contains("detmediennr")) {
                Map<String, String> params = getQueryParamsFirst(link.attr("abs:href"));
                String nr = params.get("detmediennr");
                if (nr != null && Integer.parseInt(nr) > i + 1) {
                    // Seems to be an ID
                    if (params.get("detDB") != null) {
                        sr.setId("&detmediennr=" + nr + "&detDB=" + params.get("detDB"));
                    } else {
                        sr.setId("&detmediennr=" + nr);
                    }
                }
            }
        } catch (Exception ignored) {
            // Best effort: a link that cannot be parsed simply leaves the result without an id.
        }

        // child(1) exists here because child(contentindex) above succeeded with an index >= 1.
        Element secondCell = tr.child(1);
        if (!secondCell.childNodes().isEmpty() && secondCell.childNode(0) instanceof Comment) {
            String[] commentParts = ((Comment) secondCell.childNode(0)).getData().trim().split(": ");
            if (commentParts.length > 1) {
                sr.setId(commentParts[1]);
            }
        }
        results.add(sr);
    }

    int results_total = -1;
    if (doc.select(".result_gefunden").size() > 0) {
        try {
            results_total = Integer.parseInt(
                    doc.select(".result_gefunden").text().trim().replaceAll(".*[^0-9]+([0-9]+).*", "$1"));
        } catch (NumberFormatException e) {
            e.printStackTrace();
            results_total = -1;
        }
    }
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Requests the "prolong all" page of the OPAC and converts the table it shows into a
 * {@link ProlongAllResult}.
 *
 * <p>A fresh login is performed first when the session has timed out, nobody is logged in,
 * or a different account is logged in.
 *
 * @param account the account whose items are to be prolonged
 * @param useraction not used by this implementation
 * @param selection not used by this implementation
 * @return {@code OK} with one map per table row, or {@code ERROR} with a message
 * @throws IOException declared for the HTTP requests made by this method
 */
@Override
public ProlongAllResult prolongAll(Account account, int useraction, String selection) throws IOException {
    if (!initialised) {
        start();
    }

    // One login attempt covers all three cases: expired session, no session, other account.
    if (System.currentTimeMillis() - logged_in > SESSION_LIFETIME || logged_in_as == null
            || logged_in_as.getId() != account.getId()) {
        try {
            account(account);
        } catch (JSONException e) {
            e.printStackTrace();
            return new ProlongAllResult(MultiStepResult.Status.ERROR,
                    stringProvider.getString(StringProvider.CONNECTION_ERROR));
        } catch (OpacErrorException e) {
            return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage());
        }
    }

    Document doc = Jsoup.parse(httpGet(opac_url + "/index.asp?target=alleverl", getDefaultEncoding()));

    if (doc.getElementsByClass("kontomeldung").size() == 1) {
        return new ProlongAllResult(MultiStepResult.Status.ERROR,
                doc.getElementsByClass("kontomeldung").get(0).text());
    }

    if (doc.select(".kontozeile table").size() != 1) {
        return new ProlongAllResult(MultiStepResult.Status.ERROR,
                stringProvider.getString(StringProvider.INTERNAL_ERROR));
    }

    // Column index -> result key, filled from the header row.
    Map<Integer, String> columnKeys = new HashMap<>();
    List<Map<String, String>> rows = new ArrayList<>();
    for (Element tr : doc.select(".kontozeile table tr")) {
        if (tr.select(".tabHeaderKonto").size() > 0) {
            int column = 0;
            for (Element th : tr.select("th")) {
                String heading = th.text();
                String key = null;
                if (heading.contains("Verfasser")) {
                    key = OpacApi.ProlongAllResult.KEY_LINE_AUTHOR;
                } else if (heading.contains("Titel")) {
                    key = OpacApi.ProlongAllResult.KEY_LINE_TITLE;
                } else if (heading.contains("Neue")) {
                    key = OpacApi.ProlongAllResult.KEY_LINE_NEW_RETURNDATE;
                } else if (heading.contains("Frist")) {
                    key = OpacApi.ProlongAllResult.KEY_LINE_OLD_RETURNDATE;
                } else if (heading.contains("Status")) {
                    key = OpacApi.ProlongAllResult.KEY_LINE_MESSAGE;
                }
                if (key != null) {
                    columnKeys.put(column, key);
                }
                column++;
            }
        } else {
            Map<String, String> row = new HashMap<>();
            for (Entry<Integer, String> mapping : columnKeys.entrySet()) {
                row.put(mapping.getValue(), tr.child(mapping.getKey()).text().trim());
            }
            rows.add(row);
        }
    }

    if (doc.select("input#make_allvl").size() > 0) {
        // The page offers a confirmation button: submit the confirmation form.
        List<NameValuePair> formFields = new ArrayList<>(2);
        formFields.add(new BasicNameValuePair("target", "make_allvl_flag"));
        formFields.add(new BasicNameValuePair("make_allvl", "Bestaetigung"));
        httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(formFields), getDefaultEncoding());
    }

    return new ProlongAllResult(MultiStepResult.Status.OK, rows);
}

From source file:de.geeksfactory.opacclient.apis.BiBer1992.java

/**
 * Parses a BiBer search result page into a {@link SearchRequestResult}.
 *
 * <p>Hits are read from the {@code tr[valign]} rows of the form table. A hit occupies
 * {@code rows_per_hit} consecutive rows, of which only the first is evaluated. The value
 * is detected from the page and can be overridden by the {@code rows_per_hit} entry of
 * the library configuration.
 *
 * @param html raw HTML of the search result page
 * @param page page number, passed through unchanged to the returned result
 * @return the parsed hits and the total hit count; the total is {@code -1} when the page
 *         does not contain the "Treffer Gesamt (n)" text
 */
private SearchRequestResult parse_search(String html, int page) {
    List<SearchResult> results = new ArrayList<>();
    Document doc = Jsoup.parse(html);

    if (doc.select("h3").text().contains("Es wurde nichts gefunden")) {
        return new SearchRequestResult(results, 0, page);
    }

    Elements trList = doc.select("form table tr[valign]"); // <tr valign="top">

    // Two rows per hit unless there is only one row or the first two rows both carry a checkbox.
    int rows_per_hit = 2;
    if (trList.size() == 1 || (trList.size() > 1 && trList.get(0).select("input[type=checkbox]").size() > 0
            && trList.get(1).select("input[type=checkbox]").size() > 0)) {
        rows_per_hit = 1;
    }
    try {
        rows_per_hit = data.getInt("rows_per_hit");
    } catch (JSONException ignored) {
        // No override configured; keep the value detected above.
    }

    // Overall search results
    // are very differently layouted, but have always the text:
    // "....Treffer Gesamt (nnn)"
    int results_total = -1;
    Matcher matcher = Pattern.compile("Treffer Gesamt \\(([0-9]+)\\)").matcher(html);
    if (matcher.find()) {
        results_total = Integer.parseInt(matcher.group(1));
    }

    // Never return more entries than one result page holds.
    int numOfEntries = Math.min(trList.size() / rows_per_hit, numOfResultsPerPage);

    for (int i = 0; i < numOfEntries; i++) {
        Element tr = trList.get(i * rows_per_hit);
        SearchResult sr = new SearchResult();

        // ID as href tag
        Elements elem = tr.select("td a");
        if (elem.size() > 0) {
            sr.setId(elem.get(0).attr("href"));
        } else {
            // no ID as href found, look for the ID in the input form
            elem = tr.select("td input");
            if (elem.size() > 0) {
                String nameID = elem.get(0).attr("name").trim();
                sr.setId("/" + opacDir + "/ftitle" + opacSuffix + "?LANG=de&FUNC=full&" + nameID + "=YES");
            }
        }

        // media type
        elem = tr.select("td img");
        if (elem.size() > 0) {
            setMediaTypeFromImageFilename(sr, elem.get(0).attr("src"));
        }

        // description
        StringBuilder desc = new StringBuilder();
        try {
            // array "searchtable" lists the column numbers of the description
            JSONArray searchtable = data.getJSONArray("searchtable");
            for (int j = 0; j < searchtable.length(); j++) {
                int colNum = searchtable.getInt(j);
                if (j > 0) {
                    desc.append("<br />");
                }
                Element cell = tr.child(colNum);
                String c = cell.html();
                if (cell.childNodes().size() == 1 && cell.select("a[href*=ftitle.]").size() > 0) {
                    // The cell consists of a single title link: show the link text only.
                    // NOTE(review): the text is selected from the whole row, not just this
                    // cell - confirm that this is intended.
                    c = tr.select("a[href*=ftitle.]").text();
                }
                desc.append(c);
            }
        } catch (Exception e) {
            // Whatever was collected before the failure is still used below.
            e.printStackTrace();
        }
        // remove links "<a ...>...</a>
        // needed for Friedrichshafen: "Warenkorb", "Vormerkung"
        // Herford: "Medienkorb"
        sr.setInnerhtml(desc.toString().replaceAll("<a .*?</a>", ""));

        boolean hasP04 = tr.select("font.p04x09b").size() > 0;
        boolean hasP02 = tr.select("font.p02x09b").size() > 0;
        if (hasP04 && hasP02) {
            sr.setStatus(Status.YELLOW);
        } else if (hasP04) {
            sr.setStatus(Status.GREEN);
        } else if (hasP02) {
            sr.setStatus(Status.RED);
        }

        // number: i already counts hits (the row index is i * rows_per_hit), so it must not
        // be divided by rows_per_hit again - that produced duplicate numbers.
        sr.setNr(i);
        results.add(sr);
    }

    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Parses a SISIS search result page into a {@link SearchRequestResult}.
 *
 * <p>Side effects: {@code identifier} is reset and, if the first {@code singleHit.do}
 * link in the result table carries an {@code identifier} parameter, set to its value;
 * {@code resultcount} is set to the number of parsed rows; {@code reusehtml} is set when
 * the page header shows "1/1".
 *
 * @param html raw HTML of the search result page
 * @param page number of the result page; used to number the results
 * @return the parsed rows and the total hit count; the total is {@code -1} when the page
 *         header contains neither "(n)" nor ": n"
 * @throws OpacErrorException with the page's message if it contains an {@code .error} or
 *         {@code .nohits} element, or with the message {@code "is_a_redirect"} if the
 *         header shows "1/1"
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");

    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    int results_total = -1;

    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        // Keep the HTML for the caller and signal the situation through the exception.
        reusehtml = html;
        throw new OpacErrorException("is_a_redirect");
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data tbody tr");

    // Take the "identifier" parameter from the first singleHit.do link of the table.
    identifier = null;
    for (Element link : doc.select("table.data a")) {
        if (link.hasAttr("href") && link.attr("href").contains("singleHit.do")) {
            try {
                List<NameValuePair> anyurl = URLEncodedUtils
                        .parse(new URI(link.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            // Only the first matching link is inspected, whether or not it had the parameter.
            break;
        }
    }

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type from the row's icon: configured mapping by file name first, then the
        // defaults by icon title, then the defaults by file name.
        if (tr.select("td img[title]").size() > 0) {
            String title = tr.select("td img").get(0).attr("title");
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                    .replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }

        // Keywords anywhere in the row override the icon-based type.
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }

        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }

        // The descriptive cell is the third one if it contains a link, otherwise the second.
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }

        // Some layouts wrap the description in a single div or in span.titleData.
        List<Node> children = middlething.childNodes();
        if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) {
            Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first();
            if (indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }

        // Collect every text fragment longer than three characters. Direct text nodes give
        // {"text", "", text}; fragments one level down give
        // {parent tag, own tag or "text", text, parent class, parent style}.
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element parent = (Element) node;
                for (Node subnode : node.childNodes()) {
                    String subtag;
                    String text;
                    if (subnode instanceof TextNode) {
                        subtag = "text";
                        text = ((TextNode) subnode).text().trim();
                    } else if (subnode instanceof Element) {
                        subtag = ((Element) subnode).tag().getName();
                        text = ((Element) subnode).text().trim();
                    } else {
                        continue;
                    }
                    if (text.length() > 3) {
                        strings.add(new String[] { parent.tag().getName(), subtag, text, parent.className(),
                                node.attr("style") });
                    }
                }
            }
        }

        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                List<NameValuePair> z3988data = URLEncodedUtils
                        .parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() == null || nv.getValue().trim().equals("")) {
                        continue;
                    }
                    String name = nv.getName();
                    if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                        description.append("<b>").append(nv.getValue()).append("</b>");
                        hastitle = true;
                    } else if (name.equals("rft.au") || name.equals("rft.date")) {
                        description.append("<br />").append(nv.getValue());
                    }
                }
            } catch (URISyntaxException e) {
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }

        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) {
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && part[0].equals("text")
                        && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(part[2]);
                }
            }

            if (part.length == 4) {
                // NOTE(review): the arrays built above have 3 or 5 entries, never 4, so this
                // class-based check cannot run. Switching it to ">= 4" would also bypass the
                // e-media override below, so it is left unchanged - confirm the intent.
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }

            if (sr.getStatus() == null) {
                // Umlauts are written as Unicode escapes so the comparison does not depend
                // on the encoding of this source file.
                if ((part[2].contains("entliehen")
                        && part[2].startsWith("Vormerkung ist leider nicht m\u00f6glich"))
                        || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (part[2].startsWith("entliehen")
                        || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar"))
                        || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar"))
                        || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar"))
                        || (part[2].contains("heute zur\u00fcckgebucht"))
                        || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        // Especially Onleihe.de ebooks are often marked
                        // green though they are not available.
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }

        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:org.apdplat.superword.extract.ChineseSynonymAntonymExtractor.java

/**
 * Extracts the synonyms and antonyms listed for a word on an HTML page.
 *
 * <p>The children of every element matched by {@code SYNONYM_ANTONYM_CSS_PATH} are read
 * as consecutive (label, word list) pairs. Words shorter than two characters and words
 * rejected by {@code isNotChineseChar} are skipped; a synonym equal to the looked-up word
 * is skipped as well. Any exception is logged and the entries collected so far are kept.
 *
 * @param html the HTML page to parse
 * @param word the word the page was requested for
 * @return the collected synonyms and antonyms of {@code word}; never {@code null}
 */
public static SynonymAntonym parseSynonymAntonym(String html, String word) {
    SynonymAntonym collected = new SynonymAntonym();
    collected.setWord(new Word(word, ""));
    try {
        for (Element element : Jsoup.parse(html).select(SYNONYM_ANTONYM_CSS_PATH)) {
            int size = element.children().size();
            LOGGER.debug("element size:" + size);
            // Walk the children pairwise: label at labelIndex, word list right after it.
            for (int labelIndex = 0; labelIndex + 1 < size; labelIndex += 2) {
                String type = element.child(labelIndex).text();
                LOGGER.debug("type:" + type);
                // NOTE(review): the label literals below look like mis-encoded non-ASCII
                // text - confirm them against the original source.
                if ("??".equals(type)) {
                    String synonym = element.child(labelIndex + 1).text();
                    LOGGER.debug("synonym:" + synonym);
                    for (String token : synonym.split("\\s+")) {
                        String candidate = token.replaceAll("\\s+", "");
                        if (candidate.length() >= 2) {
                            if (isNotChineseChar(candidate)) {
                                LOGGER.debug("?" + candidate);
                            } else if (!candidate.equals(word)) {
                                LOGGER.debug("word:" + candidate);
                                collected.addSynonym(new Word(candidate, ""));
                            }
                        }
                    }
                } else if ("???".equals(type)) {
                    String antonym = element.child(labelIndex + 1).text();
                    LOGGER.debug("antonym:" + antonym);
                    for (String token : antonym.split("\\s+")) {
                        String candidate = token.replaceAll("\\s+", "");
                        if (candidate.length() >= 2) {
                            if (isNotChineseChar(candidate)) {
                                LOGGER.debug("?" + candidate);
                            } else {
                                LOGGER.debug("word:" + candidate);
                                collected.addAntonym(new Word(candidate, ""));
                            }
                        }
                    }
                }
            }
        }
        boolean foundAnything = !collected.getAntonym().isEmpty() || !collected.getSynonym().isEmpty();
        if (foundAnything) {
            LOGGER.info("??????" + collected);
        }
    } catch (Exception e) {
        LOGGER.error("??????", e);
    }
    return collected;
}

From source file:org.apdplat.superword.extract.SentenceExtractor.java

/**
 * Extracts sentence pairs from an HTML page and counts the words of the first sentence
 * of each pair in {@code WORD_FREQUENCE}.
 *
 * <p>Nothing is extracted unless the page title starts with the expected prefix. For each
 * {@code div.info-body} element the text of child 3 becomes the map key and the
 * concatenated text of children 4 and 5 the value; keys with fewer than two
 * whitespace-separated tokens are skipped. Any exception is logged and the pairs
 * collected so far are returned.
 *
 * @param html the HTML page to parse
 * @return the extracted pairs; empty if nothing could be extracted
 */
public static Map<String, String> parse2(String html) {
    Map<String, String> pairs = new HashMap<>();
    try {
        Document page = Jsoup.parse(html);
        String pageTitle = page.select("html head title").text();
        // NOTE(review): the "?" literals in this method look like mis-encoded non-ASCII
        // text - confirm them against the original source.
        if (pageTitle.startsWith("??")) {
            for (Element body : page.select("html body div#main div.main_sl div.info div.info-body")) {
                String en = body.child(3).text().trim();
                LOGGER.info("???:" + en);
                if (en.split("\\s+").length < 2) {
                    LOGGER.debug("???");
                    continue;
                }
                String cn = body.child(4).text().trim() + body.child(5).text().trim();
                LOGGER.info("???:" + cn);
                if (!StringUtils.isNotBlank(en) || !StringUtils.isNotBlank(cn)) {
                    continue;
                }
                pairs.put(en, cn);
                // Count every token of the sentence that was just stored.
                TextAnalyzer.seg(en).forEach(token -> {
                    Word counted = new Word(token, "");
                    WORD_FREQUENCE.putIfAbsent(counted, new AtomicInteger());
                    WORD_FREQUENCE.get(counted).incrementAndGet();
                });
            }
        } else {
            LOGGER.error("???" + pageTitle);
        }
    } catch (Exception e) {
        LOGGER.error("???", e);
    }
    return pairs;
}

From source file:org.arb.extractor.DomTreeWalker.java

/**
 * Records the {@code id} and {@code arb:id} attribute values of an element and of all its
 * descendant elements in {@code elementIdSet} and {@code arbIdSet} respectively.
 *
 * @param element root of the subtree to scan
 * @param codeUnit passed on to the recursive calls; not otherwise used here
 */
private void collectIdsOnElement(Element element, AbstractCodeUnit codeUnit) {
    if (element.hasAttr("id")) {
        elementIdSet.add(element.attr("id"));
    }
    if (element.hasAttr("arb:id")) {
        arbIdSet.add(element.attr("arb:id"));
    }
    // Query the child list once instead of calling children()/child(i) on every iteration.
    for (Element child : element.children()) {
        collectIdsOnElement(child, codeUnit);
    }
}

From source file:org.arb.extractor.DomTreeWalker.java

/**
 * Extract resource from an element and all its children.
 *
 * <p>An element contributes a replacement when it has non-empty own text and
 * {@code hasResource} reports none for it. If no resource id can be determined for the
 * element, the replacement is flagged as needing a new id that uses the arb id.
 *
 * @param element the target element.
 * @param codeUnit used to record all found replacement.
 */
private void extractResourceOnElement(Element element, AbstractCodeUnit codeUnit) {
    String ownText = element.ownText();
    if (!ownText.isEmpty() && !hasResource(element)) {
        DomCodeReplacement replacement = new DomCodeReplacement();
        replacement.setElement(element);
        replacement.setResourceText(ownText);
        replacement.setResourceId(getElementResourceId(element));
        if (replacement.getResourceId() == null) {
            replacement.setNewId(true);
            replacement.setUseArbId(true);
        }
        codeUnit.addReplacement(replacement);
    }
    // Query the child list once instead of calling children()/child(i) on every iteration.
    // NOTE(review): assumes recording a replacement does not restructure the DOM - confirm.
    for (Element child : element.children()) {
        extractResourceOnElement(child, codeUnit);
    }
}

From source file:org.asqatasun.rules.elementselector.ImageElementSelector.java

/**
 * /*  w  w  w.  j a  v  a2  s . co m*/
 * @param imageParent
 * @param image
 * @return whether the current image is an image link
 */
private boolean isImageLink(Element imageParent, Element image) {
    if (imageParent == null || !StringUtils.equals(imageParent.text(), image.text())) {
        return false;
    }
    if (imageParent.children().size() == 1) {
        return isImageLink(imageParent.child(0), image);
    } else if (imageParent.children().isEmpty() && imageParent.equals(image)) {
        return true;
    }
    return false;
}