Example usage for org.jsoup.nodes Element attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey)

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Reads the {@code gravityScore} attribute of the given node as an integer.
 *
 * @param node the element to inspect; may be {@code null}
 * @return the parsed score, or 0 when the node is {@code null}, the attribute
 *         is null/empty according to {@code string.isNullOrEmpty}, or the
 *         attribute value is not a valid integer
 */
private int getScore(Element node) {
    if (node == null) {
        return 0;
    }

    final String rawScore = node.attr("gravityScore");
    if (string.isNullOrEmpty(rawScore)) {
        return 0;
    }

    try {
        return Integer.parseInt(rawScore);
    } catch (NumberFormatException ignored) {
        // A non-numeric score is treated the same as a missing one.
        return 0;
    }
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Builds a one-line debug description of an element from its
 * {@code gravityScore}, {@code gravityNodes}, id and {@code class} values.
 *
 * Note: the class value is the last thing appended, so it is not followed
 * by a closing quote.
 *
 * @param e the element to describe
 * @return the assembled debug string
 */
private String debugNode(Element e) {
    return "GravityScore: '" + e.attr("gravityScore")
            + "' paraNodeCount: '" + e.attr("gravityNodes")
            + "' nodeId: '" + e.id()
            + "' className: '" + e.attr("class");
}

From source file:info.dolezel.fatrat.plugins.UloztoDownload.java

/**
 * Processes an uloz.to link: normalises the host, logs in, fetches the page and then
 * either starts a premium download or walks through the free (captcha) download flow.
 *
 * Flow, as implemented below:
 * <ol>
 *   <li>Links on the bare or mobile host are rewritten to {@code https://www.uloz.to}.</li>
 *   <li>If {@code logIn(link)} fails, nothing else happens.</li>
 *   <li>A "location" header either marks the file as removed or is processed recursively.</li>
 *   <li>A page containing {@code ?disclaimer=1} is re-requested with that parameter.</li>
 *   <li>When premium is requested and the quick download button exists, its href is downloaded.</li>
 *   <li>Otherwise the hidden inputs of the free form are posted together with the captcha
 *       data and the solved captcha text, and the {@code url} of the JSON reply is downloaded.</li>
 * </ol>
 *
 * @param link the link to process
 */
@Override
public void processLink(String link) {

    // Normalise "uloz.to" and "m.uloz.to" to "https://www.uloz.to". String.replace() takes its
    // first argument literally, so a regex-aware replaceFirst() is needed for "https?".
    link = link.replaceFirst("^https?://(?:m\\.)?uloz\\.to", "https://www.uloz.to");

    if (!logIn(link))
        return;

    final String downloadLink = link; // I can't make 'link' final

    fetchPage(link, new PageFetchListener() {

        @Override
        public void onCompleted(ByteBuffer buf, Map<String, String> headers) {
            try {
                if (headers.containsKey("location")) {
                    String location = headers.get("location");
                    if (location.contains("smazano") || location.contains("nenalezeno"))
                        setFailed("The file has been removed");
                    else
                        processLink(location);
                    return;
                }

                CharBuffer cb = charsetUtf8.decode(buf);

                if (cb.toString().contains("?disclaimer=1")) {
                    processLink(downloadLink + "?disclaimer=1");
                    return;
                }

                final Document doc = Jsoup.parse(cb.toString());
                final Element freeForm = doc.getElementById("frm-download-freeDownloadTab-freeDownloadForm");
                // getElementById() expects the bare id; a leading '#' is CSS selector syntax
                // and would never match.
                final Element premiumLink = doc.getElementById("quickDownloadButton");

                boolean usePremium = usePremium(downloadLink);

                if (cb.toString().contains("Nem dostatek kreditu"))
                    setMessage("Credit depleted, using FREE download");
                else if (usePremium && premiumLink != null) {
                    String msg = "Using premium download";

                    Elements aCredits = doc.getElementsByAttributeValue("href", "/kredit");

                    if (!aCredits.isEmpty())
                        msg += " (" + aCredits.get(0).ownText() + " left)";

                    setMessage(msg);

                    startDownload("http://www.uloz.to" + premiumLink.attr("href"));
                    return;

                } else if (loggedIn)
                    setMessage("Login failed, using FREE download");

                Elements aNames = doc.getElementsByClass("jsShowDownload");
                if (!aNames.isEmpty())
                    reportFileName(aNames.get(0).ownText());

                // Without the free download form there is nothing to submit; fail with a
                // readable message instead of a NullPointerException.
                if (freeForm == null) {
                    setFailed("Free download form not found on the page");
                    return;
                }

                final PostQuery pq = new PostQuery();
                final Map<String, String> hdr = new HashMap<String, String>();
                Elements eHiddens = freeForm.select("input[type=hidden]");

                hdr.put("X-Requested-With", "XMLHttpRequest");
                hdr.put("Referer", downloadLink);
                hdr.put("Accept", "application/json, text/javascript, */*; q=0.01");

                for (Element e : eHiddens)
                    pq.add(e.attr("name"), e.attr("value"));

                // nextInt(bound) is always non-negative, unlike Math.abs(nextInt()) which
                // stays negative for Integer.MIN_VALUE.
                fetchPage("https://uloz.to/reloadXapca.php?rnd=" + new Random().nextInt(Integer.MAX_VALUE),
                        new PageFetchListener() {

                            @Override
                            public void onCompleted(ByteBuffer buf, Map<String, String> headers) {
                                CharBuffer cb = charsetUtf8.decode(buf);
                                String captchaUrl;

                                try {
                                    JSONObject json = new JSONObject(cb.toString());
                                    captchaUrl = "https:" + json.getString("image");
                                    pq.add("hash", json.getString("hash"));
                                    pq.add("timestamp", "" + json.getInt("timestamp"));
                                    pq.add("salt", "" + json.getInt("salt"));
                                } catch (JSONException e) {
                                    setFailed("Error parsing captcha JSON");
                                    return;
                                }

                                solveCaptcha(captchaUrl, new CaptchaListener() {

                                    @Override
                                    public void onFailed() {
                                        setFailed("Failed to decode the captcha code");
                                    }

                                    @Override
                                    public void onSolved(String text) {

                                        String action = freeForm.attr("action");
                                        pq.add("captcha_value", text);

                                        fetchPage("https://www.uloz.to" + action, new PageFetchListener() {

                                            @Override
                                            public void onCompleted(ByteBuffer buf,
                                                    Map<String, String> headers) {
                                                try {
                                                    CharBuffer cb = charsetUtf8.decode(buf);
                                                    JSONObject obj = new JSONObject(cb.toString());

                                                    startDownload(obj.getString("url"));
                                                } catch (Exception e) {
                                                    setFailed("" + e);
                                                }
                                            }

                                            @Override
                                            public void onFailed(String error) {
                                                setFailed(error);
                                            }

                                        }, pq.toString(), hdr);

                                    }
                                });
                            }

                            @Override
                            public void onFailed(String error) {
                                setFailed("Failed to load captcha AJAX page");
                            }

                        });

            } catch (Exception e) {
                e.printStackTrace();
                setFailed(e.toString());
            }
        }

        @Override
        public void onFailed(String error) {
            setFailed("Failed to load the initial page");
        }
    }, null);
}

From source file:codeu.chat.client.commandline.Chat.java

/**
 * Fetches the page at {@code url} and collects the hrefs of all anchors that
 * point at a known script site.
 *
 * An anchor is kept when its href contains "script-o-rama" or
 * "springfieldspringfield" and its text is neither "Cached" nor "Similar".
 *
 * @param url the page to request
 * @return the matching hrefs; the list is empty when nothing matched or the
 *         request failed with an {@link IOException}
 */
private List<String> findScript(String url) {
    final List<String> scriptLinks = new ArrayList<String>();
    try {
        final Document page = Jsoup.connect(url).get(); // Make the request

        // Walk every anchor that carries an href attribute
        for (Element anchor : page.select("a[href]")) {
            final String href = anchor.attr("href");
            final String label = anchor.text();

            final boolean pointsToScriptSite = href.contains("script-o-rama")
                    || href.contains("springfieldspringfield");
            final boolean isSearchEngineExtra = label.equals("Cached") || label.equals("Similar");

            if (pointsToScriptSite && !isSearchEngineExtra) {
                scriptLinks.add(href);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return scriptLinks; // Still empty if the request failed or nothing matched
}

From source file:feedzilla.Feed.java

/**
 * Populates this feed's fields from the direct children of {@code entry}.
 *
 * Once a {@code source} child has been seen, any later {@code title} and
 * {@code link} children are stored in the {@code source_*} fields instead of
 * the entry's own ones. Unrecognised tags are logged at debug level.
 *
 * @param entry the element whose children are inspected
 */
private void parser(Element entry) {
    boolean sourceSeen = false;
    for (Element child : entry.children()) {
        final String tag = child.nodeName();
        switch (tag) {
        case "id":
            // The numeric id is the part after the first ':' of the text.
            this.id = Integer.parseInt(child.text().split(":")[1]);
            break;
        case "title":
            if (!sourceSeen) {
                this.title = child.text();
            } else {
                this.source_title = child.text();
            }
            break;
        case "summary":
            // Keep only the text preceding the first "<br".
            this.summary = child.text().split("<br")[0];
            break;
        case "published":
            this.published = child.text();
            break;
        case "updated":
            this.updated = child.text();
            break;
        case "author":
            this.author = child.text();
            break;
        case "link":
            if (!sourceSeen) {
                this.link = child.attr("href");
            } else {
                this.source_link = child.attr("href");
            }
            break;
        case "rights":
            this.copyright = child.text();
            break;
        case "source":
            sourceSeen = true;
            break;
        default:
            Log.debug("Unknow TAG: " + tag);
            break;
        }
    }
}

From source file:de.geeksfactory.opacclient.apis.IOpac.java

/**
 * Creates a search field from a description cell and the matching input cell.
 *
 * The display name is the text of the {@code span}/{@code blockquote} elements
 * in {@code descTd} with colons and non-breaking spaces removed. A
 * {@code select} yields a dropdown field unless the name is one of
 * "Treffer/Seite", "Medientypen", "Medientyp" or "Treffer pro Seite";
 * otherwise an {@code input} yields a text field.
 *
 * @param descTd  cell holding the field's label
 * @param inputTd cell holding the form control
 * @return the created field, or {@code null} when no usable control was found
 */
private SearchField createSearchField(Element descTd, Element inputTd) {
    final String label = descTd.select("span, blockquote").text()
            .replace(":", "")
            .trim()
            .replace("\u00a0", "");

    final boolean skippedAsDropdown = label.equals("Treffer/Seite") || label.equals("Medientypen")
            || label.equals("Medientyp") || label.equals("Treffer pro Seite");

    // first() yields null for an empty selection, i.e. when no <select> exists.
    final Element select = inputTd.select("select").first();
    if (select != null && !skippedAsDropdown) {
        final DropdownSearchField dropdown = new DropdownSearchField();
        dropdown.setDisplayName(label);
        dropdown.setId(select.attr("name"));
        for (Element option : select.select("option")) {
            dropdown.addDropdownValue(option.attr("value"), option.text());
        }
        return dropdown;
    }

    final Element input = inputTd.select("input").first();
    if (input == null) {
        return null;
    }

    final TextSearchField textField = new TextSearchField();
    textField.setDisplayName(label);
    textField.setId(input.attr("name"));
    textField.setHint("");
    return textField;
}

From source file:org.shareok.data.sagedata.SageJournalIssueDateProcessor.java

/**
 * Scrapes one page of the SAGE publication listing and records, per journal
 * name, its home link and its issue-list ("/loi/") link in {@code journalMap}.
 *
 * Existing entries are only completed: a "homeLink" or "issueLink" value that
 * is already present is left untouched. Any exception raised while fetching
 * or parsing is printed and otherwise ignored.
 *
 * @param journalMap map from journal name to its link info; updated in place
 * @return the same {@code journalMap} instance
 */
public Map<String, Map<String, String>> updateSageJournalLinks(Map<String, Map<String, String>> journalMap) {
    try {
        Document listing = Jsoup
                .connect("http://journals.sagepub.com/action/showPublications?pageSize=20&startPage=199")
                .userAgent(
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
                .cookie("auth", "token")
                .timeout(300000)
                .get();

        Elements rows = listing.select("form#browsePublicationsForm").get(0)
                .select("table").get(0)
                .select("tbody").get(0)
                .select("tr");

        for (Element row : rows) {
            // The journal anchor sits in the second cell of each row.
            Element anchor = row.select("td").get(1).select("a").get(0);
            String journalName = anchor.text();
            String homeLink = SageDataUtil.SAGE_HTTP_PREFIX + anchor.attr("href");

            // The last path segment of the home link is reused for the issue list link.
            String[] segments = homeLink.split("/");
            String issueLink = SageDataUtil.SAGE_HTTP_PREFIX + "/loi/" + segments[segments.length - 1];

            Map<String, String> info = journalMap.get(journalName);
            if (info == null) {
                info = new HashMap<>();
                journalMap.put(journalName, info);
            }
            if (info.get("homeLink") == null) {
                info.put("homeLink", homeLink);
            }
            if (info.get("issueLink") == null) {
                info.put("issueLink", issueLink);
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return journalMap;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Parses a search result page into a {@link SearchRequestResult}.
 *
 * Each row matching {@code .resulttab tr.result_trefferX} or
 * {@code .resulttab tr.result_treffer} becomes one {@link SearchResult} with
 * its media type (derived from the row's image file name), inner HTML,
 * running number and, where one can be found, an ID.
 *
 * @param html the HTML of the result page
 * @param page the page number, passed through to the returned result
 * @return the parsed results together with the total count, or -1 as the
 *         total when it is absent or cannot be parsed
 */
protected SearchRequestResult parse_search(String html, int page) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url); // base URI for the "abs:href" lookup further down
    Elements table = doc.select(".resulttab tr.result_trefferX, .resulttab tr.result_treffer");
    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();
        // Index of the cell holding the result's content; 1 unless adjusted below.
        int contentindex = 1;
        if (tr.select("td a img").size() > 0) {
            // The last path segment of the image source is used as the media type key.
            String[] fparts = tr.select("td a img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // No usable configured mapping: fall back to the default types,
                    // keyed by the lower-cased file name without its image extension.
                    sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                            .replace(".gif", "").replace(".png", "")));
                }
            } else {
                sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                        .replace(".gif", "").replace(".png", "")));
            }
        } else {
            // Without an image, a three-cell row has its content in the third cell.
            if (tr.children().size() == 3) {
                contentindex = 2;
            }
        }
        sr.setInnerhtml(tr.child(contentindex).child(0).html());

        sr.setNr(i);
        Element link = tr.child(contentindex).select("a").first();
        try {
            if (link != null && link.attr("href").contains("detmediennr")) {
                Map<String, String> params = getQueryParamsFirst(link.attr("abs:href"));
                String nr = params.get("detmediennr");
                if (Integer.parseInt(nr) > i + 1) {
                    // Seems to be an ID
                    if (params.get("detDB") != null) {
                        sr.setId("&detmediennr=" + nr + "&detDB=" + params.get("detDB"));
                    } else {
                        sr.setId("&detmediennr=" + nr);
                    }
                }
            }
        } catch (Exception e) {
            // Ignored: the result is simply left without an ID from the link.
        }
        try {
            // If the second cell starts with an HTML comment, the part after ": " is
            // used as the ID and replaces any ID taken from the link above.
            if (tr.child(1).childNode(0) instanceof Comment) {
                Comment c = (Comment) tr.child(1).childNode(0);
                String comment = c.getData().trim();
                String id = comment.split(": ")[1];
                sr.setId(id);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        results.add(sr);
    }
    // -1 signals that no total could be determined.
    int results_total = -1;
    if (doc.select(".result_gefunden").size() > 0) {
        try {
            // Reduce the text to the digit group captured by the pattern before parsing.
            results_total = Integer.parseInt(
                    doc.select(".result_gefunden").text().trim().replaceAll(".*[^0-9]+([0-9]+).*", "$1"));
        } catch (NumberFormatException e) {
            e.printStackTrace();
            results_total = -1;
        }
    }
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.IOpac.java

/**
 * Collects the search fields offered by the catalogue.
 *
 * The fields are read from the search form ("search_expert.htm", falling back
 * to "iopacie.htm"); if none are found but a "sleStichwort" input exists, a
 * single free-text field is used. Finally a "Medientypen" dropdown is added
 * when media types can be read from "mtyp.js" or, if that fails with an
 * {@link IOException}, from the "#imtyp" options of the search form frame.
 *
 * @return the search fields that were found
 * @throws IOException if the search form page cannot be loaded
 */
@Override
public List<SearchField> getSearchFields() throws IOException {
    final List<SearchField> result = new ArrayList<>();

    // Extract all search fields, except media types
    String html;
    try {
        html = httpGet(opac_url + dir + "/search_expert.htm", getDefaultEncoding());
    } catch (NotReachableException e) {
        html = httpGet(opac_url + dir + "/iopacie.htm", getDefaultEncoding());
    }
    Document doc = Jsoup.parse(html);
    Elements rows = doc.select("form tr:has(input:not([type=submit], [type=reset])), form tr:has(select)");
    for (Element row : rows) {
        Elements cells = row.children();
        final int cellCount = cells.size();
        if (cellCount == 4) {
            // Two search fields next to each other in one row
            SearchField left = createSearchField(cells.get(0), cells.get(1));
            SearchField right = createSearchField(cells.get(2), cells.get(3));
            if (left != null) {
                result.add(left);
            }
            if (right != null) {
                result.add(right);
            }
        } else if (cellCount == 2 || (cellCount == 3 && cells.get(2).children().size() == 0)) {
            SearchField single = createSearchField(cells.get(0), cells.get(1));
            if (single != null) {
                result.add(single);
            }
        }
    }

    // Fallback: a lone free-text input when the form yielded nothing
    if (result.isEmpty() && doc.select("[name=sleStichwort]").size() > 0) {
        Element input = doc.select("input[name=sleStichwort]").first();
        TextSearchField freeSearch = new TextSearchField();
        freeSearch.setDisplayName(stringProvider.getString(StringProvider.FREE_SEARCH));
        freeSearch.setId(input.attr("name"));
        freeSearch.setHint("");
        result.add(freeSearch);
    }

    // Extract available media types.
    // We have to parse JavaScript. Doing this with RegEx is evil.
    // But not as evil as including a JavaScript VM into the app.
    // And I honestly do not see another way.
    Pattern pattern_key = Pattern.compile("mtyp\\[[0-9]+\\]\\[\"typ\"\\] = \"([^\"]+)\";");
    Pattern pattern_value = Pattern.compile("mtyp\\[[0-9]+\\]\\[\"bez\"\\] = \"([^\"]+)\";");

    DropdownSearchField mtyp = new DropdownSearchField();
    try {
        String script;
        try {
            script = httpGet(opac_url + dir + "/mtyp.js", getDefaultEncoding());
        } catch (NotReachableException e) {
            script = httpGet(opac_url + "/mtyp.js", getDefaultEncoding());
        }

        // Each "new Array();" starts the definition of one media type
        for (String chunk : script.split("new Array\\(\\);")) {
            Matcher keyMatcher = pattern_key.matcher(chunk);
            String key = keyMatcher.find() ? keyMatcher.group(1) : "";
            Matcher valueMatcher = pattern_value.matcher(chunk);
            String value = valueMatcher.find() ? valueMatcher.group(1) : "";
            if (!value.isEmpty()) {
                mtyp.addDropdownValue(key, value);
            }
        }
    } catch (IOException e) {
        // The script could not be loaded: read the options from the search form frame instead
        try {
            String formHtml = httpGet(opac_url + dir + "/frames/search_form.php?bReset=1?bReset=1",
                    getDefaultEncoding());
            Document formDoc = Jsoup.parse(formHtml);

            for (Element opt : formDoc.select("#imtyp option")) {
                mtyp.addDropdownValue(opt.attr("value"), opt.text());
            }

        } catch (IOException e1) {
            e1.printStackTrace();
        }

    }
    if (mtyp.getDropdownValues() != null && !mtyp.getDropdownValues().isEmpty()) {
        mtyp.setDisplayName("Medientypen");
        mtyp.setId("Medientyp");
        result.add(mtyp);
    }
    return result;
}