Example usage for org.jsoup.nodes Element child

List of usage examples for org.jsoup.nodes Element child

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.child.

Prototype

public Element child(int index) 

Source Link

Document

Get a child element of this element, by its 0-based index number.

Usage

From source file:org.asqatasun.rules.elementselector.ImageElementSelector.java

/**
 * An link is seen as composite when it is composed with more than one 
 * element. The tested element has at least one image. If the text is different
 * from the one of the child element, the link is composite by definition. 
 * It the text is identical, we check whether the current element has more
 * than 1 child./*from w ww . j av  a 2s .  c  o m*/
 * @param imageParent
 * @return whether the current image is a composite link.
 */
private boolean isCompositeLink(Element imageParent, Element image) {
    if (imageParent == null) {
        return false;
    }
    if (!StringUtils.equals(imageParent.text(), image.text())) {
        return true;
    }
    if (imageParent.children().size() == 1) {
        return isCompositeLink(imageParent.child(0), image);
    } else if (imageParent.children().size() > 1) {
        return true;
    }
    return false;
}

From source file:org.trec.liveqa.GetYAnswersPropertiesFromQid.java

/**
 * /* ww  w .ja v a 2  s.c  o m*/
 * @param iQid question ID
 * @return map of features and attributes: question title, body, category, best answer, date
 * @throws Exception
 */
public static Map<String, String> extractData(String iQid) throws Exception {

    Map<String, String> res = new LinkedHashMap<>();
    res.put("qid", iQid);

    // parse date from qid
    res.put("Date", DATE_FORMAT.parse(iQid.substring(0, 14)).toString());

    // get and mine html page
    String url = URL_PREFIX + iQid;
    HttpClient client = new HttpClient();
    GetMethod method = new GetMethod(url);
    method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
            new DefaultHttpMethodRetryHandler(3, false));
    try {
        int statusCode = client.executeMethod(method);
        if (statusCode != HttpStatus.SC_OK) {
            System.err.println("Method failed: " + method.getStatusLine());
        }
        InputStream responseBody = method.getResponseBodyAsStream();

        // strip top levels
        Document doc = Jsoup.parse(responseBody, "UTF8", url);
        Element html = doc.child(0);

        Element body = html.child(1);
        Element head = html.child(0);

        // get category
        res.put("Top level Category", findElementText(body, cc));

        // get title
        res.put("Title", findElementText(head, ct));

        // get body
        res.put("Body", findElementText(head, cb));

        // get keywords
        res.put("Keywords", findElementText(head, ck));

        // get best answer
        Element best_answer_div = html.select("div#ya-best-answer").first();
        if (best_answer_div != null) {
            res.put("Best Answer", findElementText(best_answer_div, cba));
        }

        responseBody.close();

    } catch (HttpException e) {
        System.err.println("Fatal protocol violation: " + e.getMessage());
        e.printStackTrace();
    } catch (IOException e) {
        System.err.println("Fatal transport error: " + e.getMessage());
        e.printStackTrace();
    } finally {
        method.releaseConnection();
    }

    return res;
}

From source file:poe.trade.assist.UniquesListSearchGenerator.java

/**
 * Generates {@code uniqueslist.txt}, a tab-separated listing of unique items.
 * <p>
 * For every entry of {@code lists} the matching page under
 * {@code http://pathofexile.gamepedia.com/} is downloaded, and each data row
 * of every {@code table.wikitable.sortable} table becomes one output line
 * with: name, image formula, required level, base item, mods formula, four
 * search hyperlinks (Talisman, Talisman+Hardcore, Standard, Hardcore) and a
 * wiki hyperlink. The first row of each table is treated as the header row.
 *
 * @param args not used
 * @throws Exception if a page cannot be fetched, a row has more than two mod
 *                   groups, the thread is interrupted while sleeping, or the
 *                   output file cannot be written
 */
public static void main(String[] args) throws Exception {
    List<String> outputLines = new LinkedList<>();
    outputLines.add(
            "Name   Art   Req.Level   Base   Mods   TalismanSC   TalismanHC   Standard   Hardcore   poewiki");
    for (String list : lists) {
        HttpResponse<String> response = Unirest.get("http://pathofexile.gamepedia.com/" + list)
                .header("User-Agent",
                        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0")
                .asString();
        Document doc = Jsoup.parse(response.getBody());
        Elements elems = doc.select("table.wikitable.sortable");
        for (Element table : elems) {
            Elements rows = table.select("tr");
            boolean headerRow = true;
            boolean hasRequiredLevel = false;
            for (Element row : rows) {
                if (headerRow) { // first row is headers
                    hasRequiredLevel = !row.select("abbr[title=\"Required Level\"]").isEmpty();
                    headerRow = false;
                    continue;
                }
                String name = row.child(0).child(0).attr("title");
                System.out.println("Now processing: " + name);
                String imgurl = "=IMAGE(\"" + row.select("img").attr("src") + "\", 3)";
                String base = row.child(1).child(0).attr("title");
                String reqLvl = hasRequiredLevel ? row.child(2).text() : "0";
                reqLvl = reqLvl.equalsIgnoreCase("n/a") ? "0" : reqLvl;
                String mod = buildModFormula(name, row.select("span.itemboxstatsgroup.text-mod"));

                String standard = "Standard";
                String hardcore = "Hardcore";
                String tempsc = "Talisman";
                String temphc = "Talisman+Hardcore";
                String nameenc = URLEncoder.encode(name, "UTF-8");
                String sc = hyperlink(getSearchURL(standard, nameenc));
                String hc = hyperlink(getSearchURL(hardcore, nameenc));
                String tsc = hyperlink(getSearchURL(tempsc, nameenc));
                String thc = hyperlink(getSearchURL(temphc, nameenc));
                String poewikiurl = hyperlink("http://pathofexile.gamepedia.com/" + (name.replace(' ', '_')));

                String s = format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", name, imgurl, reqLvl, base, mod,
                        tsc, thc, sc, hc, poewikiurl);
                outputLines.add(s);
                // NOTE(review): one-second pause per row, presumably throttling; confirm intent.
                Thread.sleep(1000);
            }
        }
    }
    FileUtils.writeLines(new File("uniqueslist.txt"), outputLines);
}

/**
 * Builds the spreadsheet string formula describing an item's mods.
 * <p>
 * With two mod groups the first is the implicit mod and the second holds the
 * explicit mods; with one group it holds the explicit mods only. Lines are
 * joined with {@code "&CHAR(10)&"}, and a dashed line separates the implicit
 * mod from the explicit ones. With no group the result is {@code =""}.
 *
 * @param name item name, used only in the error message
 * @param mods the mod group elements selected from the item's table row
 * @return the formula text, starting with {@code ="} and ending with {@code "}
 * @throws IllegalStateException if there are more than two mod groups
 */
private static String buildModFormula(String name, Elements mods) {
    String lineSeparator = "\"&CHAR(10)&\"";
    StringBuilder mod = new StringBuilder("=\"");
    if (!mods.isEmpty()) {
        if (mods.size() > 2) {
            throw new IllegalStateException("mods.size() is > 2. " + name + " - " + mods.toString());
        }
        boolean hasImplicit = mods.size() > 1;
        Element explicitGroup = mods.get(hasImplicit ? 1 : 0);
        // Text placed directly inside the group...
        String exp = explicitGroup.textNodes().stream().map(n -> n.text().trim())
                .filter(s -> !s.isEmpty()).collect(Collectors.joining(lineSeparator));
        // ...followed by text wrapped in child elements.
        String additionalExp = explicitGroup.children().stream().filter(e -> e.hasText())
                .map(e -> e.text().trim()).collect(Collectors.joining(lineSeparator));
        if (!additionalExp.isEmpty()) {
            // Only insert a separator when there is text on both sides, so the
            // cell never starts with an empty line.
            exp = exp.isEmpty() ? additionalExp : exp + lineSeparator + additionalExp;
        }
        if (hasImplicit) {
            mod.append(mods.get(0).text())
                    .append(lineSeparator).append("--------------").append(lineSeparator);
        }
        mod.append(exp);
    }
    return mod.append("\"").toString();
}