Example usage for org.jsoup.nodes.Document.body()

List of usage examples for org.jsoup.nodes.Document.body()

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.body().

Prototype

public Element body() 

Source Link

Document

Accessor to the document's body element.

Usage

From source file: sachin.bws.site.Template.java

/**
 * Picks the template name for a page from the CSS classes on its {@code <body>} element.
 *
 * <p>The body's {@code class} attribute is split into tokens and the tokens are checked from
 * last to first; the first token contained in {@code templates} is returned.
 *
 * @param pageSource HTML source of the page
 * @return the matching class token, or {@code "****"} when no token is a known template
 */
public String getTemplate(String pageSource) {
    Document doc = Jsoup.parse(pageSource);
    // Class lists may be separated by any whitespace (tabs, newlines, runs of spaces), not
    // only a single U+0020, so trim and split on whitespace runs to avoid missed or empty tokens.
    String[] bodyClasses = doc.body().attr("class").trim().split("\\s+");
    // Walk the tokens backwards directly instead of reversing a list view in place.
    for (int i = bodyClasses.length - 1; i >= 0; i--) {
        String candidate = bodyClasses[i];
        if (templates.contains(candidate)) {
            return candidate;
        }
    }
    return "****";
}

From source file: utils.AutoLinkRenderer.java

/**
 * Runs {@code convertLink} over the text nodes of every element in {@code body} whose own
 * text matches the pattern, then stores the resulting body HTML back into {@code this.body}.
 *
 * @param pattern pattern used to find elements by their own text; also passed to convertLink
 * @param toLink passed through to convertLink
 * @return this renderer
 */
private AutoLinkRenderer parse(Pattern pattern, ToLink toLink) {
    Document document = Jsoup.parse(body);
    // Pretty-printing is switched off for the output of this document.
    document.outputSettings().prettyPrint(false);

    for (Element candidate : document.getElementsMatchingOwnText(pattern)) {
        if (!isIgnoreElement(candidate)) {
            for (TextNode textNode : candidate.textNodes()) {
                String converted = convertLink(textNode.toString(), pattern, toLink);
                // Blank the original text and insert the converted markup right after it.
                textNode.text(StringUtils.EMPTY);
                textNode.after(converted);
            }
        }
    }

    this.body = document.body().html();
    return this;
}

From source file: xxx.web.comments.debates.impl.ProConOrgCommentsParser.java

/**
 * Parses a comments page into a {@link Debate}.
 *
 * <p>The debate title is taken from the page's {@code h2} text. For each of the "pro" and
 * "con" columns, every top-level comment becomes an argument with that stance, and every
 * reply nested inside a comment becomes an argument whose parent id is the comment's id.
 * Ids are the elements' {@code id} attributes with {@code ':'} replaced by {@code '_'}.
 *
 * @param inputStream HTML of the page, read as UTF-8
 * @return the parsed debate
 * @throws IOException if the stream cannot be read
 * @throws IllegalStateException if a comment or reply element has no {@code id} attribute
 */
@Override
public Debate parseDebate(InputStream inputStream) throws IOException {
    Debate result = new Debate();

    Document doc = Jsoup.parse(inputStream, "UTF-8", "http://www.procon.org/");
    Element body = doc.body();

    Map<String, Elements> proConElements = new HashMap<>();
    proConElements.put("pro", body.select("div[class=column pro]"));
    proConElements.put("con", body.select("div[class=column con]"));

    // title
    result.setTitle(Utils.normalize(body.select("h2").text()));

    for (Map.Entry<String, Elements> entry : proConElements.entrySet()) {
        // stance
        String stance = entry.getKey();

        Elements comments = entry.getValue().select("ul.comments > li[class^=comment]");

        for (Element element : comments) {

            Element divContent = element.select("div.contents").iterator().next();

            // extract argument content
            Argument argument = extractArgumentFromDivContent(divContent);

            // extract ID; a missing attribute is read as an empty string, never null,
            // so emptiness is the condition that means "unknown"
            String parentId = element.attr("id").replace(":", "_");
            if (parentId.isEmpty()) {
                throw new IllegalStateException("Parent id must be known");
            }

            argument.setId(parentId);
            // set stance - we know it
            argument.setStance(stance);

            result.getArgumentList().add(argument);

            Elements divReplies = element.select("li[class^=reply]");

            for (Element divReply : divReplies) {

                Element replyDivContent = divReply.select("div.contents").iterator().next();

                // extract reply argument
                Argument replyArgument = extractArgumentFromDivContent(replyDivContent);

                // The reply's id comes from the reply element itself; reading it from the
                // parent comment would give every reply the same id as its parent.
                String id = divReply.attr("id").replace(":", "_");
                if (id.isEmpty()) {
                    throw new IllegalStateException("Id must be known");
                }

                // set id and parentId
                replyArgument.setId(id);
                replyArgument.setParentId(parentId);

                // add to debate
                result.getArgumentList().add(replyArgument);
            }
        }
    }

    return result;
}

From source file: xxx.web.comments.debates.impl.ProConOrgParser.java

/**
 * Parses a pro/con question page into a {@link Debate}.
 *
 * <p>The title is the text of the first {@code h2}. The row following the table row that
 * holds the bold "PRO (yes)" header is treated as the answers row: its first {@code td}
 * supplies the "pro" arguments and its second {@code td} the "con" arguments. Arguments with
 * empty extracted text are reported on stderr and skipped. A per-stance count is printed to
 * stdout before returning.
 *
 * @param inputStream HTML of the page, read as UTF-8
 * @return the parsed debate, or {@code null} when the page has no {@code h2}, no
 *         "PRO (yes)" header row followed by another row, or fewer than two cells in
 *         that answers row
 * @throws IOException if the stream cannot be read
 */
@Override
public Debate parseDebate(InputStream inputStream) throws IOException {
    Debate result = new Debate();

    Document doc = Jsoup.parse(inputStream, "UTF-8", "http://www.procon.org/");
    Element body = doc.body();

    // title
    Element titleElement = body.select("h2").first();
    if (titleElement == null) {
        // not a debate
        return null;
    }
    result.setTitle(Utils.normalize(titleElement.text()));

    Elements proConTr = body.select("tr > td > b:contains(PRO \\(yes\\))");

    // Same traversal as the selector implies (b -> td -> tr -> next row), evaluated once;
    // first() is the only step that can yield null, so only those results are checked.
    Element headerCell = proConTr.parents().first();
    Element headerRow = headerCell == null ? null : headerCell.parents().first();
    Element trAnswers = headerRow == null ? null : headerRow.nextElementSibling();
    if (trAnswers == null) {
        // not a pro-con debate
        return null;
    }

    Elements answerCells = trAnswers.select("td");
    if (answerCells.size() < 2) {
        // An answers row without both a pro and a con cell is not a pro-con debate;
        // returning null here avoids an IndexOutOfBoundsException on get(1).
        return null;
    }

    addArgumentsFromCell(result, answerCells.get(0), "pro");
    addArgumentsFromCell(result, answerCells.get(1), "con");

    // show some stats:
    Map<String, Integer> map = new HashMap<>();
    map.put("pro", 0);
    map.put("con", 0);
    for (Argument argument : result.getArgumentList()) {
        map.put(argument.getStance(), map.get(argument.getStance()) + 1);
    }
    System.out.println(map);

    return result;
}

/**
 * Adds one argument with the given stance for each {@code blockquote > div[class=editortext]}
 * found in the cell.
 *
 * <p>{@code idCounter} is advanced for every candidate, including those skipped because
 * their extracted text is empty.
 */
private void addArgumentsFromCell(Debate result, Element cell, String stance) {
    for (Element text : cell.select("blockquote > div[class=editortext]")) {
        Argument argument = new Argument();
        argument.setStance(stance);
        argument.setText(extractPlainTextFromTextElement(text));
        argument.setOriginalHTML(text.html());

        // set ID
        idCounter++;
        argument.setId("pcq_" + idCounter);

        if (!argument.getText().isEmpty()) {
            result.getArgumentList().add(argument);
        } else {
            System.err.println("Failed to extract text from " + text.html());
        }
    }
}