Example usage for org.jsoup.nodes Element getAllElements

List of usage examples for org.jsoup.nodes Element getAllElements

Introduction

On this page you can find example usages of org.jsoup.nodes Element getAllElements.

Prototype

public Elements getAllElements() 

Source Link

Document

Find all elements under this element (including self, and children of children).

Usage

From source file:org.norvelle.addressdiscoverer.parse.structured.StructuredPageWebContactLink.java

/**
 * Attempt to find a URL-type link associated with the given Jsoup Element,
 * by looking at all the HREF attributes of the various subelements.
 * <p>
 * Links whose {@code href} starts with {@code mailto:} are skipped. Exactly one
 * remaining link must be found; it is stored as this contact link's address.
 *
 * @param element the element whose subtree (the element itself included) is searched
 *                for {@code href} attributes
 * @throws DoesNotContainContactLinkException if no non-mailto {@code href} is found
 * @throws MultipleContactLinksOfSameTypeFoundException if more than one non-mailto
 *                {@code href} is found
 */
public StructuredPageWebContactLink(Element element)
        throws DoesNotContainContactLinkException, MultipleContactLinksOfSameTypeFoundException {
    super(element);

    // Collect every href in the subtree that is not an e-mail link.
    ArrayList<String> hrefs = new ArrayList<>();
    for (Element child : element.getAllElements()) {
        if (child.hasAttr("href")) {
            String href = child.attr("href");
            if (!href.startsWith("mailto:")) {
                hrefs.add(href);
            }
        }
    }

    if (hrefs.isEmpty()) {
        throw new DoesNotContainContactLinkException();
    }
    if (hrefs.size() > 1) {
        throw new MultipleContactLinksOfSameTypeFoundException("Multiple web links");
    }
    this.address = hrefs.get(0);
}

From source file:org.xwiki.validator.HTML5DutchWebGuidelinesValidator.java

/**
 * Images placed in a link should have a non-empty text alternative to enable visitors who do not see the image to
 * follow the link.
 * <p>
 * For each link, asserts that at least one image element returned for it has a non-empty {@code alt}
 * attribute, or that the link (or an element nested in it) has text.
 */
public void validateRpd7s4() {
    for (Element anchor : getElements(ELEM_LINK)) {

        // Does any image of the link carry a non-empty alt attribute?
        boolean altPresent = false;
        for (Element image : getChildren(anchor, ELEM_IMG)) {
            altPresent |= StringUtils.isNotEmpty(getAttributeValue(image, ATTR_ALT));
        }

        // Does the link itself, or anything nested inside it, carry text?
        boolean textPresent = false;
        for (Element nested : anchor.getAllElements()) {
            textPresent |= nested.hasText();
        }

        // Either a usable text alternative or real text is enough for the link to pass.
        assertTrue(Type.ERROR, "rpd7s4.links", altPresent || textPresent);
    }
}

From source file:org.xwiki.validator.HTML5DutchWebGuidelinesValidator.java

/**
 * Do not describe the mechanism behind following a link.
 * <p>
 * The forbidden phrases are read from the {@code rpd8s1.forbiddenLinkTexts} message as a comma-separated
 * list. Every element of every link that has text is checked, case-insensitively, against each phrase.
 */
public void validateRpd8s1() {
    String[] bannedPhrases = messages.getString("rpd8s1.forbiddenLinkTexts").split(",");

    for (Element anchor : getElements(ELEM_LINK)) {
        for (Element nested : anchor.getAllElements()) {
            // Elements without any text cannot contain a forbidden phrase.
            if (!nested.hasText()) {
                continue;
            }
            for (String banned : bannedPhrases) {
                boolean mentionsBanned = StringUtils.containsIgnoreCase(nested.text(), banned);
                assertFalse(Type.ERROR, "rpd8s1.link", mentionsBanned);
            }
        }
    }
}

From source file:us.colloquy.sandbox.TestExtractor.java

/**
 * Parses a sample HTML document with jsoup, builds one {@link Letter} per element of class
 * {@code section}, and prints every letter as pretty-printed JSON.
 * <p>
 * Each direct child of a section is dispatched on its CSS class name:
 * <ul>
 *   <li>{@code center} - addressee(s), optionally followed by date and place;</li>
 *   <li>{@code Data} - date and place, taken from the {@code em} elements;</li>
 *   <li>{@code petit} / {@code Textpetit_otstup} - stored as a note;</li>
 *   <li>anything else - appended to the letter content, one line per child.</li>
 * </ul>
 * An {@link IOException} while reading or serializing is only printed; the test does not fail on it.
 */
@Test
public void useJsoup() {

    String homeDir = System.getProperty("user.home");

    System.out.println(homeDir);

    //JSOUP API allows to extract all  elements of letters in files
    File input = new File("samples/pisma-1904/OEBPS/Text/single_doc.html");

    try {
        Document doc = Jsoup.parse(input, "UTF-8");

        List<Letter> letters = new ArrayList<>(); //our model contains only a subset of fields

        // Year of the most recently parsed letter date; handed to the date parser for later entries.
        String previousYear = "";

        for (Element element : doc.getElementsByClass("section")) {
            Letter letter = new Letter();

            StringBuilder content = new StringBuilder();

            for (Element child : element.children()) {

                for (Attribute att : child.attributes()) {
                    System.out.println(att.getKey() + " " + att.getValue());
                }

                String className = child.className();

                if ("center".equalsIgnoreCase(className)) {
                    String entireText = child.text();

                    // Prefer the emphasized part as the addressee; fall back to the whole text.
                    String toWhom = child.getElementsByTag("strong").text();
                    if (StringUtils.isEmpty(toWhom)) {
                        toWhom = entireText;
                    }

                    // Addressees are separated by a double whitespace or a comma.
                    for (String to : toWhom.split("(\\s\\s)|(,)")) {
                        RussianDate.parseToWhom(letter, to); //here we need to recognize a russian name and store that but for now we store the content
                    }

                    //check if there is anything else here and find date and place - it will be replaced if exists below
                    String tail = entireText.replace(toWhom, "");

                    if (StringUtils.isNotEmpty(tail)) {
                        RussianDate.parseDateAndPlace(letter, tail, previousYear); //a parser that figures out date and place if they are present
                    }

                } else if ("Data".equalsIgnoreCase(className)) {

                    // getElementsByTag never returns null, so only the text needs checking.
                    String dateAndPlace = child.getElementsByTag("em").text();

                    if (StringUtils.isNotEmpty(dateAndPlace)) {
                        RussianDate.parseDateAndPlace(letter, dateAndPlace, previousYear); //most often date and place are enclosed in em tag

                        if (letter.getDate() != null) {
                            LocalDate localDate = letter.getDate().toInstant().atZone(ZoneId.systemDefault())
                                    .toLocalDate();
                            previousYear = String.valueOf(localDate.getYear());
                        }
                    }

                } else if ("petit".equalsIgnoreCase(className)
                        || "Textpetit_otstup".equalsIgnoreCase(className)) {
                    letter.getNotes().add(child.text());

                } else {
                    // Turn every <sup> into a bracketed text node so the marker shows up in the text.
                    for (Element sup : child.getElementsByTag("sup")) {
                        String value = sup.text();

                        sup.replaceWith(new TextNode("[" + value + "]", null));
                    }

                    // Element.text() already contains the text of all descendants, so it is appended
                    // exactly once; appending it per descendant would repeat nested text.
                    if ("sup".equalsIgnoreCase(child.tagName())) {
                        // The child itself is a <sup>: keep the bracketed form.
                        content.append(" [").append(child.text()).append("] ");
                    } else {
                        content.append(child.text());
                    }

                    content.append("\n");
                }
            }

            letter.setContent(content.toString());
            letters.add(letter);
        }

        ObjectWriter ow = new com.fasterxml.jackson.databind.ObjectMapper().writer().withDefaultPrettyPrinter();

        for (Letter letter : letters) {
            String json = ow.writeValueAsString(letter);

            System.out.println(json);
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

}