Example usage for org.jsoup.nodes Element attributes

List of usage examples for org.jsoup.nodes Element attributes

Introduction

On this page you can find example usages of org.jsoup.nodes Element attributes.

Prototype

Attributes attributes

To view the source code for org.jsoup.nodes Element attributes, click the Source Link.

Usage

From source file:uk.co.certait.htmlexporter.writer.AbstractTableCellWriter.java

/**
 * Looks up the function output group declared on the given element.
 *
 * @param element
 *            the element whose attributes are searched
 *
 * @return the value of the first attribute whose key matches
 *         {@code DATA_GROUP_OUTPUT_ATTRIBUTE} (ignoring case), or
 *         {@code null} when the element carries no such attribute
 */
protected String getFunctionOutputReference(Element element) {
    for (Attribute candidate : element.attributes()) {
        boolean isOutputGroupAttribute = candidate.getKey().equalsIgnoreCase(DATA_GROUP_OUTPUT_ATTRIBUTE);

        if (isOutputGroupAttribute) {
            // First match wins; no need to look at the remaining attributes.
            return candidate.getValue();
        }
    }

    return null;
}

From source file:uk.co.certait.htmlexporter.writer.AbstractTableCellWriter.java

/**
 * /* w ww .  j  a  v  a  2 s.co  m*/
 * @param element
 * @return
 */
protected String getCellCommentText(Element element) {
    String commentText = null;

    for (Attribute attribute : element.attributes()) {
        if (attribute.getKey().equalsIgnoreCase(DATA_CELL_COMMENT_ATTRIBUTE)) {
            commentText = attribute.getValue();
            break;
        }
    }

    return StringUtils.trimToNull(commentText);
}

From source file:uk.co.certait.htmlexporter.writer.AbstractTableCellWriter.java

/**
 * Returns the Dimension for the cell comment. Falls back to a Dimension of
 * 3,1 when the dimension attribute is not present or when its value is
 * rejected with an {@code IllegalArgumentException}.
 *
 * @param element
 *            the element whose attributes are searched
 * @return the Dimension built from the attribute whose key matches
 *         {@code DATA_CELL_COMMENT_DIMENSION_ATTRIBUTE} (ignoring case), or
 *         a Dimension of 3,1 as described above
 */
protected Dimension getCellCommentDimension(Element element) {
    Dimension resolved = null;

    // Every attribute is inspected, so if several keys match, the last one decides.
    for (Attribute candidate : element.attributes()) {
        if (!candidate.getKey().equalsIgnoreCase(DATA_CELL_COMMENT_DIMENSION_ATTRIBUTE)) {
            continue;
        }

        try {
            resolved = new Dimension(candidate.getValue());
        } catch (IllegalArgumentException invalidValue) {
            // Unparseable value: use the default size instead.
            resolved = new Dimension(3, 1);
        }
    }

    if (resolved == null) {
        return new Dimension(3, 1);
    }

    return resolved;
}

From source file:us.colloquy.sandbox.TestExtractor.java

/**
 * Exploratory test: parses a sample HTML file with jsoup, maps every element
 * of class "section" to a {@code Letter}, and prints each letter as
 * pretty-printed JSON.
 *
 * Per child of a section, the CSS class name selects the handling:
 * "center" is parsed as addressee (plus an optional date/place tail), "Data"
 * as date and place, "petit" / "Textpetit_otstup" as notes, and anything
 * else is appended to the letter content.
 *
 * An {@code IOException} while parsing or serializing fails the test.
 */
@Test
public void useJsoup() {

    String homeDir = System.getProperty("user.home");

    System.out.println(homeDir);

    // The jsoup API lets us extract all letter elements from the files.
    File input = new File("samples/pisma-1904/OEBPS/Text/single_doc.html");

    try {
        Document doc = Jsoup.parse(input, "UTF-8");

        List<Letter> letters = new ArrayList<>(); // our model contains only a subset of fields

        // Year of the most recently parsed date; handed to the date parser for later letters.
        String previousYear = "";

        for (Element element : doc.getElementsByClass("section")) {
            Letter letter = new Letter();

            StringBuilder content = new StringBuilder();

            for (Element child : element.children()) {

                for (Attribute att : child.attributes()) {
                    System.out.println(att.getKey() + " " + att.getValue());
                }

                if ("center".equalsIgnoreCase(child.className())) {
                    String toWhom = child.getElementsByTag("strong").text();

                    if (StringUtils.isEmpty(toWhom)) {
                        toWhom = child.text();
                    }

                    // Addressees are separated by a double whitespace or a comma.
                    String[] toWhomArray = toWhom.split("(\\s\\s)|(,)");

                    for (String to : toWhomArray) {
                        // Here we need to recognize a Russian name and store it; for now we store the content.
                        RussianDate.parseToWhom(letter, to);
                    }

                    // Check whether anything else is here and find date and place;
                    // it will be replaced below if a "Data" element exists.
                    String entireText = child.text();

                    String tail = entireText.replace(toWhom, "");

                    if (StringUtils.isNotEmpty(tail)) {
                        // A parser that figures out date and place if they are present.
                        RussianDate.parseDateAndPlace(letter, tail, previousYear);
                    }

                } else if ("Data".equalsIgnoreCase(child.className())) {

                    // Most often date and place are enclosed in an em tag.
                    // getElementsByTag returns a (possibly empty) collection, so only the text is checked.
                    String dateAndPlace = child.getElementsByTag("em").text();

                    if (StringUtils.isNotEmpty(dateAndPlace)) {
                        RussianDate.parseDateAndPlace(letter, dateAndPlace, previousYear);

                        if (letter.getDate() != null) {
                            LocalDate localDate = letter.getDate().toInstant().atZone(ZoneId.systemDefault())
                                    .toLocalDate();
                            previousYear = String.valueOf(localDate.getYear());
                        }
                    }

                } else if ("petit".equalsIgnoreCase(child.className())
                        || "Textpetit_otstup".equalsIgnoreCase(child.className())) {
                    letter.getNotes().add(child.text());

                } else {
                    // Turn every <sup> marker into inline "[n]" text before collecting the content.
                    Elements elements = child.getElementsByTag("sup");

                    for (Element e : elements) {
                        String value = e.text();

                        e.replaceWith(new TextNode("[" + value + "]", null));
                    }

                    // NOTE(review): getAllElements() yields child and all of its descendants,
                    // and text() already includes descendant text, so nested text appears to be
                    // appended more than once — confirm this is intended.
                    for (Element el : child.getAllElements()) {
                        if ("sup".equalsIgnoreCase(el.tagName())) {
                            content.append(" [").append(el.text()).append("] ");
                        } else {
                            content.append(el.text());
                        }
                    }

                    content.append("\n");
                }
            }

            letter.setContent(content.toString());
            letters.add(letter);
        }

        ObjectWriter ow = new com.fasterxml.jackson.databind.ObjectMapper().writer().withDefaultPrettyPrinter();

        for (Letter letter : letters) {
            String json = ow.writeValueAsString(letter);

            System.out.println(json);
        }

    } catch (IOException e) {
        // Fail the test instead of printing the trace and passing silently.
        throw new java.io.UncheckedIOException("Could not parse or serialize letters from " + input, e);
    }

}