List of usage examples for org.jsoup.nodes Element attributes
Attributes attributes
To view the source code for org.jsoup.nodes Element attributes, click the Source Link.
From source file:uk.co.certait.htmlexporter.writer.AbstractTableCellWriter.java
/**
 * Looks up the function output group declared on the given element.
 *
 * @param element the element whose attributes are scanned
 * @return the value of the first attribute whose key equals
 *         {@code DATA_GROUP_OUTPUT_ATTRIBUTE} ignoring case, or {@code null}
 *         when no attribute matches
 */
protected String getFunctionOutputReference(Element element) {
    for (Attribute candidate : element.attributes()) {
        String key = candidate.getKey();
        if (key.equalsIgnoreCase(DATA_GROUP_OUTPUT_ATTRIBUTE)) {
            // First match wins; later attributes are not inspected.
            return candidate.getValue();
        }
    }
    return null;
}
From source file:uk.co.certait.htmlexporter.writer.AbstractTableCellWriter.java
/** * /* w ww . j a v a 2 s.co m*/ * @param element * @return */ protected String getCellCommentText(Element element) { String commentText = null; for (Attribute attribute : element.attributes()) { if (attribute.getKey().equalsIgnoreCase(DATA_CELL_COMMENT_ATTRIBUTE)) { commentText = attribute.getValue(); break; } } return StringUtils.trimToNull(commentText); }
From source file:uk.co.certait.htmlexporter.writer.AbstractTableCellWriter.java
/**
 * Returns the Dimension for the cell comment.
 *
 * <p>Falls back to a Dimension of 3,1 when the dimension attribute is absent
 * or when its value is rejected with an {@link IllegalArgumentException}.
 * Every attribute is inspected, so if several keys match (ignoring case) the
 * last one determines the result.
 *
 * @param element the element whose attributes are scanned
 * @return the parsed Dimension, or a new Dimension of 3,1 as the default
 */
protected Dimension getCellCommentDimension(Element element) {
    Dimension parsed = null;

    for (Attribute candidate : element.attributes()) {
        if (!candidate.getKey().equalsIgnoreCase(DATA_CELL_COMMENT_DIMENSION_ATTRIBUTE)) {
            continue;
        }
        try {
            parsed = new Dimension(candidate.getValue());
        } catch (IllegalArgumentException invalidValue) {
            // Unparseable attribute value: use the default size instead.
            parsed = new Dimension(3, 1);
        }
    }

    if (parsed == null) {
        return new Dimension(3, 1);
    }
    return parsed;
}
From source file:us.colloquy.sandbox.TestExtractor.java
@Test public void useJsoup() { String homeDir = System.getProperty("user.home"); System.out.println(homeDir);/*w w w. j a v a2s. c om*/ //JSOUP API allows to extract all elements of letters in files // File input = new File("samples/OEBPS/Text/0001_1006_2001.xhtml"); File input = new File("samples/pisma-1904/OEBPS/Text/single_doc.html"); try { Document doc = Jsoup.parse(input, "UTF-8"); List<Letter> letters = new ArrayList<>(); //our model contains only a subset of fields String previousYear = ""; for (Element element : doc.getElementsByClass("section")) { Letter letter = new Letter(); StringBuilder content = new StringBuilder(); for (Element child : element.children()) { for (Attribute att : child.attributes()) { System.out.println(att.getKey() + " " + att.getValue()); } if ("center".equalsIgnoreCase(child.className())) { String toWhom = child.getElementsByTag("strong").text(); if (StringUtils.isEmpty(toWhom)) { toWhom = child.text(); // System.out.println(toWhom); } String[] toWhomArray = toWhom.split("(\\s\\s)|(,)"); for (String to : toWhomArray) { RussianDate.parseToWhom(letter, to); //here we need to recognize a russian name and store that but for now we store the content } //check if there is anything else here and find date and place - it will be replaced if exists below String entireText = child.text(); String tail = entireText.replace(toWhom, ""); if (StringUtils.isNotEmpty(tail)) { RussianDate.parseDateAndPlace(letter, tail, previousYear); //a parser that figures out date and place if they are present } // System.out.println("two whom\t " + child.getElementsByTag("strong").text() ); } else if ("Data".equalsIgnoreCase(child.className())) { if (child.getElementsByTag("em") != null && StringUtils.isNotEmpty(child.getElementsByTag("em").text())) { RussianDate.parseDateAndPlace(letter, child.getElementsByTag("em").text(), previousYear); //most often date and place are enclosed in em tag if (letter.getDate() != null) { LocalDate localDate = 
letter.getDate().toInstant().atZone(ZoneId.systemDefault()) .toLocalDate(); int year = localDate.getYear(); previousYear = year + ""; } } // System.out.println("when and where\t " + child.getElementsByTag("em").text()); } else if ("petit".equalsIgnoreCase(child.className()) || "Textpetit_otstup".equalsIgnoreCase(child.className())) { letter.getNotes().add(child.text()); } else { //System.out.println(child.text() ); Elements elements = child.getElementsByTag("sup"); for (Element e : elements) { String value = e.text(); e.replaceWith(new TextNode("[" + value + "]", null)); } for (Element el : child.getAllElements()) { // System.out.println(el.tagName()); if ("sup".equalsIgnoreCase(el.tagName())) { content.append(" [" + el.text() + "] "); } else { content.append(el.text()); } } content.append("\n"); } // System.out.println(child.tag() + "\n" ); // System.out.println(child.outerHtml() + "\n" + child.text()); } letter.setContent(content.toString()); letters.add(letter); } ObjectWriter ow = new com.fasterxml.jackson.databind.ObjectMapper().writer().withDefaultPrettyPrinter(); for (Letter letter : letters) { // if (letter.getDate() == null) // { // if (StringUtils.isNotEmpty(person.getLastName())) // { String json = ow.writeValueAsString(letter); System.out.println(json); // } //} } } catch (IOException e) { e.printStackTrace(); } }