List of utility methods for converting HTML to text
String | html2text(final String html) Converts HTML to plaintext. Document document = Jsoup.parse(html); document.select("br").append("\\n"); document.select("p").prepend("\\n\\n"); return document.text().replaceAll("\\\\n", "\n"); |
String | html2text(String html) Converts HTML to plain text. return Jsoup.parse(html).text();
|
String | html2text(String htmlStr) Converts HTML content to text. if ((null == htmlStr) || (htmlStr.isEmpty())) { throw new IllegalArgumentException("The input html string was null or empty"); } return Jsoup.parse(htmlStr).text(); |
String | text(Element e) Fetches the text of an element but preserves newlines. checkNotNull(e, "e should not be null."); e.select("br").append("\\n"); e.select("p").prepend("\\n\\n"); return e.text().replaceAll("\\\\n", "\n").trim(); |
String | text(Element element) Collects the text of an element by traversing its nodes, replacing each WHITESPACE_BLOCK match in a text node with a single space. final StringBuilder accum = new StringBuilder(); new NodeTraversor(new NodeVisitor() { public void head(Node node, int depth) { if (node instanceof TextNode) { TextNode textNode = (TextNode) node; String str = textNode.getWholeText(); str = WHITESPACE_BLOCK.matcher(str).replaceAll(" "); accum.append(str); ... |
String | textOf(final Element el) Collects the text of an element by traversing its nodes and appending the text of each text node. final StringBuilder accum = new StringBuilder(); new NodeTraversor(new NodeVisitor() { public void head(final Node node, final int depth) { if (node instanceof TextNode) { TextNode textNode = (TextNode) node; accum.append(textNode.text()); } else if (node instanceof Element) { Element element = (Element) node; ... |
Element | toElement(String html) Converts an HTML string to an HTML element. return toElement(html, null);
|
Element | toHtmlByHtml(String html) Inserts an html tag at the top. Beware: the result includes head and body tags too. Input: (example not preserved). The output can be managed with #newOutputSetting(Document.OutputSettings). return new Element("html").append(html); |
Element | toHtmlByPlain(String plainText) Works the same as #toHtmlByHtml(String), but treats the input parameter as plain text (so any character that cannot be used directly in HTML is converted to another form). Example: input: (example not preserved) Element html = new Element("html"); Element head = new Element("head"); Element body = new Element("body").text(plainText); return html.prependChild(body).prependChild(head); |