List of utility methods for parsing HTML with Jsoup
void | parseTemplate1_2(Element element) parse Template 1_2 Element infotemplatebox = element.select("table").first(); Elements elements = infotemplatebox.select("tr"); for (int i = 0, total = elements.size(); i < total; i++) { Element item = elements.get(i); if (i == 1) { System.err.println(item.select("img").attr("src")); } else { System.err.println(item.text()); ... |
Document | parseUTF8HTMLDocument(String html) parse UTF-8 HTML Document return Parser.parse(html, "utf-8"); |
Document | parseWithAdultCheck(URL url, int timeout) parse With Adult Check return verifyAdultNotice(parse(url, timeout));
|
String | prettyPrint(String html) pretty Print Document doc = Jsoup.parse(html);
doc.outputSettings().prettyPrint(true);
return doc.toString();
|
String | processHtml(String html) process Html String newHtml = html; Document doc = Jsoup.parse(html); Elements eles = doc.select("[dict]"); for (Element ele : eles) { System.out.println(ele.attr("dict")); ele.parent().html("fffffffffffffff"); System.out.println("doc:\n" + doc.html()); ... |
String | removeAllHtmlTags(String unsafe) Remove all HTML tags from the given string if (unsafe == null) { return null; } else { Document dirty = Jsoup.parseBodyFragment(unsafe); Cleaner cleaner = new Cleaner(Whitelist.none()); Document clean = cleaner.clean(dirty); return clean.body().text(); |
String | removeHTMLTags(final String text) remove HTML Tags return Jsoup.parse(text).body().text();
|
String | removeTag(String html) Remove the top tag and return its inner HTML as a string. The output can be managed by #newOutputSetting(Document.OutputSettings) return parse(html).child(0).html();
|
String | sanitizeHTML(String html) sanitize HTML Whitelist whitelist = Whitelist.relaxed().addAttributes(":all", "style") .addAttributes("span", "class") .addAttributes("table", "border", "align", "cellspacing", "cellpadding") .preserveRelativeLinks(true); return Jsoup.clean(html, "https://dummydomain.com/", whitelist); |
String | stripHTML(final String value) Use Jsoup to remove all HTML tags from a string. If value is null, it will be converted to an empty string. return Jsoup.parse(Strings.nullToEmpty(value)).text();
|