List of utility methods for working with Jsoup HTML Document objects
String | keepLineBreak(Document docRes) Converts the document to plain text with all tags removed, keeping a line break for each br element and a blank line before each p element. docRes.outputSettings(new Document.OutputSettings().prettyPrint(false)); docRes.select("br").append("\\n"); docRes.select("p").prepend("\\n\\n"); String result = docRes.html().replaceAll("\\\\n", "\n"); result = Jsoup.clean(result, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); return result; |
void | makeAbsolute(Document doc) Traverses the document and replaces the href attribute of every element that has one with its absolute URL (the snippet is truncated after this step). doc.traverse(new NodeVisitor() { @Override public void head(Node node, int i) { if (node instanceof Element) { Element tag = (Element) node; if (tag.hasAttr("href")) { String href = tag.attr("abs:href"); tag.attr("href", href); ... |
Document | normalizeWhitespaces(Document doc) Normalizes the whitespace in the text nodes that are direct children of the specified document's body and returns the same document. for (TextNode node : doc.body().textNodes()) { node.text(node.text()); return doc; |
Document | postDocument(String url, Collection<KeyVal> data) Adds an authenticity_token entry to the given form data, POSTs the data to the URL and returns the response document. KeyVal kv = org.jsoup.helper.HttpConnection.KeyVal.create("authenticity_token", AUTHENTICITY_TOKEN); data.add(kv); return Jsoup.connect(url).timeout(TIME_OUT).data(data).post(); |
void | removeTag(Document doc, String selector) Replaces every element matching the selector with its own text content. for (Element e : doc.select(selector).toArray(new Element[0])) { String text = e.text(); e.after(text); e.remove(); |
Map | retrieveHiddenInputs(Document doc) Collects the name and value attributes of all hidden inputs inside forms into a map keyed by name. Map<String, String> map = new HashMap<>(); for (Element e : doc.select("form input[type=hidden]")) { String name = e.attr("name"); String value = e.attr("value"); map.put(name, value); return map; |
Path | saveDocumentToDirectory(final org.jsoup.nodes.Document doc, final String fileName, final Path tmpDir) Writes the document's outer HTML to the file fileName resolved against tmpDir and returns the path of that file. final Path outTmpPath = tmpDir.resolve(fileName); writeAll(outTmpPath, doc.outerHtml(), ENCODING_UTF8); return outTmpPath; |
String | stripTags(Document document) Returns the text content of the document body with all tags removed. return document.body().text();
|
Document | verifyAdultNotice(Document doc) If the document title equals ADULT_NOTICE, locates the adult-check form and its inputs and opens a connection to the form's action URL (the snippet is truncated after this step). Document document = doc; if (ADULT_NOTICE.equals(doc.title())) { Element form = document.select("form[action~=adult_\\w+.bml$]").first(); Element hidden = form.select("input[name=ret]").first(); Element submit = form.select("input[name=adult_check]").first(); Connection conn = Jsoup.connect(form.attr("action")); Iterator<Element> iterator = form.select("input").iterator(); while (iterator.hasNext()) { ... |