Java Utility Methods HTML Jsoup Document

List of utility methods for working with an HTML Jsoup Document

Description

The methods for working with an HTML Jsoup Document are organized into topic(s).

Method

String keepLineBreak(Document docRes)
keep Line Break
// Turn pretty-printing off so the serialised HTML carries no formatter-inserted whitespace.
docRes.outputSettings(new Document.OutputSettings().prettyPrint(false));
// Mark every <br> and <p> with a literal backslash-n placeholder (note: this modifies docRes).
docRes.select("br").append("\\n");
docRes.select("p").prepend("\\n\\n");
// Settings for the final clean-up pass: again no pretty-printing.
final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
// Replace each placeholder in the serialised HTML with a real newline character.
final String withNewlines = docRes.html().replaceAll("\\\\n", "\n");
// Whitelist.none() permits no tags, so only the text (with the newlines) is returned.
return Jsoup.clean(withNewlines, "", Whitelist.none(), rawOutput);
void makeAbsolute(Document doc)
make Absolute
doc.traverse(new NodeVisitor() {
    @Override
    public void head(Node node, int i) {
        if (node instanceof Element) {
            Element tag = (Element) node;
            if (tag.hasAttr("href")) {
                String href = tag.attr("abs:href");
                tag.attr("href", href);
...
Document normalizeWhitespaces(Document doc)
Normalizes the whitespaces in text nodes of the specified document.
// textNodes() yields only the text nodes that are direct children of <body>;
// text nested inside other elements is not visited by this loop.
for (TextNode node : doc.body().textNodes()) {
    // Per the jsoup TextNode API, text() returns the whitespace-normalised text;
    // writing it back replaces the node's raw text with that normalised form.
    node.text(node.text());
}
// The same document instance is returned after being modified in place.
return doc;
Document postDocument(String url, Collection data)
post Document
// Add the authenticity token to the form data; the caller's collection itself is modified.
data.add(org.jsoup.helper.HttpConnection.KeyVal.create("authenticity_token", AUTHENTICITY_TOKEN));
// POST the form data to the URL using the TIME_OUT timeout and return the parsed response.
return Jsoup.connect(url)
        .timeout(TIME_OUT)
        .data(data)
        .post();
void removeTag(Document doc, String selector)
remove Tag
// Iterate over an array snapshot of the matches so that removing elements from the
// document cannot interfere with the iteration.
for (Element e : doc.select(selector).toArray(new Element[0])) {
    // Insert the element's text as a text node. after(String) would parse the text as
    // HTML, so content such as "<b>" inside the text would be turned into real markup.
    e.after(new org.jsoup.nodes.TextNode(e.text()));
    // Drop the element itself; only its text remains in the document.
    e.remove();
}
Map<String, String> retrieveHiddenInputs(Document doc)
retrieve Hidden Inputs
// Collect name -> value for every hidden <input> located inside a <form>.
Map<String, String> map = new HashMap<>();
for (Element e : doc.select("form input[type=hidden]")) {
    // Map.put semantics: a later input with the same name replaces the earlier value.
    map.put(e.attr("name"), e.attr("value"));
}
return map;
Path saveDocumentToDirectory(final org.jsoup.nodes.Document doc, final String fileName, final Path tmpDir)
save Document To Directory
// Target file: fileName resolved against the given directory.
final Path target = tmpDir.resolve(fileName);
// Serialise the whole document (outer HTML) and hand it to writeAll with ENCODING_UTF8.
final String html = doc.outerHtml();
writeAll(target, html, ENCODING_UTF8);
// Return the path that was written so the caller can locate the file.
return target;
String stripTags(Document document)
strip Tags
// Returns the text of the document's <body> element, i.e. the content without tags.
// NOTE(review): if body() can return null for this jsoup version/parser, this line
// throws a NullPointerException — confirm against the jsoup version in use.
return document.body().text();
Document verifyAdultNotice(Document doc)
verify Adult Notice
Document document = doc;
if (ADULT_NOTICE.equals(doc.title())) {
    Element form = document.select("form[action~=adult_\\w+.bml$]").first();
    Element hidden = form.select("input[name=ret]").first();
    Element submit = form.select("input[name=adult_check]").first();
    Connection conn = Jsoup.connect(form.attr("action"));
    Iterator<Element> iterator = form.select("input").iterator();
    while (iterator.hasNext()) {
...