Here you can find the source of clearBody(String html)
public static String clearBody(String html)
//package com.java2s; //License from project: Open Source License import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.safety.Whitelist; public class Main { public static String clearBody(String html) { Document document = Jsoup.parse(html); document.outputSettings(new Document.OutputSettings().prettyPrint(false));//makes html() preserve linebreaks and spacing document.select("br").append("\n"); document.select("p").prepend("\n"); String result = Jsoup.clean(document.html(), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); result = result.replace("\r", "\n"); result = result.replace("\n ", "\n"); result = result.replaceAll("[\\n\\r]+", "\n"); return result; }//from w w w . jav a2 s . c o m }