StringgetMetaValue(String html, String metaKey)
get Meta Value
try {
    Document doc = Jsoup.parse(html);
    Elements name = doc.head().getElementsByAttributeValue("name", metaKey);
    return name.get(0).attr("content");
} catch (Exception ex) {
    return "";
StringgetPlainText(String htmlText)
Turn a hunk of text that contains HTML into plaintext.
Document d = Jsoup.parse(htmlText);
return d.text();
// Entry: getPlainTextFromHtml. Rewrites the html argument step by step; line breaks are
// carried through the jsoup text extraction by means of a placeholder token.
StringgetPlainTextFromHtml(String html)
Extracts plain text from given HTML String.
// A '>' followed only by whitespace and then <b> gets a <br> inserted before the <b>.
html = html.replaceAll("(?i)>\\s*\\n*\\s*<b>", "><br><b>");
// Every <b> or </b> tag, together with the whitespace around it, becomes a single space.
html = html.replaceAll("(?i)\\s*\\n*\\s*</?b>\\s*", " ");
// Token standing in for a line break; presumably chosen so it passes through text() untouched - confirm.
String breakTagPlaceholder = "%BREAK%";
// <br ...> tags and literal newlines are swapped for the placeholder.
html = html.replaceAll("(?i)(<br[^>]*>|\\n)", breakTagPlaceholder);
// Bare <p> tags are swapped for the placeholder as well.
// NOTE(review): the \n alternative cannot match here because the previous statement already
// replaced every newline, and "<p>" does not match a paragraph tag that carries attributes.
html = html.replaceAll("(?i)(<p>|\\n)", breakTagPlaceholder);
// Parse what is left and keep only its text.
html = Jsoup.parse(html).text();
// Replace U+00A0 (no-break space) with an ordinary space.
html = html.replaceAll("\\u00A0", " ");
// Turn the placeholders back into newline characters.
html = html.replaceAll(breakTagPlaceholder, "\n");
// NOTE(review): no return statement is visible for this String-returning entry; the
// listing may be cut off here - confirm against the full source.
StringgetTitle(String htmlContent)
Get the title of the HTML.
Document doc = Jsoup.parse(htmlContent);
Elements titleNode ="head > title");
return titleNode.isEmpty() ? null : doc.title();
// Entry: htmlArray2textArray. Builds and returns a List<String> named cleanTextArray
// from the elements of htmlArray.
ListhtmlArray2textArray(List htmlArray)
html Arraytext Array
// Result list; it is returned still empty when the input list is null.
List<String> cleanTextArray = new ArrayList<>();
if (htmlArray == null) {
    return cleanTextArray;
// NOTE(review): the closing brace of the null check and the body of the loop below are
// missing from this listing, so the per-element conversion cannot be read from here.
for (String html : htmlArray) {
return cleanTextArray;
// Entry: isHTMLEmpty. Parses textToCheck and returns true when the inspected element
// collection is empty or contains an element whose string form is "" or "<p><br></p>".
booleanisHTMLEmpty(String textToCheck)
is HTML Empty
// Markup that is treated as an empty element.
String emptyContent = "<p><br></p>";
Document doc = Jsoup.parse(textToCheck);
// NOTE(review): the receiver and method name in front of "body") are missing from the next
// statement; it looks like a selector lookup on doc - confirm against the full source.
Elements elements ="body").first().children();
// No child elements at all counts as empty.
if (elements.size() == 0)
    return true;
// Any single child that is blank or equal to emptyContent makes the whole input count as
// empty, even when other children are present.
for (Element el : elements) {
    if ("".equals(el.toString()) || emptyContent.equals(el.toString()))
        return true;
// NOTE(review): the end of the loop and whatever is returned for non-empty input are not
// visible in this listing.
Documentparse(final String html)
Parses the specified html code.
Document doc = Jsoup.parseBodyFragment(html);
return doc;
// Entry: parse(InputStream, String, String). Only the argument defaulting and the start of
// the encoding handling are visible; the rest of the method is missing from this listing.
Documentparse(InputStream input, String documentIRI, String encoding)
// A null documentIRI is replaced by the empty string.
if (documentIRI == null) {
    documentIRI = "";
// With no encoding supplied, the loop below keeps assigning c until it is -1 or a
// non-whitespace value.
if (encoding == null) {
    int c;
    do {
        // NOTE(review): the right-hand side of this assignment is missing; presumably a
        // read from input - confirm against the full source.
        c =;
    } while (c != -1 && Character.isWhitespace(c));
// NOTE(review): what happens with c after the loop, and how the Document is built, is not
// visible here.
Stringparse(String html)
try {
    Document doc = Jsoup.parse(html);
    return doc.text();
} catch (NoClassDefFoundError e) {
    return "";
Documentparse(String html)
convert html String to Document (A lot more easier to manage it)
Document document = Jsoup.parse(html);
if (setting != null)
    return document.outputSettings(setting);
return document;