// Source of htmlToText(String input).
// Signature: public static String htmlToText(String input)
//package com.java2s;
// Licensed under the Apache License, Version 2.0 (the "License");

public class Main {
    /**
     * Converts HTML to plain text, according to the following rules:
     * <ul>
     * <li> Replaces any run of newlines or carriage returns in the source text
     *      with a single space, then trims leading and trailing whitespace.
     * <li> Replaces {@code <P>} and {@code <BR>} with a newline.
     * <li> Replaces {@code <LI>} with a newline followed by {@code "* "}.
     * <li> Removes all other tags (including closing tags), together with
     *      their attributes.
     * <li> Leaves the text between tags untouched; character entities such as
     *      {@code &amp;} are not decoded.
     * </ul>
     * Tag names are matched case-insensitively and independently of the default
     * locale. Self-closing forms such as {@code <br/>} or {@code <p />} are
     * treated like their plain counterparts. If a {@code <} has no matching
     * {@code >}, everything from that {@code <} to the end of the input is
     * discarded.
     *
     * @param input the HTML to convert; {@code null} is treated as empty
     * @return the plain-text rendering, never {@code null}
     * @since 1.0.2
     */
    public static String htmlToText(String input) {
        if (input == null) {
            return "";
        }

        String html = input.replaceAll("[\r\n]+", " ").trim();
        StringBuilder text = new StringBuilder(html.length());

        // Single forward pass: copy the text between tags and translate each tag.
        int pos = 0;
        while (pos < html.length()) {
            int openIdx = html.indexOf('<', pos);
            if (openIdx < 0) {
                // No more tags: the remainder is plain text.
                text.append(html, pos, html.length());
                break;
            }
            text.append(html, pos, openIdx);

            int closeIdx = html.indexOf('>', openIdx);
            if (closeIdx < 0) {
                // Punt on an unclosed tag: drop it and everything after it.
                break;
            }

            String tag = tagName(html.substring(openIdx + 1, closeIdx));
            if (tag.equals("P") || tag.equals("BR")) {
                text.append('\n');
            } else if (tag.equals("LI")) {
                text.append("\n* ");
            }
            pos = closeIdx + 1;
        }
        return text.toString();
    }

    /**
     * Extracts the upper-cased element name from the text found between
     * {@code <} and {@code >}: attributes are dropped, and a trailing
     * {@code /} (self-closing syntax) is removed. Closing tags keep their
     * leading {@code /}, so {@code </p>} yields {@code "/P"}.
     */
    private static String tagName(String rawTag) {
        String name = rawTag.trim();

        // The name ends at the first whitespace character; the rest is attributes.
        int end = 0;
        while (end < name.length() && !Character.isWhitespace(name.charAt(end))) {
            end++;
        }
        name = name.substring(0, end);

        if (name.length() > 1 && name.endsWith("/")) {
            name = name.substring(0, name.length() - 1);
        }

        // Locale.ROOT keeps "li" -> "LI" even when the default locale is Turkish.
        return name.toUpperCase(java.util.Locale.ROOT);
    }
}