Plain Text From Html - CSharp System

CSharp examples for System:String HTML

Description

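Converts an HTML string to plain text: decodes HTML entities, turns <br> tags into newlines, strips the remaining tags, and tidies up whitespace.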

Demo Code


using System.Text.RegularExpressions;
using System.Text;
using System.Globalization;
using System;/*from  w  ww . j  a  v a2  s. co  m*/

public class Main{
        // Matches <br>, <br/>, <br /> and similar spacing variants, in any letter case.
        private static readonly Regex LineBreakTag = new Regex(@"< *br */*>", RegexOptions.IgnoreCase);

        // Matches any remaining markup tag: '<' up to the next '>'.
        private static readonly Regex AnyTag = new Regex(@"<[^>]*>");

        // Matches a run of two or more ordinary spaces.
        private static readonly Regex SpaceRun = new Regex(@" {2,}");

        // Matches one or more consecutive line breaks (LF or CRLF).
        private static readonly Regex NewlineRun = new Regex(@"(\r?\n)+");

        /// <summary>
        /// Converts an HTML fragment into plain text.
        /// </summary>
        /// <remarks>
        /// Steps, in order: &lt;br&gt; tags become newlines, all other tags are removed,
        /// HTML entities are decoded, non-breaking spaces become ordinary spaces, runs of
        /// spaces collapse to one space, runs of line breaks collapse to a single "\n",
        /// and leading/trailing whitespace is trimmed.
        /// </remarks>
        /// <param name="input">The HTML text to convert. May be null or empty.</param>
        /// <returns>
        /// The plain-text rendering of <paramref name="input"/>; a null or empty input is
        /// returned unchanged.
        /// </returns>
        public static string PlainTextFromHtml(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            // Handle markup BEFORE decoding entities. Decoding first would turn escaped
            // text such as "1 &lt; 2 and 3 &gt; 2" into "1 < 2 and 3 > 2", and the tag
            // pattern would then delete "< 2 and 3 >" as if it were a tag.
            string result = LineBreakTag.Replace(input, "\n");
            result = AnyTag.Replace(result, "");

            // Decode HTML escape sequences (&amp;, &lt;, &nbsp;, numeric references, ...).
            result = System.Web.HttpUtility.HtmlDecode(result);

            // &nbsp; decodes to U+00A0; normalise it to an ordinary space so that it
            // takes part in the space collapsing and trimming below.
            result = result.Replace('\u00A0', ' ');

            // Collapse runs of spaces to a single space. Replacing them with an empty
            // string would glue the neighbouring words together.
            result = SpaceRun.Replace(result, " ");

            // Replace multiple newlines with just one.
            result = NewlineRun.Replace(result, "\n");

            // Remove whitespace at the beginning and end.
            return result.Trim();
        }
}

Related Tutorials