Get HTML anchor elements from an HTML source string using a regex - CSharp System

CSharp examples for System:String HTML

Description

Get HTML anchor elements from an HTML source string using a regex

Demo Code


using System.Text.RegularExpressions;
using System.Text;
using System.Net;
using System.IO; /* from www.java2s.com */
using System.Collections.Generic;
using System;

/// <summary>
/// Regex-based helpers for extracting anchor elements from an HTML string.
/// </summary>
public class Main{

    // Matches an anchor element with a quoted href attribute. The (?=\s) lookahead
    // requires whitespace right after "<a" so that other tags whose names merely
    // start with "a" (abbr, area, ...) are not mistaken for anchors.
    // Named groups: "url" = href value, "text" = inner markup up to the first </a>.
    // Built once and reused; a compiled Regex is expensive to construct per call.
    private static readonly Regex s_anchorRegex = new Regex(
        "<a(?=\\s)[^>]*? href=[\"'](?<url>[^\"']*?)[\"'][^>]*?>(?<text>[\\w\\W]*?)</a>",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    // Matches any opening or closing tag whose name starts with a letter.
    private static readonly Regex s_tagRegex = new Regex(
        "</?[a-z][^<>]*>",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Finds every anchor element in <paramref name="source"/> and maps its
    /// tag-stripped text to the value of its href attribute.
    /// </summary>
    /// <param name="source">HTML markup to scan.</param>
    /// <returns>
    /// A dictionary keyed by the anchor's text (all tags removed) whose value is
    /// the href. When several anchors share the same text, only the first one is
    /// kept. If an exception occurs while scanning (for example when
    /// <paramref name="source"/> is null), it is not propagated; instead its
    /// message is stored as a key with an empty value.
    /// </returns>
    public static Dictionary<string, string> GetDomElem_A(string source)
    {
        var matchList = new Dictionary<string, string>();
        try
        {
            foreach (Match match in s_anchorRegex.Matches(source))
            {
                // Strip the whole matched element so nested markup inside the
                // link (e.g. <b>, <span>) does not end up in the key.
                string key = RemoveHtml(match.Value);
                if (!matchList.ContainsKey(key))
                {
                    // The href was already captured by the pattern's "url" group.
                    matchList.Add(key, match.Groups["url"].Value);
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort contract: report the failure through the result.
            // Guard the insert so the handler itself cannot throw on a duplicate key.
            if (!matchList.ContainsKey(ex.Message))
            {
                matchList.Add(ex.Message, string.Empty);
            }
        }
        return matchList;
    }

    /// <summary>
    /// Removes every HTML tag from <paramref name="input"/>, leaving the text
    /// between the tags untouched.
    /// </summary>
    /// <param name="input">Markup to strip.</param>
    /// <returns><paramref name="input"/> with all matched tags replaced by an empty string.</returns>
    public static string RemoveHtml(string input)
    {
        return s_tagRegex.Replace(input, string.Empty);
    }
}

Related Tutorials