Get Links From HTML : HTML « Network « C# / C Sharp






Get Links From HTML

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections.Specialized;
using System.Text.RegularExpressions;
using System.Net;

/// <summary>
/// Helpers for extracting link targets from raw HTML markup.
/// </summary>
public static class LinkCheckerUtilities
{
    // Matches a complete anchor element (opening tag through the closing tag).
    // The \b after "a" keeps tags such as <area>, <abbr> or <address> from being
    // mistaken for an anchor. IgnoreCase is used instead of lower-casing the
    // input so the extracted URLs keep their original casing.
    private static readonly Regex AnchorElementPattern = new Regex(
        @"<a\b.*?>.*?</a>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Matches an href attribute whose value is wrapped in double or single quotes.
    // Group 1 is the quote character, group 2 is the attribute value.
    private static readonly Regex HrefAttributePattern = new Regex(
        @"href\s*=\s*([""'])(.*?)\1",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>
    /// Collects the distinct <c>href</c> values of the anchor elements found in an HTML string.
    /// </summary>
    /// <param name="HtmlContent">The HTML markup to scan. May be null or empty.</param>
    /// <returns>
    /// The href values in order of first appearance, with their original casing and
    /// without duplicates (ordinal comparison). The collection is empty when
    /// <paramref name="HtmlContent"/> is null, empty, or contains no anchor with a quoted href.
    /// </returns>
    public static StringCollection GetLinksFromHTML(string HtmlContent)
    {
        StringCollection links = new StringCollection();

        if (string.IsNullOrEmpty(HtmlContent))
        {
            return links;
        }

        // Tracks what has already been added so de-duplication does not rescan the result list.
        HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);

        foreach (Match anchor in AnchorElementPattern.Matches(HtmlContent))
        {
            // Only the first href inside the anchor element is taken.
            Match href = HrefAttributePattern.Match(anchor.Value);
            if (!href.Success)
            {
                continue;
            }

            string target = href.Groups[2].Value;
            if (seen.Add(target))
            {
                links.Add(target);
            }
        }

        return links;
    }
}

   
  








Related examples in the same category

1.Parses the value information from any INPUT tag in an HTML string where the name="" attribute matched the tagID parameter
2.Html Utilities
3.Convert HTML To Text
4.Converts a FontUnit to a size for the HTML FONT tag
5.Strip HTML
6.Remove tags from a html string
7.Sanitize any potentially dangerous tags from the provided raw HTML input using a whitelist based approach
8.Get Type As Html
9.HTML-encodes a string and returns the encoded string.
10.Strips all HTML tags from the specified string.
11.Removes the HTML whitespace.
12.Array To Html Breaked String
13.Show Html Page in String with Process