Downloads a web page from the Internet and returns a string. : HttpWebRequest « Network « C# / C Sharp






Downloads a web page from the Internet and returns a string.

    
using System;
using System.IO;
using System.Net.Mail;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Configuration;
using System.Globalization;
using System.Web;
using System.Web.Configuration;
using System.Threading;
using System.Reflection;
using System.Collections;
using System.Xml;
using System.Net;
using System.Web.Caching;

namespace BlogEngine.Core
{
  /// <summary>
  /// Utilities for the entire solution to use.
  /// </summary>
  public static class Utils
  {

    /// <summary>
    /// Downloads a web page from the Internet and returns it as a string.
    /// </summary>
    /// <param name="url">The URL to download from.</param>
    /// <returns>
    /// The response body, or null when <paramref name="url"/> is null or the
    /// request fails with a <see cref="WebException"/>.
    /// </returns>
    public static string DownloadWebPage(Uri url)
    {
      // Keep the "null means no page" contract instead of letting
      // WebClient throw on a missing address.
      if (url == null)
      {
        return null;
      }

      try
      {
        using (WebClient client = new WebClient())
        {
          client.UseDefaultCredentials = true;
          // The request identifies itself with a desktop browser user agent.
          client.Headers.Add(System.Net.HttpRequestHeader.UserAgent, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;)");
          // Disposing the reader also disposes the response stream it wraps.
          using (StreamReader reader = new StreamReader(client.OpenRead(url)))
          {
            return reader.ReadToEnd();
          }
        }
      }
      catch (WebException)
      {
        return null;
      }
    }


    /// <summary>
    /// Loads the XML document located at <paramref name="xmlUrl"/>.
    /// </summary>
    /// <param name="url">
    /// Base address used to resolve <paramref name="xmlUrl"/> when it is relative.
    /// </param>
    /// <param name="xmlUrl">
    /// Address of the XML document; either absolute or relative to <paramref name="url"/>.
    /// </param>
    /// <returns>
    /// The loaded document, or null when the address cannot be resolved to an
    /// absolute one or when loading fails for any reason.
    /// </returns>
    private static XmlDocument LoadDocument(Uri url, Uri xmlUrl)
    {
      if (xmlUrl == null)
      {
        return null;
      }

      try
      {
        Uri target;
        if (xmlUrl.IsAbsoluteUri)
        {
          target = xmlUrl;
        }
        else if (url != null && url.IsAbsoluteUri)
        {
          // Standard relative-reference resolution; handles both
          // "/rooted" and "sibling" style references.
          target = new Uri(url, xmlUrl);
        }
        else
        {
          // Two relative addresses cannot be combined into something loadable.
          return null;
        }

        // NOTE(review): the document comes from a remote address; confirm that
        // the target framework's default DTD/entity handling is acceptable here.
        XmlDocument doc = new XmlDocument();
        doc.Load(target.AbsoluteUri);
        return doc;
      }
      catch (Exception)
      {
        // Best effort by design: any failure is reported as "no document".
        return null;
      }
    }

    // Matches one <link> tag whose title attribute equals {0}; group 1 holds the tag's attributes.
    private const string PATTERN = "<link( [^>]*title=\"{0}\"[^>]*)>";
    // Captures everything between the first "<head" and the last "</head>" in group 1.
    private static readonly Regex HEAD = new Regex("<head(.*)</head>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
    // Captures the value of a double-quoted href attribute, stopping at its closing quote.
    private static readonly Regex HREF = new Regex("href=\"([^\"]*)\"", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Finds semantic links in the head section of a given HTML document.
    /// </summary>
    /// <param name="type">
    /// The type of link, compared case-insensitively with the title attribute
    /// of each link tag. Could be foaf, apml or sioc.
    /// </param>
    /// <param name="html">The HTML to look through.</param>
    /// <returns>
    /// The absolute href of every matching link tag, in document order. The list
    /// is empty when <paramref name="html"/> is null or empty, has no head
    /// section, or contains no matching link with an absolute href.
    /// </returns>
    public static List<Uri> FindLinks(string type, string html)
    {
      List<Uri> urls = new List<Uri>();

      if (string.IsNullOrEmpty(html))
      {
        return urls;
      }

      Match head = HEAD.Match(html);
      if (!head.Success)
      {
        return urls;
      }

      // Escape the type so it is matched literally rather than as regex syntax.
      string linkPattern = string.Format(PATTERN, Regex.Escape(type ?? string.Empty));
      MatchCollection links = Regex.Matches(head.Groups[1].Value, linkPattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);

      foreach (Match link in links)
      {
        Match hrefMatch = HREF.Match(link.Groups[1].Value);
        if (!hrefMatch.Success)
        {
          continue;
        }

        Uri url;
        if (Uri.TryCreate(hrefMatch.Groups[1].Value, UriKind.Absolute, out url))
        {
          urls.Add(url);
        }
      }

      return urls;
    }
  }
}

   
    
    
    
  








Related examples in the same category

1. Begins an asynchronous request for a Stream object to use to write data.
2. Gets or sets the value of the Connection HTTP header.
3. Gets or sets the cookies associated with the request.
4. Performs online file transfer operations.