// Correctly encode a name for a URL.
//CruiseControl is open source software and is developed and maintained by a group of dedicated volunteers.
//CruiseControl is distributed under a BSD-style license.
//http://cruisecontrol.sourceforge.net/
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace ThoughtWorks.CruiseControl.Core.Util
{
/// <summary>
/// Class with handy string routines.
/// </summary>
public class StringUtil
{
    // Matches any single character that is NOT in the RFC 3986 "unreserved" set
    // (ASCII letters, digits, '-', '_', '.', '~'); only matched characters are
    // candidates for percent-encoding.
    private static readonly Regex urlEncodeRegex = new Regex("[^a-zA-Z0-9\\.\\-_~]", RegexOptions.Compiled);

    // Highest code unit that belongs to ASCII; anything above it is left un-encoded.
    private const int MaxAsciiValue = 127;

    // public for testing only
    public const string DEFAULT_DELIMITER = ",";

    /// <summary>
    /// Correctly encode a name for a URL.
    /// </summary>
    /// <param name="name">The name to encode. Must not be <c>null</c>.</param>
    /// <returns>
    /// The encoded name: every ASCII character outside the unreserved set is replaced by
    /// <c>%</c> followed by its two-digit lowercase hexadecimal value; all other characters
    /// are copied through unchanged.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="name"/> is <c>null</c>.
    /// </exception>
    /// <remarks>
    /// <para>
    /// HttpUtility.UrlEncode does not correctly encode for a URL: spaces get converted into
    /// pluses, which can cause security errors.
    /// </para>
    /// <para>
    /// This method encodes characters according to RFC 3986. This means only the following
    /// characters are allowed un-encoded:
    /// </para>
    /// <para>
    /// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s
    /// t u v w x y z 0 1 2 3 4 5 6 7 8 9 - _ . ~
    /// </para>
    /// <para>
    /// However, since the encoding only uses two hex digits, it is not possible to encode
    /// non-ASCII characters using this approach. Therefore non-ASCII characters (any code unit
    /// above 127) are left exactly as they are, on the assumption that the resulting string
    /// will be transmitted as UTF-8.
    /// </para>
    /// </remarks>
    public static string UrlEncodeName(string name)
    {
        if (name == null)
        {
            throw new ArgumentNullException("name");
        }

        return urlEncodeRegex.Replace(name, match =>
        {
            var charValue = (int)match.Value[0];

            // A two-digit escape of a non-ASCII code unit would be a Latin-1 byte, not
            // valid UTF-8, so such characters are passed through untouched.
            if (charValue > MaxAsciiValue)
            {
                return match.Value;
            }

            // Lowercase hex is kept so that output for ASCII input is unchanged.
            return "%" + string.Format("{0:x2}", charValue);
        });
    }
}
}
// Related examples in the same category