// Returns the number of additional bytes in a UTF-8 character sequence (not including the first byte).
/******************************************************************************
* The MIT License
* Copyright (c) 2003 Novell Inc. www.novell.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the Software), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*******************************************************************************/
//
// Novell.Directory.Ldap.Utilclass.Base64.cs
//
// Author:
// Sunil Kumar (Sunilk@novell.com)
//
// (C) 2003 Novell, Inc (http://www.novell.com)
//
using System;
namespace Novell.Directory.Ldap.Utilclass
{
/// <summary> The Base64 utility class performs base64 encoding and decoding.
///
/// The Base64 Content-Transfer-Encoding is designed to represent
/// arbitrary sequences of octets in a form that need not be humanly
/// readable. The encoding and decoding algorithms are simple, but the
/// encoded data are consistently only about 33 percent larger than the
/// unencoded data. The base64 encoding algorithm is defined by
/// RFC 2045.
/// </summary>
public class Base64
{
    /* ************** UTF-8 validation methods and members *******************
     * The following text is taken from draft-yergeau-rfc2279bis-02 and explains
     * UTF-8 encoding:
     *
     * In UTF-8, characters are encoded using sequences of 1 to 6 octets.
     * If the range of character numbers is restricted to U+0000..U+10FFFF
     * (the UTF-16 accessible range), then only sequences of one to four
     * octets will occur. The only octet of a "sequence" of one has the
     * higher-order bit set to 0, the remaining 7 bits being used to encode
     * the character number. In a sequence of n octets, n>1, the initial
     * octet has the n higher-order bits set to 1, followed by a bit set to
     * 0. The remaining bit(s) of that octet contain bits from the number
     * of the character to be encoded. The following octet(s) all have the
     * higher-order bit set to 1 and the following bit set to 0, leaving 6
     * bits in each to contain bits from the character to be encoded.
     *
     * The table below summarizes the format of these different octet types.
     * The letter x indicates bits available for encoding bits of the
     * character number.
     *
     * <pre>
     * Char. number range  | UTF-8 octet sequence
     * (hexadecimal)       | (binary)
     * --------------------+---------------------------------------------
     * 0000 0000-0000 007F | 0xxxxxxx
     * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
     * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
     * 0001 0000-001F FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 0020 0000-03FF FFFF | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 0400 0000-7FFF FFFF | 1111110x 10xxxxxx ... 10xxxxxx
     * </pre>
     */

    /// <summary>
    /// Given the first byte of a UTF-8 character sequence, returns the number
    /// of additional bytes that follow it (not including the first byte).
    /// </summary>
    /// <param name="b">
    /// The first byte in a UTF-8 character sequence, passed as a signed byte
    /// (octets 0x80..0xFF therefore arrive as negative values).
    /// </param>
    /// <returns>
    /// 0 for a single-octet character (0xxxxxxx, including NUL), 1 to 5 for
    /// the lead octet of a 2- to 6-octet sequence, or -1 when
    /// <paramref name="b"/> matches none of the lead-octet patterns in the
    /// table above (for example a continuation octet 10xxxxxx, 0xFE or 0xFF).
    /// </returns>
    private static int getByteCount(sbyte b)
    {
        // A non-negative signed byte has its high bit clear (0xxxxxxx), which
        // is a complete one-octet character. This includes 0x00 (U+0000).
        if (b >= 0)
        {
            return 0;
        }

        // Mask to the unsigned octet value so the pattern tests below do not
        // depend on how the negative sbyte is sign-extended to int.
        int octet = b & 0xFF;

        if ((octet & 0xE0) == 0xC0)
        {
            return 1; // 110xxxxx: one additional byte (2 bytes total)
        }
        if ((octet & 0xF0) == 0xE0)
        {
            return 2; // 1110xxxx: two additional bytes (3 bytes total)
        }
        if ((octet & 0xF8) == 0xF0)
        {
            return 3; // 11110xxx: three additional bytes (4 bytes total)
        }
        if ((octet & 0xFC) == 0xF8)
        {
            return 4; // 111110xx: four additional bytes (5 bytes total)
        }
        // The mask is 0xFE (not 0xFF) so that both 0xFC and 0xFD match the
        // 1111110x lead-octet pattern.
        if ((octet & 0xFE) == 0xFC)
        {
            return 5; // 1111110x: five additional bytes (6 bytes total)
        }

        // Not a valid first octet of any sequence.
        return -1;
    }
}
}
// Related examples in the same category