ASCII character handling functions
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Basic ASCII character handling functions: case conversion, character
 * classification, and parsing of unsigned decimal numbers from byte or
 * char arrays.
 *
 * <p>All lookups are table driven. The public classification and
 * conversion methods examine only the low 8 bits of their argument
 * ({@code c & 0xff}), so they are intended for ASCII / single-byte input.
 *
 * @author dac@eng.sun.com
 * @author James Todd [gonzo@eng.sun.com]
 */
public final class Ascii {

    /** Number of entries in each lookup table (one per unsigned byte value). */
    private static final int TABLE_SIZE = 256;

    /*
     * Character translation tables.
     */
    private static final byte[] toUpper = new byte[TABLE_SIZE];
    private static final byte[] toLower = new byte[TABLE_SIZE];

    /*
     * Character type tables.
     */
    private static final boolean[] isAlpha = new boolean[TABLE_SIZE];
    private static final boolean[] isUpper = new boolean[TABLE_SIZE];
    private static final boolean[] isLower = new boolean[TABLE_SIZE];
    private static final boolean[] isWhite = new boolean[TABLE_SIZE];
    private static final boolean[] isDigit = new boolean[TABLE_SIZE];

    /*
     * Initialize character translation and type tables.
     */
    static {
        // Start from the identity mapping; only the letters are remapped below.
        for (int i = 0; i < TABLE_SIZE; i++) {
            toUpper[i] = (byte) i;
            toLower[i] = (byte) i;
        }

        for (int lc = 'a'; lc <= 'z'; lc++) {
            int uc = lc + 'A' - 'a';
            toUpper[lc] = (byte) uc;
            toLower[uc] = (byte) lc;
            isAlpha[lc] = true;
            isAlpha[uc] = true;
            isLower[lc] = true;
            isUpper[uc] = true;
        }

        isWhite[' '] = true;
        isWhite['\t'] = true;
        isWhite['\r'] = true;
        isWhite['\n'] = true;
        isWhite['\f'] = true;
        // Backspace is classified as white space by this table.
        isWhite['\b'] = true;

        for (int d = '0'; d <= '9'; d++) {
            isDigit[d] = true;
        }
    }

    /**
     * Returns the upper case equivalent of the specified ASCII character.
     * Only the low 8 bits of {@code c} are used; the result is in 0..255.
     *
     * @param c the character to convert
     * @return the upper case form of {@code c & 0xff}, or that value
     *         unchanged if it is not a lower case ASCII letter
     */
    public static int toUpper(int c) {
        return toUpper[c & 0xff] & 0xff;
    }

    /**
     * Returns the lower case equivalent of the specified ASCII character.
     * Only the low 8 bits of {@code c} are used; the result is in 0..255.
     *
     * @param c the character to convert
     * @return the lower case form of {@code c & 0xff}, or that value
     *         unchanged if it is not an upper case ASCII letter
     */
    public static int toLower(int c) {
        return toLower[c & 0xff] & 0xff;
    }

    /**
     * Returns true if the specified ASCII character is upper or lower case.
     * Only the low 8 bits of {@code c} are examined.
     *
     * @param c the character to test
     * @return true if {@code c & 0xff} is in 'a'..'z' or 'A'..'Z'
     */
    public static boolean isAlpha(int c) {
        return isAlpha[c & 0xff];
    }

    /**
     * Returns true if the specified ASCII character is upper case.
     * Only the low 8 bits of {@code c} are examined.
     *
     * @param c the character to test
     * @return true if {@code c & 0xff} is in 'A'..'Z'
     */
    public static boolean isUpper(int c) {
        return isUpper[c & 0xff];
    }

    /**
     * Returns true if the specified ASCII character is lower case.
     * Only the low 8 bits of {@code c} are examined.
     *
     * @param c the character to test
     * @return true if {@code c & 0xff} is in 'a'..'z'
     */
    public static boolean isLower(int c) {
        return isLower[c & 0xff];
    }

    /**
     * Returns true if the specified ASCII character is white space.
     * Space, tab, carriage return, line feed, form feed and backspace are
     * treated as white space. Only the low 8 bits of {@code c} are examined.
     *
     * @param c the character to test
     * @return true if {@code c & 0xff} is one of the white space characters
     */
    public static boolean isWhite(int c) {
        return isWhite[c & 0xff];
    }

    /**
     * Returns true if the specified ASCII character is a digit.
     * Only the low 8 bits of {@code c} are examined.
     *
     * @param c the character to test
     * @return true if {@code c & 0xff} is in '0'..'9'
     */
    public static boolean isDigit(int c) {
        return isDigit[c & 0xff];
    }

    /**
     * Parses an unsigned integer from the specified subarray of bytes.
     *
     * @param b the bytes to parse
     * @param off the start offset of the bytes
     * @param len the length of the bytes
     * @return the parsed value, in the range 0..{@link Integer#MAX_VALUE}
     * @exception NumberFormatException if {@code b} is null, {@code len} is
     *            not positive, a non-digit is encountered, or the value
     *            does not fit in an {@code int}
     * @exception ArrayIndexOutOfBoundsException if the requested range lies
     *            outside {@code b}
     */
    public static int parseInt(byte[] b, int off, int len)
            throws NumberFormatException {
        if (b == null || len <= 0) {
            throw new NumberFormatException("empty input");
        }
        int n = 0;
        for (int i = 0; i < len; i++) {
            n = appendDigit(n, digitValue(b[off + i]));
        }
        return n;
    }

    /**
     * Parses an unsigned integer from the specified subarray of chars.
     *
     * @param b the chars to parse
     * @param off the start offset of the chars
     * @param len the number of chars to parse
     * @return the parsed value, in the range 0..{@link Integer#MAX_VALUE}
     * @exception NumberFormatException if {@code b} is null, {@code len} is
     *            not positive, a char other than '0'..'9' is encountered, or
     *            the value does not fit in an {@code int}
     * @exception ArrayIndexOutOfBoundsException if the requested range lies
     *            outside {@code b}
     */
    public static int parseInt(char[] b, int off, int len)
            throws NumberFormatException {
        if (b == null || len <= 0) {
            throw new NumberFormatException("empty input");
        }
        int n = 0;
        for (int i = 0; i < len; i++) {
            n = appendDigit(n, digitValue(b[off + i]));
        }
        return n;
    }

    /**
     * Parses an unsigned long from the specified subarray of bytes.
     *
     * @param b the bytes to parse
     * @param off the start offset of the bytes
     * @param len the length of the bytes
     * @return the parsed value, in the range 0..{@link Long#MAX_VALUE}
     * @exception NumberFormatException if {@code b} is null, {@code len} is
     *            not positive, a non-digit is encountered, or the value
     *            does not fit in a {@code long}
     * @exception ArrayIndexOutOfBoundsException if the requested range lies
     *            outside {@code b}
     */
    public static long parseLong(byte[] b, int off, int len)
            throws NumberFormatException {
        if (b == null || len <= 0) {
            throw new NumberFormatException("empty input");
        }
        long n = 0;
        for (int i = 0; i < len; i++) {
            n = appendDigit(n, digitValue(b[off + i]));
        }
        return n;
    }

    /**
     * Parses an unsigned long from the specified subarray of chars.
     *
     * @param b the chars to parse
     * @param off the start offset of the chars
     * @param len the number of chars to parse
     * @return the parsed value, in the range 0..{@link Long#MAX_VALUE}
     * @exception NumberFormatException if {@code b} is null, {@code len} is
     *            not positive, a char other than '0'..'9' is encountered, or
     *            the value does not fit in a {@code long}
     * @exception ArrayIndexOutOfBoundsException if the requested range lies
     *            outside {@code b}
     */
    public static long parseLong(char[] b, int off, int len)
            throws NumberFormatException {
        if (b == null || len <= 0) {
            throw new NumberFormatException("empty input");
        }
        long n = 0;
        for (int i = 0; i < len; i++) {
            n = appendDigit(n, digitValue(b[off + i]));
        }
        return n;
    }

    /**
     * Returns the numeric value of an ASCII decimal digit. The full value of
     * {@code c} is compared (no 8-bit masking), so chars above 0xff and
     * sign-extended negative bytes are never mistaken for digits.
     *
     * @throws NumberFormatException if {@code c} is not in '0'..'9'
     */
    private static int digitValue(int c) {
        if (c < '0' || c > '9') {
            throw new NumberFormatException("not a decimal digit: " + c);
        }
        return c - '0';
    }

    /**
     * Returns {@code n * 10 + digit}, rejecting results above
     * {@link Integer#MAX_VALUE}. The bound is checked before multiplying so
     * that wrap-around cannot occur.
     */
    private static int appendDigit(int n, int digit) {
        if (n > (Integer.MAX_VALUE - digit) / 10) {
            throw new NumberFormatException("value exceeds int range");
        }
        return n * 10 + digit;
    }

    /**
     * Returns {@code n * 10 + digit}, rejecting results above
     * {@link Long#MAX_VALUE}. The bound is checked before multiplying so
     * that wrap-around cannot occur.
     */
    private static long appendDigit(long n, int digit) {
        if (n > (Long.MAX_VALUE - digit) / 10) {
            throw new NumberFormatException("value exceeds long range");
        }
        return n * 10 + digit;
    }
}
Related examples in the same category
1. | Convert Encoding | | |
2. | Utility class for working with character sets | | |
3. | Utility methods for ASCII character checking. | | |
4. | Reader for UCS-2 and UCS-4 encodings. (i.e., encodings from ISO-10646-UCS-(2|4)). | | |
5. | Conversions between IANA encoding names and Java encoding names, and vice versa. | | |
6. | This class represents an encoding. | | |
7. | Provides information about encodings. | | |
8. | Codec for the Quoted-Printable section of http://www.ietf.org/rfc/rfc1521.txt (RFC 1521) | | |
9. | ISO 8859-8, ASCII plus Hebrew | | |
10. | TIS-620 does not have the non-breaking space or the C1 controls. | | |
11. | ISO-8859-1; a.k.a. Latin-1 | | |
12. | ISO 8859-2, a.k.a. Latin-2 | | |
13. | ISO 8859-3 | | |
14. | ISO 8859-4, Latin plus the characters needed for Greenlandic, Icelandic, and Lappish. | | |
15. | ISO 8859-9 for Turkish. | | |
16. | ISO-8859-10, for Lithuanian, Estonian, Greenlandic, Icelandic, Inuit, Lappish, and other Northern European languages. | | |
17. | ISO-8859-13, for Latvian and other Baltic languages. | | |
18. | ISO-8859-14, for Gaelic, Welsh, and other Celtic languages. | | |
19. | ISO 8859-15 (Latin-9) for Western Europe. Includes the Euro sign and several uncommon French letters | | |
20. | ISO 8859-16, Romanian | | |
21. | ASCII Writer | | |
22. | UCS Writer | | |
23. | Unicode Writer | | |
24. | Whether a character is or is not available in a particular encoding | | |
25. | ISO 8859-6, ASCII plus Arabic | | |
26. | ISO 8859-5, ASCII plus Cyrillic (Russian, Byelorussian, etc.) | | |
27. | ISO 8859-7, ASCII plus Greek | | |
28. | IANA to Java Mapping | | |
29. | Java to IANA Mapping | | |
30. | EncodingMap is a convenience class which handles conversions between IANA encoding names and Java encoding names, and vice versa. | | |
31. | Get file encoding | | |