Java examples for java.lang:String Unicode
Parse text of string token to unicode characters.
/*/*from ww w.ja v a 2s .c o m*/ * Reference ETL Parser for Java * Copyright (c) 2000-2009 Constantine A Plotnikov * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ import java.math.BigInteger; public class Main{ /** * Parse text of string token to unicode characters. The string prefix is * ignored. Note it is assumed that the token has been already parsed by the * lexer, so minimal additional validation is performed. * * @param stringToken * a string token to parse or null * @return parsed string or null if null has been passed as argument */ public static String parseString(String stringToken) { if (stringToken == null) { return null; } final StringBuilder rc = new StringBuilder(); int n = stringToken.length(); if (n < 2) { throw new IllegalArgumentException( "Unexpected end of the token " + n); } int i = 0; while (Character.isUnicodeIdentifierPart(stringToken.charAt(i))) { i++; } final char quote = stringToken.charAt(i); switch (quote) { case '\'': case '"': break; default: throw new IllegalArgumentException("Invalid quote character " + stringToken.charAt(0)); } boolean multiline = stringToken.length() > 6 + i && stringToken.charAt(i + 1) == quote && stringToken.charAt(i + 2) == quote; // ignore last and first characters n -= multiline ? 3 : 1; i += multiline ? 3 : 1; if (i > n || stringToken.charAt(n) != quote || !(multiline ? stringToken.charAt(n + 1) == quote && stringToken.charAt(n + 2) == quote : true)) { throw new IllegalArgumentException( "The string is in invalid format: " + stringToken); } while (i < n) { char ch = stringToken.charAt(i++); if ((ch >= '\uD800' && ch <= '\uDBFF') || (ch >= '\uDC00' && ch <= '\uDFFF')) { // NOTE POST 0.2: fix it throw new IllegalArgumentException( "Large codepoints are not yet handled: " + ((int) ch)); } switch (ch) { case '\\': if (i >= n) { throw new IllegalArgumentException( "Unexpected end of the token " + i); } ch = stringToken.charAt(i++); switch (ch) { case 'U': final int start = i; while (i < n && (ch = stringToken.charAt(i++)) != ';') { if (('0' > ch || ch > '9') && ('a' > ch || ch > 'f') && ('A' > ch || ch > 'F')) { throw new IllegalArgumentException( "Invalid symbol in escape sequence " + ch); } } if (i == start || stringToken.charAt(i - 1) != ';') { throw new IllegalArgumentException( "Unexpected end of the token " + i); } final int codepoint = Integer.parseInt( stringToken.substring(start, i - 1), 16); rc.appendCodePoint(codepoint); break; case 'u': final int ch16 = Integer.parseInt( stringToken.substring(i, i + 4), 16); rc.append((char) ch16); i += 4; break; case 'x': final int ch8 = Integer.parseInt( stringToken.substring(i, i + 2), 16) & 0xFF; rc.append((char) ch8); i += 2; break; case 'n': rc.append('\n'); break; case 'r': rc.append('\r'); break; case 't': rc.append('\t'); break; case 'f': rc.append('\f'); break; case 'b': rc.append('\b'); break; default: rc.append(ch); } break; default: rc.append(ch); } } return rc.toString(); } /** * Parse text of integer token to integer value. * * @param intToken * a integer token to parse * @return parsed value */ public static int parseInt(String intToken) { final NumberInfo n = parseNumber(intToken); if (n.kind != Tokens.INTEGER && n.kind != Tokens.INTEGER_WITH_SUFFIX) { throw new NumberFormatException("wrong token kind: " + n.kind); } String textToParse = n.text; if (n.sign == -1) { textToParse = "-" + textToParse; } return Integer.parseInt(textToParse, n.base); } /** * Parse number * * @param input * an input token * @return information about number. */ public static NumberInfo parseNumber(String input) { return new NumberParser(input).parse(); } }