Java Utility Methods String Normalize

List of utility methods to do String Normalize

Description

The methods for string normalization listed below are organized by topic.

Method

StringnormalizeToAlpha(String input)
normalize To Alpha
StringBuffer buf = new StringBuffer();
for (int i = 0; i < input.length(); i++) {
    char charAt = input.charAt(i);
    if (Character.isLetterOrDigit(charAt))
        buf.append(charAt);
return buf.toString();
StringnormalizeUnicode(CharSequence text)
Apply Unicode NFC normalization to a string.
return Normalizer.isNormalized(text, Normalizer.Form.NFC) ? text.toString()
        : Normalizer.normalize(text, Normalizer.Form.NFC);
StringnormalizeUnicode(final String str)
Normalize a string potentially containing Unicode to NFC form.
if (Normalizer.isNormalized(str, Normalizer.Form.NFC)) {
    return str;
} else {
    return Normalizer.normalize(str, Normalizer.Form.NFC);
StringnormalizeUnicode(String input)
performs a unicode normalization to NFC form (java.text.Normalizer.Form.NFC) for the given input
if (input != null && !Normalizer.isNormalized(input, Normalizer.Form.NFC)) {
    return Normalizer.normalize(input, Normalizer.Form.NFC);
return input;
StringnormalizeUnicode(String str)
Normalize to "Normalization Form Canonical Decomposition" (NFD) REF: http: //stackoverflow.com/questions/3610013/file-listfiles-mangles-unicode- names-with-jdk-6-unicode-normalization-issues This supports proper file name retrieval from file system, among other things.
Normalizer.Form form = Normalizer.Form.NFD;
if (!Normalizer.isNormalized(str, form)) {
    return Normalizer.normalize(str, form);
return str;
StringnormalizeUnicodeDiacritics(String text)
normalize Unicode Diacritics
text = Normalizer.normalize(text, Form.NFD);
Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
text = pattern.matcher(text).replaceAll("");
return text;
StringnormalizeWhitespace(final String str)
Utility to normalize whitespace in a String, i.e.
if (isNullOrEmpty(str))
    return str;
final StringBuilder buf = new StringBuilder();
final CharacterIterator iter = new StringCharacterIterator(str);
boolean inWhitespace = false; 
for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
    if (Character.isWhitespace(c)) {
        if (!inWhitespace) {
...
StringnormalizeWhiteSpace(String str)
normalize White Space
if (str == null) {
    return null;
String normalized = Normalizer.normalize(str, java.text.Normalizer.Form.NFD);
int len = normalized.length();
StringBuffer sb = new StringBuffer();
int spaceCount = 0;
for (int i = 0; i < len;) {
...
StringnormalizeWidth(String text)
Normalize the width of characters in the supplied text.
StringBuilder sb = new StringBuilder(text);
int ch;
for (int i = 0; i < sb.length(); i++) {
    ch = sb.charAt(i);
    if ((ch >= 0xFF01) && (ch <= 0xFF5E)) {
        sb.setCharAt(i, (char) (ch - 0xFEE0));
        continue;
    if (ch == 0x3000) {
        sb.setCharAt(i, ' ');
    switch (ch) {
    case 0xFF61:
        sb.setCharAt(i, (char) 0x3002);
        break;
    case 0xFF62:
        sb.setCharAt(i, (char) 0x300C);
        break;
    case 0xFF63:
        sb.setCharAt(i, (char) 0x300D);
        break;
    case 0xFF64:
        sb.setCharAt(i, (char) 0x3001);
        break;
    case 0xFF65:
        sb.setCharAt(i, (char) 0x30FB);
        break;
    case 0xFF66:
        sb.setCharAt(i, (char) 0x30F2);
        break;
    case 0xFF67:
        sb.setCharAt(i, (char) 0x30A1);
        break;
    case 0xFF68:
        sb.setCharAt(i, (char) 0x30A3);
        break;
    case 0xFF69:
        sb.setCharAt(i, (char) 0x30A5);
        break;
    case 0xFF6A:
        sb.setCharAt(i, (char) 0x30A7);
        break;
    case 0xFF6B:
        sb.setCharAt(i, (char) 0x30A9);
        break;
    case 0xFF6C:
        sb.setCharAt(i, (char) 0x30E3);
        break;
    case 0xFF6D:
        sb.setCharAt(i, (char) 0x30E5);
        break;
    case 0xFF6E:
        sb.setCharAt(i, (char) 0x30E7);
        break;
    case 0xFF6F:
        sb.setCharAt(i, (char) 0x30C3);
        break;
    case 0xFF70:
        sb.setCharAt(i, (char) 0x30FC);
        break;
    case 0xFF71:
        sb.setCharAt(i, (char) 0x30A2);
        break;
    case 0xFF72:
        sb.setCharAt(i, (char) 0x30A4);
        break;
    case 0xFF73:
        sb.setCharAt(i, (char) 0x30A6);
        break;
    case 0xFF74:
        sb.setCharAt(i, (char) 0x30A8);
        break;
    case 0xFF75:
        sb.setCharAt(i, (char) 0x30AA);
        break;
    case 0xFF76:
        sb.setCharAt(i, (char) 0x30AB);
        break;
    case 0xFF77:
        sb.setCharAt(i, (char) 0x30AD);
        break;
    case 0xFF78:
        sb.setCharAt(i, (char) 0x30AF);
        break;
    case 0xFF79:
        sb.setCharAt(i, (char) 0x30B1);
        break;
    case 0xFF7A:
        sb.setCharAt(i, (char) 0x30B3);
        break;
    case 0xFF7B:
        sb.setCharAt(i, (char) 0x30B5);
        break;
    case 0xFF7C:
        sb.setCharAt(i, (char) 0x30B7);
        break;
    case 0xFF7D:
        sb.setCharAt(i, (char) 0x30B9);
        break;
    case 0xFF7E:
        sb.setCharAt(i, (char) 0x30BB);
        break;
    case 0xFF7F:
        sb.setCharAt(i, (char) 0x30BD);
        break;
    case 0xFF80:
        sb.setCharAt(i, (char) 0x30BF);
        break;
    case 0xFF81:
        sb.setCharAt(i, (char) 0x30C1);
        break;
    case 0xFF82:
        sb.setCharAt(i, (char) 0x30C4);
        break;
    case 0xFF83:
        sb.setCharAt(i, (char) 0x30C6);
        break;
    case 0xFF84:
        sb.setCharAt(i, (char) 0x30C8);
        break;
    case 0xFF85:
        sb.setCharAt(i, (char) 0x30CA);
        break;
    case 0xFF86:
        sb.setCharAt(i, (char) 0x30CB);
        break;
    case 0xFF87:
        sb.setCharAt(i, (char) 0x30CC);
        break;
    case 0xFF88:
        sb.setCharAt(i, (char) 0x30CD);
        break;
    case 0xFF89:
        sb.setCharAt(i, (char) 0x30CE);
        break;
    case 0xFF8A:
        sb.setCharAt(i, (char) 0x30CF);
        break;
    case 0xFF8B:
        sb.setCharAt(i, (char) 0x30D2);
        break;
    case 0xFF8C:
        sb.setCharAt(i, (char) 0x30D5);
        break;
    case 0xFF8D:
        sb.setCharAt(i, (char) 0x30D8);
        break;
    case 0xFF8E:
        sb.setCharAt(i, (char) 0x30DB);
        break;
    case 0xFF8F:
        sb.setCharAt(i, (char) 0x30DE);
        break;
    case 0xFF90:
        sb.setCharAt(i, (char) 0x30DF);
        break;
    case 0xFF91:
        sb.setCharAt(i, (char) 0x30E0);
        break;
    case 0xFF92:
        sb.setCharAt(i, (char) 0x30E1);
        break;
    case 0xFF93:
        sb.setCharAt(i, (char) 0x30E2);
        break;
    case 0xFF94:
        sb.setCharAt(i, (char) 0x30E4);
        break;
    case 0xFF95:
        sb.setCharAt(i, (char) 0x30E6);
        break;
    case 0xFF96:
        sb.setCharAt(i, (char) 0x30E8);
        break;
    case 0xFF97:
        sb.setCharAt(i, (char) 0x30E9);
        break;
    case 0xFF98:
        sb.setCharAt(i, (char) 0x30EA);
        break;
    case 0xFF99:
        sb.setCharAt(i, (char) 0x30EB);
        break;
    case 0xFF9A:
        sb.setCharAt(i, (char) 0x30EC);
        break;
    case 0xFF9B:
        sb.setCharAt(i, (char) 0x30ED);
        break;
    case 0xFF9C:
        sb.setCharAt(i, (char) 0x30EF);
        break;
    case 0xFF9D:
        sb.setCharAt(i, (char) 0x30F3);
        break;
    case 0xFF9E:
        sb.setCharAt(i, (char) 0x3099);
        break;
    case 0xFF9F:
        sb.setCharAt(i, (char) 0x309A);
        break;
    if ((ch > 0xFFA1) && (ch <= 0xFFBE)) {
        sb.setCharAt(i, (char) (ch - 0xCE70));
        continue;
    switch (ch) {
    case 0xFFA0:
        sb.setCharAt(i, (char) 0x3164);
        break;
    case 0xFFDA:
        sb.setCharAt(i, (char) 0x3161);
        break;
    case 0xFFDB:
        sb.setCharAt(i, (char) 0x3162);
        break;
    case 0xFFDC:
        sb.setCharAt(i, (char) 0x3163);
        break;
    case 0xFFE8:
        sb.setCharAt(i, (char) 0x2502);
        break;
    case 0xFFE9:
        sb.setCharAt(i, (char) 0x2190);
        break;
    case 0xFFEA:
        sb.setCharAt(i, (char) 0x2191);
        break;
    case 0xFFEB:
        sb.setCharAt(i, (char) 0x2192);
        break;
    case 0xFFEC:
        sb.setCharAt(i, (char) 0x2193);
        break;
    case 0xFFED:
        sb.setCharAt(i, (char) 0x25A0);
        break;
    case 0xFFEE:
        sb.setCharAt(i, (char) 0x25CB);
        break;
    switch (ch) {
    case 0x2100:
        sb.setCharAt(i, 'a');
        sb.insert(i + 1, "/c");
        i += 2;
        break;
    case 0x2101:
        sb.setCharAt(i, 'a');
        sb.insert(i + 1, "/s");
        i += 2;
        break;
    case 0x2105:
        sb.setCharAt(i, 'c');
        sb.insert(i + 1, "/o");
        i += 2;
        break;
    case 0x2103:
        sb.setCharAt(i, (char) 0x00B0);
        sb.insert(i + 1, "C");
        i++;
        break;
    case 0x2109:
        sb.setCharAt(i, (char) 0x00B0);
        sb.insert(i + 1, "F");
        i++;
        break;
    case 0x2116:
        sb.setCharAt(i, 'N');
        sb.insert(i + 1, "o");
        i++;
        break;
    case 0x212A:
        sb.setCharAt(i, 'K');
        break;
    case 0x212B:
        sb.setCharAt(i, (char) 0x00C5);
        break;
    switch (ch) {
    case 0x3371:
        sb.setCharAt(i, 'h');
        sb.insert(i + 1, "Pa");
        i += 2;
        break;
    case 0x3372:
        sb.setCharAt(i, 'd');
        sb.insert(i + 1, "a");
        i++;
        break;
    case 0x3373:
        sb.setCharAt(i, 'A');
        sb.insert(i + 1, "U");
        i++;
        break;
    case 0x3374:
        sb.setCharAt(i, 'b');
        sb.insert(i + 1, "ar");
        i += 2;
        break;
    case 0x3375:
        sb.setCharAt(i, 'o');
        sb.insert(i + 1, "V");
        i++;
        break;
    case 0x3376:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, "c");
        i++;
        break;
    case 0x3377:
        sb.setCharAt(i, 'd');
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x3378:
        sb.setCharAt(i, 'd');
        sb.insert(i + 1, "m\u00B2");
        i += 2;
        break;
    case 0x3379:
        sb.setCharAt(i, 'd');
        sb.insert(i + 1, "m\u00B3");
        i += 2;
        break;
    case 0x337A:
        sb.setCharAt(i, 'I');
        sb.insert(i + 1, "U");
        i++;
        break;
    case 0x3380:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, "A");
        i++;
        break;
    case 0x3381:
        sb.setCharAt(i, 'n');
        sb.insert(i + 1, "A");
        i++;
        break;
    case 0x3382:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "A");
        i++;
        break;
    case 0x3383:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "A");
        i++;
        break;
    case 0x3384:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "A");
        i++;
        break;
    case 0x3385:
        sb.setCharAt(i, 'K');
        sb.insert(i + 1, "B");
        i++;
        break;
    case 0x3386:
        sb.setCharAt(i, 'M');
        sb.insert(i + 1, "B");
        i++;
        break;
    case 0x3387:
        sb.setCharAt(i, 'G');
        sb.insert(i + 1, "B");
        i++;
        break;
    case 0x3388:
        sb.setCharAt(i, 'c');
        sb.insert(i + 1, "al");
        i += 2;
        break;
    case 0x3389:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "cal");
        i += 3;
        break;
    case 0x338A:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, "F");
        i++;
        break;
    case 0x338B:
        sb.setCharAt(i, 'n');
        sb.insert(i + 1, "F");
        i++;
        break;
    case 0x338C:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "F");
        i++;
        break;
    case 0x338D:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "g");
        i++;
        break;
    case 0x338E:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "g");
        i++;
        break;
    case 0x338F:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "g");
        i++;
        break;
    case 0x3390:
        sb.setCharAt(i, 'H');
        sb.insert(i + 1, "z");
        i++;
        break;
    case 0x3391:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "Hz");
        i += 2;
        break;
    case 0x3392:
        sb.setCharAt(i, 'M');
        sb.insert(i + 1, "Hz");
        i += 2;
        break;
    case 0x3393:
        sb.setCharAt(i, 'G');
        sb.insert(i + 1, "Hz");
        i += 2;
        break;
    case 0x3394:
        sb.setCharAt(i, 'T');
        sb.insert(i + 1, "Hz");
        i += 2;
        break;
    case 0x3395:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "\u2113");
        i++;
        break;
    case 0x3396:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "\u2113");
        i++;
        break;
    case 0x3397:
        sb.setCharAt(i, 'd');
        sb.insert(i + 1, "\u2113");
        i++;
        break;
    case 0x3398:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "\u2113");
        i++;
        break;
    case 0x3399:
        sb.setCharAt(i, 'f');
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x339A:
        sb.setCharAt(i, 'n');
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x339B:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x339C:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x339D:
        sb.setCharAt(i, 'c');
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x339E:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x339F:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "m\u00B2");
        i += 2;
        break;
    case 0x33A0:
        sb.setCharAt(i, 'c');
        sb.insert(i + 1, "m\u00B2");
        i += 2;
        break;
    case 0x33A1:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "\u00B2");
        i++;
        break;
    case 0x33A2:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "m\u00B2");
        i += 2;
        break;
    case 0x33A3:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "m\u00B3");
        i += 2;
        break;
    case 0x33A4:
        sb.setCharAt(i, 'c');
        sb.insert(i + 1, "m\u00B3");
        i += 2;
        break;
    case 0x33A5:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "\u00B3");
        i++;
        break;
    case 0x33A6:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "m\u00B3");
        i += 2;
        break;
    case 0x33A7:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "/s");
        i += 2;
        break;
    case 0x33A8:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "/s\u00B2");
        i += 3;
        break;
    case 0x33A9:
        sb.setCharAt(i, 'P');
        sb.insert(i + 1, "a");
        i++;
        break;
    case 0x33AA:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "Pa");
        i += 2;
        break;
    case 0x33AB:
        sb.setCharAt(i, 'M');
        sb.insert(i + 1, "Pa");
        i += 2;
        break;
    case 0x33AC:
        sb.setCharAt(i, 'G');
        sb.insert(i + 1, "Pa");
        i += 2;
        break;
    case 0x33AD:
        sb.setCharAt(i, 'r');
        sb.insert(i + 1, "ad");
        i += 2;
        break;
    case 0x33AE:
        sb.setCharAt(i, 'r');
        sb.insert(i + 1, "ad/s");
        i += 4;
        break;
    case 0x33AF:
        sb.setCharAt(i, 'r');
        sb.insert(i + 1, "ad/s\u00B2");
        i += 5;
        break;
    case 0x33B0:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, "s");
        i++;
        break;
    case 0x33B1:
        sb.setCharAt(i, 'n');
        sb.insert(i + 1, "s");
        i++;
        break;
    case 0x33B2:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "s");
        i++;
        break;
    case 0x33B3:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "s");
        i++;
        break;
    case 0x33B4:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, "V");
        i++;
        break;
    case 0x33B5:
        sb.setCharAt(i, 'n');
        sb.insert(i + 1, "V");
        i++;
        break;
    case 0x33B6:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "V");
        i++;
        break;
    case 0x33B7:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "V");
        i++;
        break;
    case 0x33B8:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "V");
        i++;
        break;
    case 0x33B9:
        sb.setCharAt(i, 'M');
        sb.insert(i + 1, "V");
        i++;
        break;
    case 0x33BA:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, "W");
        i++;
        break;
    case 0x33BB:
        sb.setCharAt(i, 'n');
        sb.insert(i + 1, "W");
        i++;
        break;
    case 0x33BC:
        sb.setCharAt(i, (char) 0x03BC);
        sb.insert(i + 1, "W");
        i++;
        break;
    case 0x33BD:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "W");
        i++;
        break;
    case 0x33BE:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "W");
        i++;
        break;
    case 0x33BF:
        sb.setCharAt(i, 'M');
        sb.insert(i + 1, "W");
        i++;
        break;
    case 0x33C0:
        sb.setCharAt(i, 'k');
        sb.insert(i + 1, "\u03A9");
        i++;
        break;
    case 0x33C1:
        sb.setCharAt(i, 'M');
        sb.insert(i + 1, "\u03A9");
        i++;
        break;
    case 0x33C2:
        sb.setCharAt(i, 'a');
        sb.insert(i + 1, ".m.");
        i += 3;
        break;
    case 0x33C3:
        sb.setCharAt(i, 'B');
        sb.insert(i + 1, "q");
        i++;
        break;
    case 0x33C4:
        sb.setCharAt(i, 'c');
        sb.insert(i + 1, "c");
        i++;
        break;
    case 0x33C5:
        sb.setCharAt(i, 'c');
        sb.insert(i + 1, "d");
        i++;
        break;
    case 0x33C6:
        sb.setCharAt(i, 'C');
        sb.insert(i + 1, "/kg");
        i += 3;
        break;
    case 0x33C7:
        sb.setCharAt(i, 'C');
        sb.insert(i + 1, "o.");
        i += 2;
        break;
    case 0x33C8:
        sb.setCharAt(i, 'd');
        sb.insert(i + 1, "B");
        i++;
        break;
    case 0x33C9:
        sb.setCharAt(i, 'G');
        sb.insert(i + 1, "y");
        i++;
        break;
    case 0x33CA:
        sb.setCharAt(i, 'h');
        sb.insert(i + 1, "a");
        i++;
        break;
    case 0x33CB:
        sb.setCharAt(i, 'H');
        sb.insert(i + 1, "P");
        i++;
        break;
    case 0x33CC:
        sb.setCharAt(i, 'i');
        sb.insert(i + 1, "n");
        i++;
        break;
    case 0x33CD:
        sb.setCharAt(i, 'K');
        sb.insert(i + 1, "K");
        i++;
        break;
    case 0x33CE:
        sb.setCharAt(i, 'K');
        sb.insert(i + 1, "M");
        i++;
        break;
    case 0x33CF:
        sb.setCharAt(i, 'K');
        sb.insert(i + 1, "t");
        i++;
        break;
    case 0x33D0:
        sb.setCharAt(i, 'l');
        sb.insert(i + 1, "m");
        i++;
        break;
    case 0x33D1:
        sb.setCharAt(i, 'l');
        sb.insert(i + 1, "n");
        i++;
        break;
    case 0x33D2:
        sb.setCharAt(i, 'l');
        sb.insert(i + 1, "og");
        i += 2;
        break;
    case 0x33D3:
        sb.setCharAt(i, 'l');
        sb.insert(i + 1, "x");
        i++;
        break;
    case 0x33D4:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "b");
        i++;
        break;
    case 0x33D5:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "il");
        i += 2;
        break;
    case 0x33D6:
        sb.setCharAt(i, 'm');
        sb.insert(i + 1, "ol");
        i += 2;
        break;
    case 0x33D7:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, "H");
        i++;
        break;
    case 0x33D8:
        sb.setCharAt(i, 'p');
        sb.insert(i + 1, ".m.");
        i += 3;
        break;
    case 0x33D9:
        sb.setCharAt(i, 'P');
        sb.insert(i + 1, "PM");
        i += 2;
        break;
    case 0x33DA:
        sb.setCharAt(i, 'P');
        sb.insert(i + 1, "R");
        i++;
        break;
    case 0x33DB:
        sb.setCharAt(i, 's');
        sb.insert(i + 1, "r");
        i++;
        break;
    case 0x33DC:
        sb.setCharAt(i, 'S');
        sb.insert(i + 1, "v");
        i++;
        break;
    case 0x33DD:
        sb.setCharAt(i, 'W');
        sb.insert(i + 1, "b");
        i++;
        break;
    case 0x33DE:
        sb.setCharAt(i, 'v');
        sb.insert(i + 1, "/m");
        i += 2;
        break;
    case 0x33DF:
        sb.setCharAt(i, 'a');
        sb.insert(i + 1, "/m");
        i += 2;
        break;
    case 0x33FF:
        sb.setCharAt(i, 'g');
        sb.insert(i + 1, "al");
        i += 2;
        break;
String result = sb.toString();
if (text.equals(result)) {
    return text;
return normalizeUnicode(result);
StringnormalizeWord(String word)
normalize Word
try {
    int i;
    Class<?> normalizerClass = Class.forName("java.text.Normalizer");
    Class<?> normalizerFormClass = null;
    Class<?>[] nestedClasses = normalizerClass.getDeclaredClasses();
    for (i = 0; i < nestedClasses.length; i++) {
        Class<?> nestedClass = nestedClasses[i];
        if (nestedClass.getName().equals("java.text.Normalizer$Form")) {
...