Example usage for java.lang.Character getType

Introduction

On this page you can find example usages of the java.lang.Character method getType.

Prototype

public static int getType(int codePoint)

Source Link

Document

Returns a value indicating a character's general category.

Usage

From source file:Main.java

/**
 * Indicates whether a character is classified as "Alphabetic" by the Unicode standard.
 *
 * <p>Only the general-category part of the definition (Lu, Ll, Lt, Lm, Lo, Nl) is checked.
 * Code points that are alphabetic solely through the Other_Alphabetic property are
 * reported as not alphabetic.
 *
 * @param c
 *     the character, given as a Unicode code point (supplementary code points included)
 * @return true if the character is "Alphabetic"
 */
public static boolean isAlphabetic(int c) {
    //http://www.unicode.org/Public/UNIDATA/UCD.html#Alphabetic
    //Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl
    // Use the code-point overload: narrowing to char would drop the high bits of any
    // code point above U+FFFF and classify an unrelated BMP character instead.
    int generalCategory = Character.getType(c);
    switch (generalCategory) {
    case Character.UPPERCASE_LETTER: //Lu
    case Character.LOWERCASE_LETTER: //Ll
    case Character.TITLECASE_LETTER: //Lt
    case Character.MODIFIER_LETTER: //Lm
    case Character.OTHER_LETTER: //Lo
    case Character.LETTER_NUMBER: //Nl
        return true;
    default:
        //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
        //Other_Alphabetic contains mostly more exotic characters
        return false;
    }
}

From source file:Main.java

/**
 * Rewrites every digit and dash of the input as its plain ASCII counterpart.
 *
 * <p>Each char accepted by {@link Character#isDigit(char)} is replaced by the ASCII digit
 * with the same decimal value. Each char of type {@link Character#DASH_PUNCTUATION}, as
 * well as the minus sign U+2212, becomes '-'. All other chars are copied unchanged.
 *
 * @param inputString the text to normalize
 * @return a new string of the same length with digits and dashes normalized
 */
static public String normalizeNumbersAndPunctuation(String inputString) {
    final char minusSign = 0x2212; // decimal 8722
    char[] result = inputString.toCharArray();

    for (int i = 0; i < result.length; i++) {
        char original = result[i];
        if (Character.isDigit(original)) {
            // Map the (possibly non-ASCII) decimal digit to '0'..'9'.
            result[i] = Character.forDigit(Character.digit(original, 10), 10);
        } else if (original == minusSign || Character.getType(original) == Character.DASH_PUNCTUATION) {
            result[i] = '-';
        }
    }
    return String.valueOf(result);
}

From source file:org.flowr.utils.NamingStrategy.java

/**
 * Builds an abbreviation from the given name parts.
 *
 * <p>Every part is passed through {@code camelCase} and the results are concatenated. The
 * combined text is then split with {@code StringUtilsExt.splitByCharacterTypeCamelCase}.
 * A token that starts with two upper-case letters is kept whole; any other token starting
 * with an upper- or lower-case letter contributes only its first character. Tokens that
 * start with anything else are skipped.
 *
 * @param names
 *        the name element parts.
 * @return the abbreviation assembled from the tokens
 */
public static String initials(String... names) {
    StringBuilder joined = new StringBuilder();
    for (String part : names) {
        joined.append(camelCase(part, CLASSNAME_IGNORE_CHARS, null));
    }

    StringBuilder abbreviation = new StringBuilder();
    for (String token : StringUtilsExt.splitByCharacterTypeCamelCase(joined.toString())) {
        char first = token.charAt(0);
        int firstType = Character.getType(first);
        if (firstType == Character.UPPERCASE_LETTER) {
            boolean startsWithTwoCapitals = token.length() > 1
                    && Character.getType(token.charAt(1)) == Character.UPPERCASE_LETTER;
            if (startsWithTwoCapitals) {
                abbreviation.append(token);
            } else {
                abbreviation.append(first);
            }
        } else if (firstType == Character.LOWERCASE_LETTER) {
            abbreviation.append(first);
        }
    }
    return abbreviation.toString();
}

From source file:de.fau.cs.osr.utils.StringUtils.java

/**
 * Escapes a string for use in HTML text or, optionally, in an attribute value.
 *
 * <ul>
 * <li>{@code null} yields the empty string.</li>
 * <li>Space, newline and tab are copied unchanged.</li>
 * <li>{@code <}, {@code &} and {@code '} are always replaced by {@code &lt;},
 * {@code &amp;} and {@code &#39;}.</li>
 * <li>{@code >} and {@code "} are replaced by {@code &gt;} and {@code &quot;} only when
 * {@code forAttribute} is true; otherwise they are copied unchanged.</li>
 * <li>Remaining chars below 0x20, and the char 0xFE, are handed to {@code hexCharRef}
 * (defined elsewhere; presumably it appends a hexadecimal character reference).</li>
 * <li>A valid surrogate pair is handed to {@code hexCharRef} when its code point is of type
 * CONTROL, PRIVATE_USE or UNASSIGNED; otherwise both chars are copied unchanged.</li>
 * <li>Every other char is copied unchanged.</li>
 * </ul>
 *
 * @param text the text to escape, may be {@code null}
 * @param forAttribute whether {@code >} and {@code "} must be escaped as well
 * @return the escaped text, never {@code null}
 * @throws IllegalArgumentException if the text contains a high surrogate that is not
 *         followed by a low surrogate, or a low surrogate without a preceding high surrogate
 */
public static String escHtml(String text, boolean forAttribute) {
    // StringEscapeUtils.escapeHtml(in) does not escape '\'' but a lot of
    // other stuff that doesn't need escaping.

    if (text == null)
        return "";

    int n = text.length();
    // Pre-size the buffer at 4/3 of the input length to leave room for escapes.
    StringBuilder sb = new StringBuilder(n * 4 / 3);
    for (int i = 0; i < n; i++) {
        char ch = text.charAt(i);
        switch (ch) {
        case ' ':
        case '\n':
        case '\t':
            // Plain whitespace is copied; newline and tab are handled here so they do
            // not reach the "< 0x20" branch below.
            sb.append(ch);
            break;
        case '<':
            sb.append("&lt;");
            break;
        case '>':
            sb.append(forAttribute ? "&gt;" : ">");
            break;
        case '&':
            sb.append("&amp;");
            break;
        case '\'':
            // &apos; cannot safely be used, see wikipedia
            sb.append("&#39;");
            break;
        case '"':
            sb.append(forAttribute ? "&quot;" : "\"");
            break;
        default:
            // "ch >= 0" is always true for a char.
            // NOTE(review): 0xFE is U+00FE; it is unclear from this code why that single
            // value is treated specially - confirm the intended constant.
            if ((ch >= 0 && ch < 0x20) || (ch == 0xFE)) {
                hexCharRef(sb, ch);
                // Leaves the switch; the loop then moves on to the next char.
                break;
            } else if (Character.isHighSurrogate(ch)) {
                // Advance to the char that should be the matching low surrogate.
                ++i;
                if (i < n) {
                    char ch2 = text.charAt(i);
                    if (Character.isLowSurrogate(ch2)) {
                        int codePoint = Character.toCodePoint(ch, ch2);
                        switch (Character.getType(codePoint)) {
                        case Character.CONTROL:
                        case Character.PRIVATE_USE:
                        case Character.UNASSIGNED:
                            hexCharRef(sb, codePoint);
                            break;

                        default:
                            sb.append(ch);
                            sb.append(ch2);
                            break;
                        }

                        // Valid pair handled; skip the exception below.
                        continue;
                    }
                }
                // Falls through to the exception: the high surrogate had no partner.
            } else if (!Character.isLowSurrogate(ch)) {
                // Ordinary char that needs no escaping.
                sb.append(ch);
                continue;
            }

            // No low surrogate followed or only low surrogate
            throw new IllegalArgumentException("String contains isolated surrogates!");
        }
    }

    return sb.toString();
}

From source file:Main.java

/**
 * <p>/*from   w  w  w . ja  v a2s  .com*/
 * Splits a String by Character type as returned by
 * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
 * characters of the same type are returned as complete tokens, with the
 * following exception: if <code>camelCase</code> is <code>true</code>,
 * the character of type <code>Character.UPPERCASE_LETTER</code>, if any,
 * immediately preceding a token of type
 * <code>Character.LOWERCASE_LETTER</code> will belong to the following
 * token rather than to the preceding, if any,
 * <code>Character.UPPERCASE_LETTER</code> token.
 * 
 * @param str
 *          the String to split, may be <code>null</code>
 * @param camelCase
 *          whether to use so-called "camel-case" for letter types
 * @return an array of parsed Strings, <code>null</code> if null String
 *         input
 * @since 2.4
 */
private static String[] splitByCharacterType(String str, boolean camelCase) {
    if (str == null) {
        return null;
    }
    if (str.length() == 0) {
        return new String[0];
    }
    char[] c = str.toCharArray();
    List list = new ArrayList();
    int tokenStart = 0;
    int currentType = Character.getType(c[tokenStart]);
    for (int pos = tokenStart + 1; pos < c.length; pos++) {
        int type = Character.getType(c[pos]);
        if (type == currentType) {
            continue;
        }
        if (camelCase && type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) {
            int newTokenStart = pos - 1;
            if (newTokenStart != tokenStart) {
                list.add(new String(c, tokenStart, newTokenStart - tokenStart));
                tokenStart = newTokenStart;
            }
        } else {
            list.add(new String(c, tokenStart, pos - tokenStart));
            tokenStart = pos;
        }
        currentType = type;
    }
    list.add(new String(c, tokenStart, c.length - tokenStart));
    return (String[]) list.toArray(new String[list.size()]);
}

From source file:ORG.oclc.os.SRW.Utilities.java

/**
 * Formats a region of a byte array as a hex dump with 16 bytes per row.
 *
 * <p>Each byte is written as a space followed by two lower-case hex digits. Every row ends
 * with two spaces, a text view of the row's bytes and {@code newLine} (defined elsewhere in
 * this class). In the text view, values below {@code ' '} or at or above {@code 0x7f} are
 * shown as {@code '.'}. The last row is padded with three spaces per missing byte so the
 * text column lines up, and the dump is terminated by one extra {@code newLine}.
 *
 * @param array  the bytes to dump
 * @param offset index of the first byte to dump
 * @param length number of bytes to dump
 * @return the formatted dump
 */
public static String byteArrayToString(byte array[], int offset, int length) {
    StringBuilder str = new StringBuilder();
    StringBuilder alpha = new StringBuilder();
    int stopat = length + offset;
    int i; // 1-based count of bytes written so far, needed after the loop for padding

    for (i = 1; offset < stopat; offset++, i++) {
        int unsigned = array[offset] & 0xff;
        // Left-pad single-digit hex values with '0'.
        str.append(unsigned < 16 ? " 0" : " ");
        str.append(Integer.toString(unsigned, 16));

        // Sign-extending cast: negative bytes become chars >= 0x7f and print as '.'.
        char c = (char) array[offset];
        if (c < ' ' || c >= 0x7f)
            alpha.append('.');
        else
            alpha.append(c);

        if ((i % 16) == 0) {
            str.append("  ").append(alpha).append(newLine);
            alpha.setLength(0);
        }
    }
    // Pad the hex column of an incomplete final row.
    while (i++ % 16 != 1)
        str.append("   ");

    str.append("  ").append(alpha).append(newLine);
    str.append(newLine);

    return str.toString();
}

From source file:org.apache.accumulo.monitor.rest.logs.LogResource.java

/**
 * Masks selected character classes and escapes HTML markup characters.
 *
 * <p>Chars whose type is UNASSIGNED, LINE_SEPARATOR, NON_SPACING_MARK or PRIVATE_USE are
 * replaced by {@code '?'}. Afterwards {@code &}, {@code <} and {@code >} are replaced by
 * {@code &amp;}, {@code &lt;} and {@code &gt;}.
 *
 * @param s the text to clean
 * @return the masked and escaped text
 */
private String sanitize(String s) {
    StringBuilder filtered = new StringBuilder();
    for (char c : s.toCharArray()) {
        switch (Character.getType(c)) {
        case Character.UNASSIGNED:
        case Character.LINE_SEPARATOR:
        case Character.NON_SPACING_MARK:
        case Character.PRIVATE_USE:
            filtered.append('?');
            break;
        default:
            filtered.append(c);
            break;
        }
    }
    // '&' must be escaped first so the entities added afterwards are not escaped again.
    return filtered.toString().replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
}

From source file:ru.caffeineim.protocols.icq.tool.Dumper.java

/**
 * This method filters all non-displayable characters and replace them
 * with a '.' in the resulting String./*from   w w w.j  a  v  a  2 s  .  c o m*/
 *
 * @param array The receive byte array.
 * @return The representation of all displayable characters.
 */
private static String stringTranslation(byte[] array) {
    String ent = new String(array);
    String res = new String();

    for (int i = 0; i < ent.length(); i++) {
        if (Character.getType(ent.charAt(i)) == Character.CONTROL)
            res += ".";
        else
            res += ent.charAt(i);
    }

    return res;
}

From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java

/**
 * Removes diacritics from a text./* w  w  w .  j  av  a 2  s  .  c om*/
 *
 * @param text Text to process.
 * @return Text without diacritics.
 */
public static String removeDiacritics(String text) {
    if (text == null) {
        return null;
    }

    String tmp = Normalizer.normalize(text, Normalizer.Form.NFKD);

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < tmp.length(); i++) {
        Character ch = tmp.charAt(i);
        if (Character.getType(ch) == Character.NON_SPACING_MARK) {
            continue;
        }

        if (lookup.containsKey(ch)) {
            builder.append(lookup.get(ch));
        } else {
            builder.append(ch);
        }
    }

    return builder.toString();
}

From source file:XmlChars.java

/**
 * Returns true if the character is an XML "letter". XML Names must start with
 * Letters or a few other characters, but other characters in names must only
 * satisfy the <em>isNameChar</em> predicate.
 *
 * @param c the character to test
 * @return true if the character counts as an XML letter
 * @see #isNameChar
 * @see #isNCNameChar
 */
public static boolean isLetter(char c) {
    // [84] Letter ::= BaseChar | Ideographic
    // [85] BaseChar ::= ... too much to repeat
    // [86] Ideographic ::= ... too much to repeat

    // Fast paths for the typical ASCII input.
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
        return true;
    }
    if (c == '/') {
        return false;
    }

    // The full tables are impractical to embed, so the footnotes of the
    // specification drive the remaining checks.
    int type = Character.getType(c);
    // app. B footnote says these categories are 'name start' chars
    boolean nameStartCategory = type == Character.LOWERCASE_LETTER // Ll
            || type == Character.UPPERCASE_LETTER // Lu
            || type == Character.OTHER_LETTER // Lo
            || type == Character.TITLECASE_LETTER // Lt
            || type == Character.LETTER_NUMBER; // Nl

    if (!nameStartCategory) {
        // A few characters outside those categories still count as "alphabetic".
        return (c >= 0x02bb && c <= 0x02c1) || c == 0x0559 || c == 0x06e5 || c == 0x06e6;
    }

    // Exceptions within the letter categories: compatibility characters ...
    if (isCompatibilityChar(c)) {
        return false;
    }
    // ... and, per "5.14 of Unicode", some combiners.
    return c < 0x20dd || c > 0x20e0;
}