List of usage examples for java.lang.Character.MODIFIER_LETTER
byte MODIFIER_LETTER
To view the source code for java.lang.Character.MODIFIER_LETTER, click the Source Link.
From source file:Main.java
public static void main(String[] args) { for (int ch = Character.MIN_VALUE; ch < Character.MAX_VALUE; ch++) { if (Character.MODIFIER_LETTER == Character.getType(ch)) { System.out.println((char) ch); }/*from w w w .j av a 2 s . com*/ } }
From source file:Main.java
/** * Indicates whether a character is classified as "Alphabetic" by the Unicode standard. * * @param c/* w w w .ja va 2s. c om*/ * the character * @return true if the character is "Alphabetic" */ public static boolean isAlphabetic(int c) { //http://www.unicode.org/Public/UNIDATA/UCD.html#Alphabetic //Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl int generalCategory = Character.getType((char) c); switch (generalCategory) { case Character.UPPERCASE_LETTER: //Lu case Character.LOWERCASE_LETTER: //Ll case Character.TITLECASE_LETTER: //Lt case Character.MODIFIER_LETTER: //Lm case Character.OTHER_LETTER: //Lo case Character.LETTER_NUMBER: //Nl return true; default: //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that) //Other_Alphabetic contains mostly more exotic characters return false; } }
From source file:gov.va.vinci.leo.ae.ExampleWhitespaceTokenizer.java
/** * Given a character c return the type definition from the * list of public static type definitions in this class. * * @param c/* w w w.j a v a2s . c o m*/ * @return type definition for the character c */ private static int characterType(char c) { switch (Character.getType(c)) { //letters case Character.UPPERCASE_LETTER: case Character.LOWERCASE_LETTER: case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.OTHER_LETTER: case Character.NON_SPACING_MARK: case Character.ENCLOSING_MARK: case Character.COMBINING_SPACING_MARK: case Character.PRIVATE_USE: case Character.SURROGATE: case Character.MODIFIER_SYMBOL: return TK_LETTER; //numbers case Character.DECIMAL_DIGIT_NUMBER: case Character.LETTER_NUMBER: case Character.OTHER_NUMBER: return TK_NUMBER; //Regular Whitespace case Character.SPACE_SEPARATOR: return TK_WHITESPACE; //Punctuation case Character.DASH_PUNCTUATION: case Character.START_PUNCTUATION: case Character.END_PUNCTUATION: case Character.OTHER_PUNCTUATION: return TK_PUNCTUATION; //Simple NewLine case Character.LINE_SEPARATOR: case Character.PARAGRAPH_SEPARATOR: return TK_NEWLINE; //Other types of "control" characters case Character.CONTROL: if (c == '\n' || c == '\r') return TK_NEWLINE; if (Character.isWhitespace(c)) //Tab char is a "Control" character return TK_WHITESPACE; return TK_CONTROL; default: if (Character.isWhitespace(c)) { return TK_WHITESPACE; } //if return TK_UNKNOWN; }//switch }
From source file:XmlChars.java
private static boolean isLetter2(char c) { // [84] Letter ::= BaseChar | Ideographic // [85] BaseChar ::= ... too much to repeat // [86] Ideographic ::= ... too much to repeat // [87] CombiningChar ::= ... too much to repeat ////from ww w . j a v a 2 s .com // Optimize the typical case. // if (c >= 'a' && c <= 'z') return true; if (c == '>') return false; if (c >= 'A' && c <= 'Z') return true; // // Since the tables are too ridiculous to use in code, // we're using the footnotes here to drive this test. // switch (Character.getType(c)) { // app. B footnote says these are 'name start' // chars' ... case Character.LOWERCASE_LETTER: // Ll case Character.UPPERCASE_LETTER: // Lu case Character.OTHER_LETTER: // Lo case Character.TITLECASE_LETTER: // Lt case Character.LETTER_NUMBER: // Nl // ... and these are name characters 'other // than name start characters' case Character.COMBINING_SPACING_MARK: // Mc case Character.ENCLOSING_MARK: // Me case Character.NON_SPACING_MARK: // Mn case Character.MODIFIER_LETTER: // Lm case Character.DECIMAL_DIGIT_NUMBER: // Nd // OK, here we just have some exceptions to check... return !isCompatibilityChar(c) // per "5.14 of Unicode", rule out some combiners && !(c >= 0x20dd && c <= 0x20e0); default: // added a character ... return c == 0x0387; } }
From source file:marytts.util.string.StringUtils.java
/**
 * Tests whether a code point is a letter or a combining mark according to its
 * Unicode general category.
 *
 * <p>The result is {@code true} exactly when the category is one of:
 * <ul>
 * <li>Lu Letter, Uppercase</li>
 * <li>Ll Letter, Lowercase</li>
 * <li>Lt Letter, Titlecase</li>
 * <li>Lm Letter, Modifier</li>
 * <li>Lo Letter, Other</li>
 * <li>Mn Mark, Nonspacing</li>
 * <li>Mc Mark, Spacing Combining</li>
 * <li>Me Mark, Enclosing</li>
 * </ul>
 * Category definitions: http://unicode.org/Public/UNIDATA/UCD.html#General_Category_Values;
 * per-character assignments: http://unicode.org/Public/UNIDATA/UnicodeData.txt
 *
 * @param codePoint the unicode codepoint as determined e.g. by String.codePointAt().
 * @return true if the code point falls in one of the categories above, false otherwise
 */
public static boolean isLetterOrModifier(int codePoint) {
    switch (Character.getType(codePoint)) {
        // Letters
        case Character.UPPERCASE_LETTER:
        case Character.LOWERCASE_LETTER:
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:
        // Marks
        case Character.NON_SPACING_MARK:
        case Character.COMBINING_SPACING_MARK:
        case Character.ENCLOSING_MARK:
            return true;
        default:
            return false;
    }
}
From source file:org.apache.orc.impl.mask.RedactMaskFactory.java
/** * Given a UTF code point, find the replacement codepoint * @param codepoint a UTF character/*from ww w .ja va 2s .c o m*/ * @return the replacement codepoint */ int getReplacement(int codepoint) { switch (Character.getType(codepoint)) { case Character.UPPERCASE_LETTER: return UPPPER_REPLACEMENT; case Character.LOWERCASE_LETTER: return LOWER_REPLACEMENT; case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.OTHER_LETTER: return OTHER_LETTER_REPLACEMENT; case Character.NON_SPACING_MARK: case Character.ENCLOSING_MARK: case Character.COMBINING_SPACING_MARK: return MARK_REPLACEMENT; case Character.DECIMAL_DIGIT_NUMBER: return DIGIT_CP_REPLACEMENT; case Character.LETTER_NUMBER: case Character.OTHER_NUMBER: return OTHER_NUMBER_REPLACEMENT; case Character.SPACE_SEPARATOR: case Character.LINE_SEPARATOR: case Character.PARAGRAPH_SEPARATOR: return SEPARATOR_REPLACEMENT; case Character.MATH_SYMBOL: case Character.CURRENCY_SYMBOL: case Character.MODIFIER_SYMBOL: case Character.OTHER_SYMBOL: return SYMBOL_REPLACEMENT; case Character.DASH_PUNCTUATION: case Character.START_PUNCTUATION: case Character.END_PUNCTUATION: case Character.CONNECTOR_PUNCTUATION: case Character.OTHER_PUNCTUATION: return PUNCTUATION_REPLACEMENT; default: return OTHER_REPLACEMENT; } }
From source file:org.apache.pdfbox.text.TextPosition.java
/**
 * Reports whether this position's text is a single diacritic character.
 *
 * <p>Only text of exactly one {@code char} is considered; it counts as a
 * diacritic when its general category is a non-spacing mark, a modifier
 * symbol or a modifier letter.
 *
 * @return True if the current character is a diacritic char.
 */
public boolean isDiacritic() {
    final String unicode = this.getUnicode();
    if (unicode.length() == 1) {
        switch (Character.getType(unicode.charAt(0))) {
            case Character.NON_SPACING_MARK:
            case Character.MODIFIER_SYMBOL:
            case Character.MODIFIER_LETTER:
                return true;
            default:
                return false;
        }
    }
    // Empty or multi-char text is never treated as a diacritic.
    return false;
}