List of usage examples for java.lang.Character.getType
public static int getType(int codePoint)
From source file:Main.java
/** * Indicates whether a character is classified as "Alphabetic" by the Unicode standard. * * @param c//from w w w . j av a 2 s . c o m * the character * @return true if the character is "Alphabetic" */ public static boolean isAlphabetic(int c) { //http://www.unicode.org/Public/UNIDATA/UCD.html#Alphabetic //Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl int generalCategory = Character.getType((char) c); switch (generalCategory) { case Character.UPPERCASE_LETTER: //Lu case Character.LOWERCASE_LETTER: //Ll case Character.TITLECASE_LETTER: //Lt case Character.MODIFIER_LETTER: //Lm case Character.OTHER_LETTER: //Lo case Character.LETTER_NUMBER: //Nl return true; default: //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that) //Other_Alphabetic contains mostly more exotic characters return false; } }
From source file:Main.java
/**
 * Rewrites digits and dash-like characters of a string as their ASCII counterparts.
 *
 * @param inputString
 *            the text to normalize
 * @return a copy of {@code inputString} in which every character accepted by
 *         {@link Character#isDigit(char)} is replaced by the ASCII digit of the same value, and
 *         every character of type {@link Character#DASH_PUNCTUATION}, as well as the minus sign
 *         U+2212, is replaced by {@code '-'}
 */
static public String normalizeNumbersAndPunctuation(String inputString) {
    final char[] normalized = inputString.toCharArray();
    for (int index = 0; index < normalized.length; index++) {
        final char original = normalized[index];

        if (Character.isDigit(original)) {
            // Map any Unicode decimal digit onto '0'..'9' by its numeric value.
            normalized[index] = (char) ('0' + Character.digit(original, 10));
        }

        // 0x2212 (decimal 8722) is the mathematical minus sign, which is not dash punctuation.
        final boolean dashLike =
                original == 0x2212 || Character.getType(original) == Character.DASH_PUNCTUATION;
        if (dashLike) {
            normalized[index] = '-';
        }
    }
    return new String(normalized);
}
From source file:org.flowr.utils.NamingStrategy.java
/**
 * Builds an abbreviation from the given name parts, using the first letter of each "camel cased"
 * word.
 *
 * <p>Each part is passed through {@code camelCase} and the results are concatenated; the
 * concatenation is then split with {@code StringUtilsExt.splitByCharacterTypeCamelCase}. For every
 * resulting word:
 * <ul>
 * <li>a word starting with a lowercase letter contributes its first character;</li>
 * <li>a word starting with an uppercase letter contributes its first character, unless its second
 * character is uppercase too, in which case the whole word is kept;</li>
 * <li>any other word is skipped.</li>
 * </ul>
 *
 * @param names
 *            the name element parts.
 * @return the abbreviation built from the parts
 */
public static String initials(String... names) {
    StringBuilder joined = new StringBuilder();
    for (String name : names) {
        joined.append(camelCase(name, CLASSNAME_IGNORE_CHARS, null));
    }

    StringBuilder abbreviation = new StringBuilder();
    for (String word : StringUtilsExt.splitByCharacterTypeCamelCase(joined.toString())) {
        char first = word.charAt(0);
        int firstType = Character.getType(first);

        if (firstType == Character.LOWERCASE_LETTER) {
            abbreviation.append(first);
        } else if (firstType == Character.UPPERCASE_LETTER) {
            boolean startsWithTwoCapitals = word.length() > 1
                    && Character.getType(word.charAt(1)) == Character.UPPERCASE_LETTER;
            if (startsWithTwoCapitals) {
                abbreviation.append(word);
            } else {
                abbreviation.append(first);
            }
        }
    }
    return abbreviation.toString();
}
From source file:de.fau.cs.osr.utils.StringUtils.java
public static String escHtml(String text, boolean forAttribute) { // StringEscapeUtils.escapeHtml(in) does not escape '\'' but a lot of // other stuff that doesn't need escaping. if (text == null) return ""; int n = text.length(); StringBuilder sb = new StringBuilder(n * 4 / 3); for (int i = 0; i < n; i++) { char ch = text.charAt(i); switch (ch) { case ' ': case '\n': case '\t': sb.append(ch);/*ww w . j a v a 2s. com*/ break; case '<': sb.append("<"); break; case '>': sb.append(forAttribute ? ">" : ">"); break; case '&': sb.append("&"); break; case '\'': // ' cannot safely be used, see wikipedia sb.append("'"); break; case '"': sb.append(forAttribute ? """ : "\""); break; default: if ((ch >= 0 && ch < 0x20) || (ch == 0xFE)) { hexCharRef(sb, ch); break; } else if (Character.isHighSurrogate(ch)) { ++i; if (i < n) { char ch2 = text.charAt(i); if (Character.isLowSurrogate(ch2)) { int codePoint = Character.toCodePoint(ch, ch2); switch (Character.getType(codePoint)) { case Character.CONTROL: case Character.PRIVATE_USE: case Character.UNASSIGNED: hexCharRef(sb, codePoint); break; default: sb.append(ch); sb.append(ch2); break; } continue; } } } else if (!Character.isLowSurrogate(ch)) { sb.append(ch); continue; } // No low surrogate followed or only low surrogate throw new IllegalArgumentException("String contains isolated surrogates!"); } } return sb.toString(); }
From source file:Main.java
/** * <p>/*from w w w . ja v a2s .com*/ * Splits a String by Character type as returned by * <code>java.lang.Character.getType(char)</code>. Groups of contiguous * characters of the same type are returned as complete tokens, with the * following exception: if <code>camelCase</code> is <code>true</code>, * the character of type <code>Character.UPPERCASE_LETTER</code>, if any, * immediately preceding a token of type * <code>Character.LOWERCASE_LETTER</code> will belong to the following * token rather than to the preceding, if any, * <code>Character.UPPERCASE_LETTER</code> token. * * @param str * the String to split, may be <code>null</code> * @param camelCase * whether to use so-called "camel-case" for letter types * @return an array of parsed Strings, <code>null</code> if null String * input * @since 2.4 */ private static String[] splitByCharacterType(String str, boolean camelCase) { if (str == null) { return null; } if (str.length() == 0) { return new String[0]; } char[] c = str.toCharArray(); List list = new ArrayList(); int tokenStart = 0; int currentType = Character.getType(c[tokenStart]); for (int pos = tokenStart + 1; pos < c.length; pos++) { int type = Character.getType(c[pos]); if (type == currentType) { continue; } if (camelCase && type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) { int newTokenStart = pos - 1; if (newTokenStart != tokenStart) { list.add(new String(c, tokenStart, newTokenStart - tokenStart)); tokenStart = newTokenStart; } } else { list.add(new String(c, tokenStart, pos - tokenStart)); tokenStart = pos; } currentType = type; } list.add(new String(c, tokenStart, c.length - tokenStart)); return (String[]) list.toArray(new String[list.size()]); }
From source file:ORG.oclc.os.SRW.Utilities.java
public static String byteArrayToString(byte array[], int offset, int length) { StringBuffer str = new StringBuffer(); StringBuffer alpha = new StringBuffer(); int stopat = length + offset; char c;/*from w ww . j a v a 2s .c o m*/ int i, type; for (i = 1; offset < stopat; offset++, i++) { if ((array[offset] & 0xff) < 16) str.append(" 0"); else str.append(" "); str.append(Integer.toString(array[offset] & 0xff, 16)); c = (char) array[offset]; type = Character.getType(c); // if (Character.isLetterOrDigit(c) || (c > ) if (c < ' ' || c >= 0x7f) alpha.append('.'); else alpha.append(c); if ((i % 16) == 0) { str.append(" " + alpha + newLine); alpha.setLength(0); } } while (i++ % 16 != 1) str.append(" "); offset = 0; str.append(" " + alpha + newLine); str.append(newLine); return str.toString(); }
From source file:org.apache.accumulo.monitor.rest.logs.LogResource.java
private String sanitize(String s) { StringBuilder text = new StringBuilder(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); int type = Character.getType(c); boolean notPrintable = type == Character.UNASSIGNED || type == Character.LINE_SEPARATOR || type == Character.NON_SPACING_MARK || type == Character.PRIVATE_USE; text.append(notPrintable ? '?' : c); }//w ww . j a v a 2s . c om return text.toString().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"); }
From source file:ru.caffeineim.protocols.icq.tool.Dumper.java
/** * This method filters all non-displayable characters and replace them * with a '.' in the resulting String./*from w w w.j a v a 2 s . c o m*/ * * @param array The receive byte array. * @return The representation of all displayable characters. */ private static String stringTranslation(byte[] array) { String ent = new String(array); String res = new String(); for (int i = 0; i < ent.length(); i++) { if (Character.getType(ent.charAt(i)) == Character.CONTROL) res += "."; else res += ent.charAt(i); } return res; }
From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java
/** * Removes diacritics from a text./* w w w . j av a 2 s . c om*/ * * @param text Text to process. * @return Text without diacritics. */ public static String removeDiacritics(String text) { if (text == null) { return null; } String tmp = Normalizer.normalize(text, Normalizer.Form.NFKD); StringBuilder builder = new StringBuilder(); for (int i = 0; i < tmp.length(); i++) { Character ch = tmp.charAt(i); if (Character.getType(ch) == Character.NON_SPACING_MARK) { continue; } if (lookup.containsKey(ch)) { builder.append(lookup.get(ch)); } else { builder.append(ch); } } return builder.toString(); }
From source file:XmlChars.java
/** * Returns true if the character is an XML "letter". XML Names must start with * Letters or a few other characters, but other characters in names must only * satisfy the <em>isNameChar</em> predicate. * //from w ww . j a v a2s.c o m * @see #isNameChar * @see #isNCNameChar */ public static boolean isLetter(char c) { // [84] Letter ::= BaseChar | Ideographic // [85] BaseChar ::= ... too much to repeat // [86] Ideographic ::= ... too much to repeat // // Optimize the typical case. // if (c >= 'a' && c <= 'z') return true; if (c == '/') return false; if (c >= 'A' && c <= 'Z') return true; // // Since the tables are too ridiculous to use in code, // we're using the footnotes here to drive this test. // switch (Character.getType(c)) { // app. B footnote says these are 'name start' // chars' ... case Character.LOWERCASE_LETTER: // Ll case Character.UPPERCASE_LETTER: // Lu case Character.OTHER_LETTER: // Lo case Character.TITLECASE_LETTER: // Lt case Character.LETTER_NUMBER: // Nl // OK, here we just have some exceptions to check... return !isCompatibilityChar(c) // per "5.14 of Unicode", rule out some combiners && !(c >= 0x20dd && c <= 0x20e0); default: // check for some exceptions: these are "alphabetic" return ((c >= 0x02bb && c <= 0x02c1) || c == 0x0559 || c == 0x06e5 || c == 0x06e6); } }