List of usage examples for java.lang.Character.getType
public static int getType(int codePoint)
From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java
/** * Generates a sort key for a given text. This key is useful in environments * where only basic Latin characters are reliably sorted (for example, a * RDBMS with unknown collation settings). * * @param text Text to process./*from ww w . java2 s .com*/ * @param idempotent Whether the conversion should be idempotent. This is * guaranteed to be true: * <code>alphaSortable(s, true).equals(alphaSortable(alphaSortable(s, true), true)</code>, * while this is not necessarily true: * <code>alphaSortable(s, false).equals(alphaSortable(alphaSortable(s, false), false)</code>. * @return */ public static String alphaSortable(String text, boolean idempotent) { if (text == null) { return null; } if (idempotent && text.startsWith(MAGIC)) { return text; } String tmp = text.toLowerCase(Locale.ENGLISH); tmp = Normalizer.normalize(tmp, Normalizer.Form.NFKD); StringBuilder builder = new StringBuilder(); if (idempotent) { builder.append(MAGIC); } boolean wasSpaceSeparator = false; for (int i = 0; i < tmp.length(); i++) { Character ch = tmp.charAt(i); if (!ArrayUtils.contains(INTERESTING_TYPES, Character.getType(ch)) && !ArrayUtils.contains(INTERESTING_CHARACTERS, ch)) { continue; } String s; // TODO quick fix of mantis 3231 if (isSpaceSeparator(ch)) { if (wasSpaceSeparator) { continue; } wasSpaceSeparator = true; } else { wasSpaceSeparator = false; } if (alphaSortableMapping.containsKey(ch)) { s = alphaSortableMapping.get(ch); } else if (lookup.containsKey(ch)) { s = lookup.get(ch); } else { s = ch.toString(); } for (int j = 0; j < s.length(); j++) { Character c = s.charAt(j); // TODO Very ugly workaround of the problem described in 0002643 if (ArrayUtils.contains(INTERESTING_CHARACTERS, c)) { builder.append(c); } else { builder.append(StringUtils.leftPad(Integer.toHexString(c.charValue()), 4, '0')); } } } return builder.toString(); }
From source file:info.magnolia.cms.taglibs.util.BaseImageTag.java
/** * Replace any special characters that are not letters or numbers with a replacement string. The two exceptions are * '-' and '_', which are allowed./*from w w w . j a v a 2 s . c om*/ */ public String convertToSimpleString(String string) { final StringBuffer result = new StringBuffer(); final StringCharacterIterator iterator = new StringCharacterIterator(string); char character = iterator.current(); while (character != CharacterIterator.DONE) { int charType = Character.getType(character); if (charType == Character.SPACE_SEPARATOR) { result.append("-"); } else if ((charType != Character.UPPERCASE_LETTER) && (charType != Character.LOWERCASE_LETTER) && (charType != Character.DECIMAL_DIGIT_NUMBER) && (charType != Character.CONNECTOR_PUNCTUATION) && (charType != Character.DASH_PUNCTUATION)) { result.append("u" + (int) character); } else { // the char is not a special one // add it to the result as is result.append(character); } character = iterator.next(); } return result.toString(); }
From source file:XmlChars.java
/** * Returns true if the character is an XML "letter". XML Names must * start with Letters or a few other characters, but other characters * in names must only satisfy the <em>isNameChar</em> predicate. * * @see #isNameChar/*w w w.j av a 2s . com*/ * @see #isNCNameChar */ public static boolean isLetter(char c) { // [84] Letter ::= BaseChar | Ideographic // [85] BaseChar ::= ... too much to repeat // [86] Ideographic ::= ... too much to repeat // // Optimize the typical case. // if (c >= 'a' && c <= 'z') return true; if (c == '/') return false; if (c >= 'A' && c <= 'Z') return true; // // Since the tables are too ridiculous to use in code, // we're using the footnotes here to drive this test. // switch (Character.getType(c)) { // app. B footnote says these are 'name start' // chars' ... case Character.LOWERCASE_LETTER: // Ll case Character.UPPERCASE_LETTER: // Lu case Character.OTHER_LETTER: // Lo case Character.TITLECASE_LETTER: // Lt case Character.LETTER_NUMBER: // Nl // OK, here we just have some exceptions to check... return !isCompatibilityChar(c) // per "5.14 of Unicode", rule out some combiners && !(c >= 0x20dd && c <= 0x20e0); default: // check for some exceptions: these are "alphabetic" return ((c >= 0x02bb && c <= 0x02c1) || c == 0x0559 || c == 0x06e5 || c == 0x06e6); } }
From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java
/**
 * Tells whether a character belongs to the Unicode "space separator" (Zs) category.
 *
 * @param ch the character to test
 * @return true if {@link Character#getType(char)} reports {@link Character#SPACE_SEPARATOR}
 */
private static boolean isSpaceSeparator(char ch) {
    final int category = Character.getType(ch);
    return category == Character.SPACE_SEPARATOR;
}
From source file:jef.tools.string.CharUtils.java
/**
 * Tells whether a character is considered punctuation, based on its
 * Unicode general category as reported by {@link Character#getType(char)}.
 *
 * <p>The accepted categories are exactly those whose constant value lies
 * in the range 20..25.
 * NOTE(review): this range includes {@code MATH_SYMBOL} (e.g. '+') and
 * excludes {@code INITIAL_QUOTE_PUNCTUATION} / {@code FINAL_QUOTE_PUNCTUATION};
 * confirm whether that is intended.
 *
 * @param c the character to test
 * @return true if the character's category is one of the accepted ones
 */
public static boolean isPunctuation(char c) {
    switch (Character.getType(c)) {
    case Character.DASH_PUNCTUATION:      // 20
    case Character.START_PUNCTUATION:     // 21
    case Character.END_PUNCTUATION:       // 22
    case Character.CONNECTOR_PUNCTUATION: // 23
    case Character.OTHER_PUNCTUATION:     // 24
    case Character.MATH_SYMBOL:           // 25
        return true;
    default:
        return false;
    }
}
From source file:org.mule.transport.legstar.tcp.LegstarTcpSocketFactory.java
/** * Expecting an ACK reply from the socket server. * @param socket the opened socket/* ww w . j a v a 2 s . co m*/ * @throws IOException if ACK is not received */ private void receiveAck(final Socket socket) throws IOException { byte[] response = read(socket, MAX_PROT_REPLY_LEN); if (response == null) { throw new IOException(I18N.noResponseFromHostMessage().getMessage()); } String ackString = (new String(response, HostCodec.HEADER_CODE_PAGE)).trim(); if (LOG.isDebugEnabled()) { LOG.debug("Socket server reply is: " + ackString); } /* If this is not a valid ACK, it could be an error report*/ if (REPLY_ACK_MSG_EC.compareTo(ackString.substring(0, REPLY_ACK_MSG_EC.length())) != 0) { /* Sanity check for characters being displayable. We expect * the host error reply to start with an error code in * uppercase characters. */ if (Character.getType(ackString.charAt(0)) == Character.UPPERCASE_LETTER) { throw (new IOException(ackString)); } else { throw (new IOException(I18N.unrecognizedResponseFromHostMessage().getMessage())); } } }
From source file:ome.services.blitz.test.utests.FilePathRestrictionsTest.java
/**
 * Test that two complex sets of rules are combined as expected.
 * (On a rainy day this test could be broken up into several smaller tests.)
 *
 * <p>Two rule sets, X and Y, are built by hand together with the expected
 * combination XY; X and Y are then combined with
 * {@code FilePathRestrictions.combineFilePathRestrictions} and every part of
 * the result is compared against XY.
 */
@Test
public void testCombineRules() {
    /* these variables define the X set of rules to combine */

    final SetMultimap<Integer, Integer> transformationMatrixX = HashMultimap.create();
    final Set<String> unsafePrefixesX = new HashSet<String>();
    final Set<String> unsafeSuffixesX = new HashSet<String>();
    final Set<String> unsafeNamesX = new HashSet<String>();
    final Set<Character> safeCharactersX = new HashSet<Character>();

    /* these variables define the Y set of rules to combine */

    final SetMultimap<Integer, Integer> transformationMatrixY = HashMultimap.create();
    final Set<String> unsafePrefixesY = new HashSet<String>();
    final Set<String> unsafeSuffixesY = new HashSet<String>();
    final Set<String> unsafeNamesY = new HashSet<String>();
    final Set<Character> safeCharactersY = new HashSet<Character>();

    /* these variables define the expected result of combining X and Y */

    final SetMultimap<Integer, Integer> transformationMatrixXY = HashMultimap.create();
    final Set<String> unsafePrefixesXY = new HashSet<String>();
    final Set<String> unsafeSuffixesXY = new HashSet<String>();
    final Set<String> unsafeNamesXY = new HashSet<String>();
    final Set<Character> safeCharactersXY = new HashSet<Character>();

    /* automatically map control characters to the safe characters;
     * we will remove and replace any that are to be tested specially
     * (65 is the code point of 'A', the only safe character expected in XY) */

    for (int codePoint = 0; codePoint < 0x100; codePoint++) {
        if (Character.getType(codePoint) == Character.CONTROL) {
            transformationMatrixXY.put(codePoint, 65);
        }
    }

    /* choose four control characters and remove them from the transformation matrix */

    final Iterator<Integer> controlCodePointIterator = transformationMatrixXY.keySet().iterator();
    /* all four are read from the iterator before any removal takes place */
    final int controlCharacterP = controlCodePointIterator.next();
    final int controlCharacterQ = controlCodePointIterator.next();
    final int controlCharacterR = controlCodePointIterator.next();
    final int controlCharacterS = controlCodePointIterator.next();

    transformationMatrixXY.removeAll(controlCharacterP);
    transformationMatrixXY.removeAll(controlCharacterQ);
    transformationMatrixXY.removeAll(controlCharacterR);
    transformationMatrixXY.removeAll(controlCharacterS);

    /* set up test case for combining control character mappings:
     * P is mapped only by X, S only by Y, Q and R by both */

    transformationMatrixX.put(controlCharacterP, 65);
    transformationMatrixX.put(controlCharacterP, 67);
    transformationMatrixX.put(controlCharacterQ, 65);
    transformationMatrixX.put(controlCharacterQ, 66);
    transformationMatrixX.put(controlCharacterR, 66);

    transformationMatrixY.put(controlCharacterQ, 66);
    transformationMatrixY.put(controlCharacterR, 66);
    transformationMatrixY.put(controlCharacterS, 68);

    transformationMatrixXY.put(controlCharacterP, 65);
    transformationMatrixXY.put(controlCharacterP, 67);
    transformationMatrixXY.put(controlCharacterQ, 66);
    transformationMatrixXY.put(controlCharacterR, 66);
    transformationMatrixXY.put(controlCharacterS, 68);

    /* choose four non-control characters and remove them from the transformation matrix */

    int[] normalCodePoints = new int[4];
    int index = 0;
    int codePoint = 0;
    /* scan upwards from code point 0 until four non-control code points are collected */
    while (index < normalCodePoints.length) {
        if (Character.getType(codePoint) != Character.CONTROL) {
            normalCodePoints[index++] = codePoint;
            transformationMatrixXY.removeAll(codePoint);
        }
        codePoint++;
    }

    int normalCharacterP = normalCodePoints[0];
    int normalCharacterQ = normalCodePoints[1];
    int normalCharacterR = normalCodePoints[2];
    int normalCharacterS = normalCodePoints[3];

    /* set up test case for combining non-control character mappings
     * (same pattern of mappings as for the control characters) */

    transformationMatrixX.put(normalCharacterP, 65);
    transformationMatrixX.put(normalCharacterP, 67);
    transformationMatrixX.put(normalCharacterQ, 65);
    transformationMatrixX.put(normalCharacterQ, 66);
    transformationMatrixX.put(normalCharacterR, 66);

    transformationMatrixY.put(normalCharacterQ, 66);
    transformationMatrixY.put(normalCharacterR, 66);
    transformationMatrixY.put(normalCharacterS, 68);

    transformationMatrixXY.put(normalCharacterP, 65);
    transformationMatrixXY.put(normalCharacterP, 67);
    transformationMatrixXY.put(normalCharacterQ, 66);
    transformationMatrixXY.put(normalCharacterR, 66);
    transformationMatrixXY.put(normalCharacterS, 68);

    /* set up test cases for combining proscribed strings:
     * the expected XY sets hold every string from X and from Y */

    unsafePrefixesX.add("XP");
    unsafePrefixesX.add("YP");
    unsafePrefixesY.add("YP");
    unsafePrefixesY.add("ZP");
    unsafePrefixesXY.add("XP");
    unsafePrefixesXY.add("YP");
    unsafePrefixesXY.add("ZP");

    unsafeSuffixesX.add("XS");
    unsafeSuffixesX.add("YS");
    unsafeSuffixesY.add("YS");
    unsafeSuffixesY.add("ZS");
    unsafeSuffixesXY.add("XS");
    unsafeSuffixesXY.add("YS");
    unsafeSuffixesXY.add("ZS");

    unsafeNamesX.add("XN");
    unsafeNamesX.add("YN");
    unsafeNamesY.add("YN");
    unsafeNamesY.add("ZN");
    unsafeNamesXY.add("XN");
    unsafeNamesXY.add("YN");
    unsafeNamesXY.add("ZN");

    /* set up test case for combining safe characters:
     * the expected XY set holds only the character present in both X and Y */

    safeCharactersX.add('A');
    safeCharactersX.add('B');
    safeCharactersY.add('A');
    safeCharactersXY.add('A');

    /* perform the combination */

    final FilePathRestrictions rulesX = new FilePathRestrictions(transformationMatrixX, unsafePrefixesX,
            unsafeSuffixesX, unsafeNamesX, safeCharactersX);
    final FilePathRestrictions rulesY = new FilePathRestrictions(transformationMatrixY, unsafePrefixesY,
            unsafeSuffixesY, unsafeNamesY, safeCharactersY);
    final FilePathRestrictions rulesXY = FilePathRestrictions.combineFilePathRestrictions(rulesX, rulesY);

    /* test that the combination is as expected in all respects */

    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.safeCharacters, safeCharactersXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafePrefixes, unsafePrefixesXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafeSuffixes, unsafeSuffixesXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafeNames, unsafeNamesXY));
    assertEqualMultimaps(rulesXY.transformationMatrix, transformationMatrixXY);

    /* given a mapping choice, prefer the safe character
     * (P may map to 65 or 67; 65 is 'A', the safe character) */

    Assert.assertEquals((int) rulesXY.transformationMap.get(controlCharacterP), 65);
    Assert.assertEquals((int) rulesXY.transformationMap.get(normalCharacterP), 65);
}
From source file:CharUtils.java
/**
 * True if character is punctuation.
 *
 * <p>A character counts as punctuation when its Unicode general category
 * (see {@link Character#getType(char)}) is contained in
 * {@code punctuationSet}, or when it is the backquote character or
 * {@code UNKNOWN_PUNC}.
 *
 * @param ch the character to test
 * @return true if the character is treated as punctuation
 */
public static boolean isPunctuation(char ch) {
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    return punctuationSet.contains(Integer.valueOf(Character.getType(ch)))
            || (ch == '`')
            || (ch == UNKNOWN_PUNC);
}
From source file:CharUtils.java
/** * True if character is symbol./*ww w . j av a 2 s . c o m*/ */ public static boolean isSymbol(char ch) { return symbolSet.contains(new Integer(Character.getType(ch))); }
From source file:gov.va.vinci.leo.ae.ExampleWhitespaceTokenizer.java
/** * Given a character c return the type definition from the * list of public static type definitions in this class. * * @param c//from ww w . ja va 2 s . c om * @return type definition for the character c */ private static int characterType(char c) { switch (Character.getType(c)) { //letters case Character.UPPERCASE_LETTER: case Character.LOWERCASE_LETTER: case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.OTHER_LETTER: case Character.NON_SPACING_MARK: case Character.ENCLOSING_MARK: case Character.COMBINING_SPACING_MARK: case Character.PRIVATE_USE: case Character.SURROGATE: case Character.MODIFIER_SYMBOL: return TK_LETTER; //numbers case Character.DECIMAL_DIGIT_NUMBER: case Character.LETTER_NUMBER: case Character.OTHER_NUMBER: return TK_NUMBER; //Regular Whitespace case Character.SPACE_SEPARATOR: return TK_WHITESPACE; //Punctuation case Character.DASH_PUNCTUATION: case Character.START_PUNCTUATION: case Character.END_PUNCTUATION: case Character.OTHER_PUNCTUATION: return TK_PUNCTUATION; //Simple NewLine case Character.LINE_SEPARATOR: case Character.PARAGRAPH_SEPARATOR: return TK_NEWLINE; //Other types of "control" characters case Character.CONTROL: if (c == '\n' || c == '\r') return TK_NEWLINE; if (Character.isWhitespace(c)) //Tab char is a "Control" character return TK_WHITESPACE; return TK_CONTROL; default: if (Character.isWhitespace(c)) { return TK_WHITESPACE; } //if return TK_UNKNOWN; }//switch }