List of usage examples for java.lang.Character.charCount
public static int charCount(int codePoint)
From source file:Main.java
/**
 * Demonstrates {@link Character#charCount(int)} on the code point U+12345 and prints
 * a message that depends on how many {@code char} values are needed to represent it.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    final int codePoint = 0x12345;
    final String oneCharMessage = "It is not a valid supplementary character";
    final String twoCharMessage = "It is a valid supplementary character";

    // charCount reports 1 for code points below U+10000 and 2 otherwise.
    switch (Character.charCount(codePoint)) {
        case 1:
            System.out.println(oneCharMessage);
            break;
        case 2:
            System.out.println(twoCharMessage);
            break;
        default:
            break;
    }
}
From source file:Main.java
public static final String filterUCS4(String str) { if (TextUtils.isEmpty(str)) { return str; }// w ww . j av a 2 s .c o m if (str.codePointCount(0, str.length()) == str.length()) { return str; } StringBuilder sb = new StringBuilder(); int index = 0; while (index < str.length()) { int codePoint = str.codePointAt(index); index += Character.charCount(codePoint); if (Character.isSupplementaryCodePoint(codePoint)) { continue; } sb.appendCodePoint(codePoint); } return sb.toString(); }
From source file:Main.java
public static String $$insertWordBreaks(String value, int maxCharsBetweenWordBreaks) { StringBuilder result = new StringBuilder(); // These variables keep track of important state while looping through the string below. boolean isInTag = false; // whether we're inside an HTML tag boolean isMaybeInEntity = false; // whether we might be inside an HTML entity int numCharsWithoutBreak = 0; // number of characters since the last word break for (int codePoint, i = 0; i < value.length(); i += Character.charCount(codePoint)) { codePoint = value.codePointAt(i); // If hit maxCharsBetweenWordBreaks, and next char is not a space, then add <wbr>. if (numCharsWithoutBreak >= maxCharsBetweenWordBreaks && codePoint != ' ') { result.append("<wbr>"); numCharsWithoutBreak = 0;/*from w ww. j av a 2 s . c o m*/ } if (isInTag) { // If inside an HTML tag and we see '>', it's the end of the tag. if (codePoint == '>') { isInTag = false; } } else if (isMaybeInEntity) { switch (codePoint) { // If maybe inside an entity and we see ';', it's the end of the entity. The entity // that just ended counts as one char, so increment numCharsWithoutBreak. case ';': isMaybeInEntity = false; ++numCharsWithoutBreak; break; // If maybe inside an entity and we see '<', we weren't actually in an entity. But // now we're inside an HTML tag. case '<': isMaybeInEntity = false; isInTag = true; break; // If maybe inside an entity and we see ' ', we weren't actually in an entity. Just // correct the state and reset the numCharsWithoutBreak since we just saw a space. case ' ': isMaybeInEntity = false; numCharsWithoutBreak = 0; break; } } else { // !isInTag && !isInEntity switch (codePoint) { // When not within a tag or an entity and we see '<', we're now inside an HTML tag. case '<': isInTag = true; break; // When not within a tag or an entity and we see '&', we might be inside an entity. case '&': isMaybeInEntity = true; break; // When we see a space, reset the numCharsWithoutBreak count. 
case ' ': numCharsWithoutBreak = 0; break; // When we see a non-space, increment the numCharsWithoutBreak. default: ++numCharsWithoutBreak; break; } } // In addition to adding <wbr>s, we still have to add the original characters. result.appendCodePoint(codePoint); } return result.toString(); }
From source file:Main.java
/** * Anything other than letter and numbers are considered delimiters. Remove start and end * delimiters since they are not relevant to search. * * @param query The query string to clean. * @return The cleaned query. Empty string if all characters are cleaned out. *///from w w w . jav a 2 s. c o m public static String cleanStartAndEndOfSearchQuery(String query) { int start = 0; while (start < query.length()) { int codePoint = query.codePointAt(start); if (Character.isLetterOrDigit(codePoint)) { break; } start += Character.charCount(codePoint); } if (start == query.length()) { // All characters are delimiters. return ""; } int end = query.length() - 1; while (end > -1) { if (Character.isLowSurrogate(query.charAt(end))) { // Assume valid i18n string. There should be a matching high surrogate before it. end--; } int codePoint = query.codePointAt(end); if (Character.isLetterOrDigit(codePoint)) { break; } end--; } // end is a letter or digit. return query.substring(start, end + 1); }
From source file:Main.java
/** * This method ensures that the output String has only valid XML unicode characters as specified by the * XML 1.0 standard. For reference, please see the * standard. This method will return an empty String if the input is null or empty. * * @param s - The String whose non-valid characters we want to replace. * @return The in String, where non-valid characters are replace by spaces. * @author Nuno Freire// www. ja v a 2 s. com */ public static String removeInvalidXMLCharacters(String s) { StringBuilder out = new StringBuilder(); // Used to hold the output. int codePoint; // Used to reference the current character. int i = 0; while (i < s.length()) { codePoint = s.codePointAt(i); // This is the unicode code of the character. if ((codePoint == 0x9) || // Consider testing larger ranges first to improve speed. (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } else { out.append(' '); } i += Character.charCount(codePoint); // Increment with the number of code units(java chars) needed to represent a Unicode char. } return out.toString(); }
From source file:Main.java
/** * Converts a stream of plaintext into valid XML. Output stream must convert * stream to UTF-8 when saving to disk./*w w w .j ava 2 s .co m*/ */ public static String makeValidXML(String plaintext) { StringBuilder out = new StringBuilder(); String text = removeXMLInvalidChars(plaintext); for (int cp, i = 0; i < text.length(); i += Character.charCount(cp)) { cp = text.codePointAt(i); out.append(escapeXMLChars(cp)); } return out.toString(); }
From source file:Main.java
/**
 * Tells whether a character sequence is an NCName (Non-Colon Name): one NCName start
 * character followed by zero or more NCName characters.
 *
 * Source: http://www.w3.org/TR/xml-names/#NT-NCName
 *
 * @param cs
 *            The character sequence to test.
 * @return <code>true</code> when <code>cs</code> is an NCName, <code>false</code>
 *         otherwise (including when <code>isEmpty(cs)</code> reports it as empty).
 */
public static boolean isNCName(CharSequence cs) {
    if (isEmpty(cs)) {
        return false;
    }

    // The leading code point has its own, stricter rule.
    final int head = Character.codePointAt(cs, 0);
    boolean valid = isNCNameStartChar(head);

    // Check the remaining code points, stopping at the first one that is rejected.
    int pos = Character.charCount(head);
    while (valid && pos < cs.length()) {
        final int current = Character.codePointAt(cs, pos);
        valid = isNCNameChar(current);
        pos += Character.charCount(current);
    }
    return valid;
}
From source file:Main.java
/*** This method ensures that the output String has only * * valid XML unicode characters as specified by the * * XML 1.0 standard. For reference, please see * * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the * * standard</a>. This method will return an empty * * String if the input is null or empty. * * @param in The String whose non-valid characters we want to remove. * * @return The in String, stripped of non-valid characters. * *//*from w w w .j a v a 2 s . c om*/ public static String stripNonValidXMLCharacters(String s) { StringBuilder out = new StringBuilder(); // Used to hold the output. int codePoint; // Used to reference the current character. //String ss = "\ud801\udc00"; // This is actualy one unicode character, represented by two code units!!!. int i = 0; while (i < s.length()) { codePoint = s.codePointAt(i); // This is the unicode code of the character. if ((codePoint == 0x9) || // Consider testing larger ranges first to improve speed. (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); // Increment with the number of code units(java chars) needed to represent a Unicode char. } return out.toString(); }
From source file:org.marketcetera.util.test.UnicodeDataTest.java
/**
 * Asserts that a string matches its expected representations: its UTF-16 code units,
 * its Unicode code points, and its byte encodings.
 *
 * @param str the string under test
 * @param chars the expected result of {@code str.toCharArray()}
 * @param ucps the expected code points of {@code str}, in order
 * @param nat the expected result of {@code str.getBytes()} (platform default charset)
 * @param utf8 the expected bytes for the {@code UTF8} charset constant
 * @param utf16be the expected bytes for the {@code UTF16BE} charset constant
 * @param utf16le the expected bytes for the {@code UTF16LE} charset constant
 * @param utf32be the expected bytes for the {@code UTF32BE} charset constant
 * @param utf32le the expected bytes for the {@code UTF32LE} charset constant
 */
private static void singleValid(String str, char[] chars, int[] ucps, byte[] nat, byte[] utf8,
        byte[] utf16be, byte[] utf16le, byte[] utf32be, byte[] utf32le) {
    assertArrayEquals(str.toCharArray(), chars);

    // Walk the string one code point at a time: charIndex moves in chars,
    // cpIndex counts the code points seen so far.
    for (int charIndex = 0, cpIndex = 0; charIndex < str.length(); cpIndex++) {
        final int ucp = str.codePointAt(charIndex);
        assertEquals("At code point position " + cpIndex, ucp, ucps[cpIndex]);
        charIndex += Character.charCount(ucp);
    }

    assertArrayEquals(str.getBytes(), nat);
    assertArrayEquals(str.getBytes(UTF8), utf8);
    assertArrayEquals(str.getBytes(UTF16BE), utf16be);
    assertArrayEquals(str.getBytes(UTF16LE), utf16le);
    assertArrayEquals(str.getBytes(UTF32BE), utf32be);
    assertArrayEquals(str.getBytes(UTF32LE), utf32le);
}
From source file:Main.java
public static String removeXMLInvalidChars(String str) { StringBuilder sb = new StringBuilder(str.length()); for (int c, i = 0; i < str.length(); i += Character.charCount(c)) { c = str.codePointAt(i);// www .j a va 2 s . co m if (!isValidXMLChar(c)) { c = ' '; } sb.appendCodePoint(c); } return sb.toString(); }