List of usage examples for java.lang.Character.isLowSurrogate
public static boolean isLowSurrogate(char ch)
From source file:Main.java
public static void main(String[] args) { char ch1 = '\udc28', ch2 = 'a'; boolean b1 = Character.isLowSurrogate(ch1); boolean b2 = Character.isLowSurrogate(ch2); System.out.println(b1);//from w w w . j a v a 2 s .com System.out.println(b2); }
From source file:Main.java
/** * Replaces instances of Emoji unicode characters with their Emoji-Cheat sheet key * * @param s/* ww w . j av a 2 s .c o m*/ * @return */ public static String replaceUnicodeEmojis(String s) { if (TextUtils.isEmpty(s)) { return ""; } for (int i = 0; i < s.length(); i++) { String key = s.substring(i, i + 1); if ((Character.isLowSurrogate(key.charAt(0)) || Character.isHighSurrogate(key.charAt(0))) && s.length() > i + 1) { key = s.substring(i, i + 2); } String emoji = UNICODE_TO_CHEAT_SHEET.get(key); if (null != emoji) { s = s.replace(key, emoji); } } return s; }
From source file:Main.java
/**
 * Scans {@code chars} from the start and reports the position of the first character that must
 * be escaped.
 *
 * <p>A position is reported when any of the following holds:
 * <ul>
 *   <li>a high surrogate is the last element, is not followed by a low surrogate, or forms a
 *       code point rejected by {@code isValidCodePoint};</li>
 *   <li>a non-high-surrogate char is rejected by {@code isValidCodePoint};</li>
 *   <li>a non-high-surrogate char equals one of the entries in {@code encodings}.</li>
 * </ul>
 *
 * @param chars the characters to examine
 * @param encodings extra characters that always need escaping; may be {@code null}
 * @return the index of the first offending char (for a surrogate pair, the index of its high
 *     surrogate), or {@code chars.length} when nothing needs escaping
 */
protected static int testEscape(char[] chars, char[] encodings) {
    final int total = chars.length;
    int pos = 0;

    while (pos < total) {
        final char unit = chars[pos];

        if (!Character.isHighSurrogate(unit)) {
            if (!isValidCodePoint(unit)) {
                return pos;
            }
            if (encodings != null) {
                for (int k = 0; k < encodings.length; k++) {
                    if (encodings[k] == unit) {
                        return pos;
                    }
                }
            }
            pos++;
            continue;
        }

        // High surrogate: it needs a partner to its right.
        if (pos + 1 >= total) {
            return pos;
        }
        final char trail = chars[pos + 1];
        if (!Character.isLowSurrogate(trail)) {
            return pos;
        }
        if (!isValidCodePoint(Character.toCodePoint(unit, trail))) {
            return pos;
        }
        pos += 2;
    }

    return total;
}
From source file:Main.java
public static String $$truncate(String str, int maxLen, boolean doAddEllipsis) { if (str.length() <= maxLen) { return str; // no need to truncate }/* www.j a v a2 s . com*/ // If doAddEllipsis, either reduce maxLen to compensate, or else if maxLen is too small, just // turn off doAddEllipsis. if (doAddEllipsis) { if (maxLen > 3) { maxLen -= 3; } else { doAddEllipsis = false; } } // Make sure truncating at maxLen doesn't cut up a unicode surrogate pair. if (Character.isHighSurrogate(str.charAt(maxLen - 1)) && Character.isLowSurrogate(str.charAt(maxLen))) { maxLen -= 1; } // Truncate. str = str.substring(0, maxLen); // Add ellipsis. if (doAddEllipsis) { str += "..."; } return str; }
From source file:Main.java
/**
 * Finds where the longest NCName suffix of a character sequence begins.
 *
 * <p>The sequence is walked from its last char towards its first. Low-surrogate chars are
 * stepped over so that the code point is read from the char before them. Every time the current
 * code point satisfies {@code isNCNameStartChar} its index becomes the candidate; the walk stops
 * at the first code point that fails {@code isNCNameChar}.
 *
 * @param cs the character sequence to inspect
 * @return the index at which the longest NCName suffix of {@code cs} starts, or -1 if
 *     {@code cs} has no suffix that is an NCName
 */
public static int getNCNameSuffixIndex(CharSequence cs) {
    int suffixStart = -1;
    int pos = cs.length();

    while (--pos >= 0) {
        if (Character.isLowSurrogate(cs.charAt(pos))) {
            continue; // the code point is read from the preceding index
        }
        final int codePoint = Character.codePointAt(cs, pos);
        if (isNCNameStartChar(codePoint)) {
            suffixStart = pos;
        }
        if (!isNCNameChar(codePoint)) {
            break;
        }
    }

    return suffixStart;
}
From source file:Main.java
/** * Anything other than letter and numbers are considered delimiters. Remove start and end * delimiters since they are not relevant to search. * * @param query The query string to clean. * @return The cleaned query. Empty string if all characters are cleaned out. */// ww w. j a v a 2 s. co m public static String cleanStartAndEndOfSearchQuery(String query) { int start = 0; while (start < query.length()) { int codePoint = query.codePointAt(start); if (Character.isLetterOrDigit(codePoint)) { break; } start += Character.charCount(codePoint); } if (start == query.length()) { // All characters are delimiters. return ""; } int end = query.length() - 1; while (end > -1) { if (Character.isLowSurrogate(query.charAt(end))) { // Assume valid i18n string. There should be a matching high surrogate before it. end--; } int codePoint = query.codePointAt(end); if (Character.isLetterOrDigit(codePoint)) { break; } end--; } // end is a letter or digit. return query.substring(start, end + 1); }
From source file:Main.java
/** * This method ensures that the output String has only valid XML unicode * characters as specified by the XML 1.0 standard. For reference, please * see/*from w ww . j ava 2 s . c o m*/ * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the * standard</a>. This method will return an empty String if the input is * null or empty. * * @param in The String whose non-valid characters we want to remove. * @return The in String, stripped of non-valid characters. */ public static String cleanInvalidXmlChars(String text) { if (null == text || text.isEmpty()) { return text; } final int len = text.length(); char current = 0; int codePoint = 0; StringBuilder sb = new StringBuilder(); for (int i = 0; i < len; i++) { current = text.charAt(i); boolean surrogate = false; if (Character.isHighSurrogate(current) && i + 1 < len && Character.isLowSurrogate(text.charAt(i + 1))) { surrogate = true; codePoint = text.codePointAt(i++); } else { codePoint = current; } if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { sb.append(current); if (surrogate) { sb.append(text.charAt(i)); } } else { // // Invalid Char at index transformed into hex //System.err.println("Index=["+ i +"] Char=["+ String.format("%04x", (int)text.charAt(i)) +"] CodePoint=[" + codePoint + "]"); //sb.append("hex"+String.format("%04x", (int)text.charAt(i))); } } return sb.toString(); }
From source file:Strings.java
/** * Returns {@code true} if the specified character sequence is a * valid sequence of UTF-16 {@code char} values. A sequence is * legal if each high surrogate {@code char} value is followed by * a low surrogate value (as defined by {@link * Character#isHighSurrogate(char)} and {@link * Character#isLowSurrogate(char)})./*from ww w . j a va 2 s . co m*/ * * <p>This method does <b>not</b> check to see if the sequence of * code points defined by the UTF-16 consists only of code points * defined in the latest Unicode standard. The method only tests * the validity of the UTF-16 encoding sequence. * * @param cs Character sequence to test. * @return {@code true} if the sequence of characters is * legal in UTF-16. */ public static boolean isLegalUtf16(CharSequence cs) { for (int i = 0; i < cs.length(); ++i) { char high = cs.charAt(i); if (Character.isLowSurrogate(high)) return false; if (!Character.isHighSurrogate(high)) continue; ++i; if (i >= cs.length()) return false; char low = cs.charAt(i); if (!Character.isLowSurrogate(low)) return false; int codePoint = Character.toCodePoint(high, low); if (!Character.isValidCodePoint(codePoint)) return false; } return true; }
From source file:cherry.foundation.validator.CharTypeValidator.java
/**
 * Converts a string into the array of its Unicode code points, in order.
 *
 * <p>The string is walked code point by code point, so a well-formed surrogate pair yields one
 * entry and an unpaired surrogate yields one entry holding the surrogate value itself. This
 * matches what {@link String#codePointCount(int, int)} counts, so every slot of the result is
 * filled.
 *
 * @param acceptable the characters to convert; must not be {@code null}
 * @return the code points of {@code acceptable}
 */
private int[] createAcceptable(String acceptable) {
    final int length = acceptable.length();
    int[] result = new int[acceptable.codePointCount(0, length)];
    int j = 0;
    for (int i = 0; i < length; ) {
        int codePoint = acceptable.codePointAt(i);
        result[j++] = codePoint;
        i += Character.charCount(codePoint);
    }
    return result;
}
From source file:com.microsoft.windowsazure.mobileservices.zumoe2etestapp.framework.Util.java
public static String createSimpleRandomString(Random rndGen, int size, int minChar, int maxChar) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < size; i++) { int charRand; char c;//from w w w . j a va 2 s . co m do { charRand = rndGen.nextInt(maxChar - minChar); c = (char) (minChar + charRand); } while (Character.isLowSurrogate(c) || Character.isHighSurrogate(c)); sb.append(c); } return sb.toString(); }