List of usage examples for java.lang.Character.isHighSurrogate
public static boolean isHighSurrogate(char ch)
From source file:mobisocial.musubi.ui.util.EmojiSpannableFactory.java
public void updateSpannable(Spannable span) { Spannable source = span;//w w w. j a va 2 s. co m for (int i = 0; i < source.length(); i++) { char high = source.charAt(i); if (high <= 127) { // fast exit ascii continue; } // Block until we're initialized waitForEmoji(); long codePoint = high; if (Character.isHighSurrogate(high)) { char low = source.charAt(++i); codePoint = Character.toCodePoint(high, low); if (Character.isSurrogatePair(high, low)) { // from BMP if (!mEmojiMap.containsKey(codePoint)) { if (i >= source.length() - 2) { continue; } high = source.charAt(++i); if (!Character.isHighSurrogate(high)) { Log.w(TAG, "bad unicode character? " + high); continue; } low = source.charAt(++i); if (!Character.isSurrogatePair(high, low)) { Log.d(TAG, "Bogus unicode surrogate " + high + ", " + low); continue; } int codePoint2 = Character.toCodePoint(high, low); //String label = String.format("U+%X U+%X", codePoint, codePoint2); codePoint = ((long) codePoint << 16) | codePoint2; } } else { Log.d(TAG, "Bogus unicode"); } } if (mEmojiMap.containsKey(codePoint)) { Bitmap b = mStickerCache.get(codePoint); if (b != null) { DynamicDrawableSpan im = createStickerSpan(b); span.setSpan(im, i, i + 1, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); } else { Log.d(TAG, "failed to decode bitmap for codepoints: " + codePoint); } } } }
From source file:Main.java
/** * Check if the passed character is valid for XML content. Works for XML 1.0 * and XML 1.1.<br>//from www. j a v a 2 s . c om * Note: makes no difference between the runtime JAXP solution and the * explicit Xerces version * * @param c * The character to be checked. * @return <code>true</code> if the character is valid in XML, * <code>false</code> otherwise. */ public static boolean isInvalidXMLCharacter(final char c) { // Based on: http://www.w3.org/TR/2006/REC-xml11-20060816/#charsets // Speed up by separating the most common use cases first if (c < 256) { // Character <= 0x00ff - use precomposed table return ILLEGAL_XML_CHARS[c]; } // Character >= 0x0100 // For completeness, the Unicode line separator character, #x2028, is // also supported. // Surrogate blocks (no Java IDs found) // High surrogate: 0xd800-0xdbff // Low surrogate: 0xdc00-0xdfff return c == '\u2028' || (c >= '\ufdd0' && c <= '\ufddf') || c == '\ufffe' || c == '\uffff' || Character.isHighSurrogate(c) || Character.isLowSurrogate(c); }
From source file:org.apache.kylin.common.util.StringUtil.java
/**
 * Counts the bytes the given character sequence occupies when encoded as UTF-8, without
 * actually encoding it.
 *
 * <p>A well-formed surrogate pair counts as one 4-byte sequence. An unpaired surrogate (high
 * or low) counts as 3 bytes, like any other UTF-16 unit above 0x7FF, and never consumes the
 * character that follows it.
 *
 * @param sequence the text to measure
 * @return the UTF-8 encoded length in bytes
 */
public static int utf8Length(CharSequence sequence) {
    int count = 0;
    for (int i = 0, len = sequence.length(); i < len; i++) {
        char ch = sequence.charAt(i);
        if (ch <= 0x7F) {
            count++;
        } else if (ch <= 0x7FF) {
            count += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < len
                && Character.isLowSurrogate(sequence.charAt(i + 1))) {
            // Only a genuine pair forms a supplementary code point; skip its low half.
            count += 4;
            ++i;
        } else {
            count += 3;
        }
    }
    return count;
}
From source file:net.sourceforge.pmd.util.StringUtil.java
/** * @param buf/* w w w . ja va 2 s .c o m*/ * @param src * @param supportUTF8 override the default setting, whether special characters should be replaced with entities ( * <code>false</code>) or should be included as is ( <code>true</code>). * */ public static void appendXmlEscaped(StringBuilder buf, String src, boolean supportUTF8) { char c; int i = 0; while (i < src.length()) { c = src.charAt(i++); if (c > '~') { // 126 if (!supportUTF8) { int codepoint = c; // surrogate characters are not allowed in XML if (Character.isHighSurrogate(c)) { char low = src.charAt(i++); codepoint = Character.toCodePoint(c, low); } buf.append("&#x").append(Integer.toHexString(codepoint)).append(';'); } else { buf.append(c); } } else if (c == '&') { buf.append("&"); } else if (c == '"') { buf.append("""); } else if (c == '<') { buf.append("<"); } else if (c == '>') { buf.append(">"); } else { buf.append(c); } } }
From source file:com.ikon.util.FormatUtil.java
/**
 * Returns a copy of the text with every UTF-16 surrogate code unit (high or low) dropped;
 * all other characters are kept in their original order.
 *
 * http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters#Surrogates
 */
public static String trimUnicodeSurrogates(String text) {
    StringBuilder kept = new StringBuilder();
    for (char unit : text.toCharArray()) {
        if (Character.isHighSurrogate(unit) || Character.isLowSurrogate(unit)) {
            continue;
        }
        kept.append(unit);
    }
    return kept.toString();
}
From source file:CodePointInputMethod.java
/**
 * Handles one typed character in this composition state.
 *
 * <p>A hexadecimal digit is inserted into the buffer at the insertion point. If the value
 * then read from the buffer is a high surrogate, the format switches to
 * {@code SURROGATE_PAIR} and a second escape prefix is appended; otherwise the format
 * becomes {@code ESCAPE}. Either way the composed text is sent. Any other character
 * only triggers a beep.
 *
 * @param c the typed character
 */
private void waitDigit2(char c) {
    if (Character.digit(c, 16) == -1) {
        // not a hex digit
        beep();
        return;
    }

    buffer.insert(insertionPoint++, c);
    char unit = (char) getCodePoint(buffer, 2, 5);
    if (Character.isHighSurrogate(unit)) {
        format = SURROGATE_PAIR;
        buffer.append("\\u");
        insertionPoint = 8;
    } else {
        format = ESCAPE;
    }
    sendComposedText();
}
From source file:de.fau.cs.osr.utils.StringUtils.java
/**
 * Tells whether the text contains a surrogate that is not part of a well-formed pair:
 * a high surrogate not immediately followed by a low surrogate (including one at the very
 * end of the text), or a low surrogate not preceded by a high surrogate.
 *
 * @param text the text to inspect
 * @return {@code true} if at least one isolated surrogate is found
 */
public static boolean hasIsolatedSurrogates(String text) {
    final int end = text.length();
    int pos = 0;
    while (pos < end) {
        final char unit = text.charAt(pos++);
        if (Character.isLowSurrogate(unit)) {
            // A low surrogate reached here was not consumed as the second half of a pair.
            return true;
        }
        if (Character.isHighSurrogate(unit)) {
            if (pos >= end || !Character.isLowSurrogate(text.charAt(pos))) {
                return true;
            }
            // Step over the matching low surrogate.
            pos++;
        }
    }
    return false;
}
From source file:com.anysoftkeyboard.keyboards.views.AnyKeyboardViewBase.java
/**
 * Tells whether the label's first UTF-16 unit is a high surrogate (0xD800-0xDBFF), i.e. the
 * label starts with a character outside the Basic Multilingual Plane.
 *
 * @param label the key label to inspect; an empty label yields {@code false}
 * @return {@code true} if the label is non-empty and starts with a high surrogate
 */
private static boolean isLabelOfPictographic(CharSequence label) {
    if (label.length() == 0) {
        return false;
    }
    // The explicit 0xd800..0xdbff range test and Character.isHighSurrogate cover exactly the
    // same range, so a single check is enough.
    return Character.isHighSurrogate(label.charAt(0));
}
From source file:org.bimserver.ifc.step.serializer.IfcStepSerializer.java
private void writePrimitive(Object val) throws SerializerException, IOException { if (val.getClass().getSimpleName().equals("Tristate")) { if (val.toString().equals("TRUE")) { print(BOOLEAN_TRUE);/*from w ww . j a v a 2s . c om*/ } else if (val.toString().equals("FALSE")) { print(BOOLEAN_FALSE); } else if (val.toString().equals("UNDEFINED")) { print(BOOLEAN_UNDEFINED); } } else if (val instanceof Double) { if (((Double) val).isInfinite() || (((Double) val).isNaN())) { LOGGER.info("Serializing infinite or NaN double as 0.0"); print("0.0"); } else { String string = val.toString(); if (string.endsWith(DOT_0)) { print(string.substring(0, string.length() - 1)); } else { print(string); } } } else if (val instanceof Boolean) { Boolean bool = (Boolean) val; if (bool) { print(BOOLEAN_TRUE); } else { print(BOOLEAN_FALSE); } } else if (val instanceof String) { print(SINGLE_QUOTE); String stringVal = (String) val; for (int i = 0; i < stringVal.length(); i++) { char c = stringVal.charAt(i); if (c == '\'') { print("\'\'"); } else if (c == '\\') { print("\\\\"); } else if (c >= 32 && c <= 126) { // ISO 8859-1 print("" + c); } else if (c < 255) { // ISO 10646 and ISO 8859-1 are the same < 255 , using ISO_8859_1 print("\\X\\" + new String(Hex.encodeHex( Charsets.ISO_8859_1.encode(CharBuffer.wrap(new char[] { (char) c })).array())) .toUpperCase()); } else { if (useIso8859_1) { // ISO 8859-1 with -128 offset ByteBuffer encode = Charsets.ISO_8859_1.encode(new String(new char[] { (char) (c - 128) })); print("\\S\\" + (char) encode.get()); } else { // The following code has not been tested (2012-04-25) // Use UCS-2 or UCS-4 // TODO when multiple sequential characters should be encoded in UCS-2 or UCS-4, we don't really need to add all those \X0\ \X2\ and \X4\ chars if (Character.isLowSurrogate(c)) { throw new SerializerException("Unexpected low surrogate range char"); } else if (Character.isHighSurrogate(c)) { // We need UCS-4, this is probably never happening if (i + 1 < 
stringVal.length()) { char low = stringVal.charAt(i + 1); if (!Character.isLowSurrogate(low)) { throw new SerializerException( "High surrogate char should be followed by char in low surrogate range"); } try { print("\\X4\\" + new String(Hex.encodeHex(Charset.forName("UTF-32") .encode(new String(new char[] { c, low })).array())).toUpperCase() + "\\X0\\"); } catch (UnsupportedCharsetException e) { throw new SerializerException(e); } i++; } else { throw new SerializerException( "High surrogate char should be followed by char in low surrogate range, but end of string reached"); } } else { // UCS-2 will do print("\\X2\\" + new String(Hex .encodeHex(Charsets.UTF_16BE.encode(CharBuffer.wrap(new char[] { c })).array())) .toUpperCase() + "\\X0\\"); } } } } print(SINGLE_QUOTE); } else if (val instanceof Enumerator) { print("." + val + "."); } else { print(val == null ? "$" : val.toString()); } }
From source file:CodePointInputMethod.java
private void finishComposition() { int len = buffer.length(); if (len == 6 && format != SPECIAL_ESCAPE) { char codePoint = (char) getCodePoint(buffer, 2, 5); if (Character.isValidCodePoint(codePoint) && codePoint != 0xFFFF) { buffer.setLength(0);//from ww w. j a v a2 s .c o m buffer.append(codePoint); sendCommittedText(); return; } } else if (len == 8 && format == SPECIAL_ESCAPE) { int codePoint = getCodePoint(buffer, 2, 7); if (Character.isValidCodePoint(codePoint) && codePoint != 0xFFFF) { buffer.setLength(0); buffer.appendCodePoint(codePoint); sendCommittedText(); return; } } else if (len == 12 && format == SURROGATE_PAIR) { char[] codePoint = { (char) getCodePoint(buffer, 2, 5), (char) getCodePoint(buffer, 8, 11) }; if (Character.isHighSurrogate(codePoint[0]) && Character.isLowSurrogate(codePoint[1])) { buffer.setLength(0); buffer.append(codePoint); sendCommittedText(); return; } } beep(); }