Example usage for java.lang.Character.isHighSurrogate

Introduction

This page collects example usages of java.lang.Character.isHighSurrogate.

Prototype

public static boolean isHighSurrogate(char ch)

Source Link

Document

Determines if the given char value is a <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">Unicode high-surrogate code unit</a> (also known as a <i>leading-surrogate code unit</i>).

Usage

From source file:mobisocial.musubi.ui.util.EmojiSpannableFactory.java

/**
 * Walks the text of {@code span} and attaches a sticker image span wherever a
 * code point (or a combination of two supplementary code points) is a key of
 * {@code mEmojiMap}.
 *
 * <p>ASCII characters are skipped immediately. For every other character the
 * method first calls {@code waitForEmoji()}, then builds a lookup key:
 * <ul>
 *   <li>a non-surrogate char is used as-is;</li>
 *   <li>a surrogate pair is combined into its code point;</li>
 *   <li>if that code point is not a known key and another surrogate pair
 *       follows, both code points are packed into one {@code long}
 *       ({@code first << 16 | second}).</li>
 * </ul>
 * When the key is known and a bitmap is available from {@code mStickerCache},
 * a span is set over the single position {@code [i, i + 1)}, where {@code i}
 * is the index of the last code unit consumed for that key.
 *
 * @param span the text to decorate; it is modified in place
 */
public void updateSpannable(Spannable span) {
    Spannable source = span;
    for (int i = 0; i < source.length(); i++) {
        char high = source.charAt(i);
        if (high <= 127) {
            // fast exit ascii
            continue;
        }

        // Block until we're initialized
        waitForEmoji();

        long codePoint = high;
        if (Character.isHighSurrogate(high)) {
            if (i + 1 >= source.length()) {
                // A high surrogate as the very last char has no partner; reading
                // charAt(++i) here would index past the end of the text.
                Log.d(TAG, "Bogus unicode");
                break;
            }
            char low = source.charAt(++i);
            codePoint = Character.toCodePoint(high, low);
            if (Character.isSurrogatePair(high, low)) {
                // from BMP
                if (!mEmojiMap.containsKey(codePoint)) {
                    // Two more code units are needed for a second surrogate pair
                    if (i >= source.length() - 2) {
                        continue;
                    }
                    high = source.charAt(++i);
                    if (!Character.isHighSurrogate(high)) {
                        Log.w(TAG, "bad unicode character? " + high);
                        continue;
                    }
                    low = source.charAt(++i);
                    if (!Character.isSurrogatePair(high, low)) {
                        Log.d(TAG, "Bogus unicode surrogate " + high + ", " + low);
                        continue;
                    }
                    int codePoint2 = Character.toCodePoint(high, low);
                    // Pack both code points into a single long lookup key
                    codePoint = ((long) codePoint << 16) | codePoint2;
                }
            } else {
                Log.d(TAG, "Bogus unicode");
            }
        }

        if (mEmojiMap.containsKey(codePoint)) {
            Bitmap b = mStickerCache.get(codePoint);
            if (b != null) {
                DynamicDrawableSpan im = createStickerSpan(b);
                span.setSpan(im, i, i + 1, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
            } else {
                Log.d(TAG, "failed to decode bitmap for codepoints: " + codePoint);
            }
        }
    }
}

From source file:Main.java

/**
 * Check if the passed character is invalid for XML content. Works for XML 1.0
 * and XML 1.1.<br>
 * Note: makes no difference between the runtime JAXP solution and the
 * explicit Xerces version
 *
 * @param c
 *        The character to be checked.
 * @return <code>true</code> if the character is <b>not</b> valid in XML,
 *         <code>false</code> otherwise.
 */
public static boolean isInvalidXMLCharacter(final char c) {
    // Based on: http://www.w3.org/TR/2006/REC-xml11-20060816/#charsets

    // Most common case first: characters up to 0x00ff are answered from the
    // precomposed lookup table.
    if (c <= 0xff) {
        return ILLEGAL_XML_CHARS[c];
    }

    // Characters >= 0x0100 from here on.

    // Surrogate blocks: high 0xd800-0xdbff, low 0xdc00-0xdfff
    if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
        return true;
    }

    // The range 0xfdd0-0xfddf
    if (c >= '\ufdd0' && c <= '\ufddf') {
        return true;
    }

    // For completeness, the Unicode line separator character, #x2028, is
    // also reported, together with 0xfffe and 0xffff.
    return c == '\u2028' || c == '\ufffe' || c == '\uffff';
}

From source file:org.apache.kylin.common.util.StringUtil.java

/**
 * Computes how many bytes {@code sequence} occupies when encoded as UTF-8,
 * without actually encoding it.
 *
 * <p>Byte counts per UTF-16 code unit:
 * <ul>
 *   <li>{@code U+0000..U+007F}: 1 byte</li>
 *   <li>{@code U+0080..U+07FF}: 2 bytes</li>
 *   <li>a high surrogate immediately followed by a low surrogate: 4 bytes for
 *       the pair</li>
 *   <li>everything else, including an unpaired surrogate: 3 bytes</li>
 * </ul>
 *
 * @param sequence the text to measure; must not be {@code null}
 * @return the number of UTF-8 bytes
 */
public static int utf8Length(CharSequence sequence) {
    int count = 0;
    for (int i = 0, len = sequence.length(); i < len; i++) {
        char ch = sequence.charAt(i);
        if (ch <= 0x7F) {
            count++;
        } else if (ch <= 0x7FF) {
            count += 2;
        } else if (Character.isHighSurrogate(ch) && i + 1 < len
                && Character.isLowSurrogate(sequence.charAt(i + 1))) {
            // Only a real pair is one 4-byte code point; consume the low half too.
            count += 4;
            ++i;
        } else {
            // Remaining BMP chars. An unpaired surrogate is also counted as 3 bytes
            // and must not swallow the char that follows it.
            count += 3;
        }
    }
    return count;
}

From source file:net.sourceforge.pmd.util.StringUtil.java

/**
 * @param buf/*  w  w  w  .  ja va  2 s .c  o  m*/
 * @param src
 * @param supportUTF8 override the default setting, whether special characters should be replaced with entities (
 *                    <code>false</code>) or should be included as is ( <code>true</code>).
 *
 */
public static void appendXmlEscaped(StringBuilder buf, String src, boolean supportUTF8) {
    char c;
    int i = 0;
    while (i < src.length()) {
        c = src.charAt(i++);
        if (c > '~') {
            // 126
            if (!supportUTF8) {
                int codepoint = c;
                // surrogate characters are not allowed in XML
                if (Character.isHighSurrogate(c)) {
                    char low = src.charAt(i++);
                    codepoint = Character.toCodePoint(c, low);
                }
                buf.append("&#x").append(Integer.toHexString(codepoint)).append(';');
            } else {
                buf.append(c);
            }
        } else if (c == '&') {
            buf.append("&amp;");
        } else if (c == '"') {
            buf.append("&quot;");
        } else if (c == '<') {
            buf.append("&lt;");
        } else if (c == '>') {
            buf.append("&gt;");
        } else {
            buf.append(c);
        }
    }
}

From source file:com.ikon.util.FormatUtil.java

/**
 * Removes every UTF-16 surrogate code unit (high or low) from the given text.
 * Surrogates that form a valid pair are dropped as well, so supplementary
 * characters disappear entirely.
 *
 * http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters#Surrogates
 *
 * @param text the text to filter
 * @return the text without any surrogate code units
 */
public static String trimUnicodeSurrogates(String text) {
    final StringBuilder kept = new StringBuilder();

    for (final char unit : text.toCharArray()) {
        if (Character.isHighSurrogate(unit) || Character.isLowSurrogate(unit)) {
            // Skip both halves of any surrogate
            continue;
        }
        kept.append(unit);
    }

    return kept.toString();
}

From source file:CodePointInputMethod.java

private void waitDigit2(char c) {
    if (Character.digit(c, 16) != -1) {
        buffer.insert(insertionPoint++, c);
        char codePoint = (char) getCodePoint(buffer, 2, 5);
        if (Character.isHighSurrogate(codePoint)) {
            format = SURROGATE_PAIR;/*from   ww w .j a  va  2 s . c  o  m*/
            buffer.append("\\u");
            insertionPoint = 8;
        } else {
            format = ESCAPE;
        }
        sendComposedText();
    } else {
        beep();
    }
}

From source file:de.fau.cs.osr.utils.StringUtils.java

/**
 * Tells whether the text contains a surrogate code unit that is not part of a
 * well-formed pair: a high surrogate that is not immediately followed by a
 * low surrogate, or a low surrogate that is not preceded by a high surrogate.
 *
 * @param text the text to inspect
 * @return {@code true} if at least one unpaired surrogate is present
 */
public static boolean hasIsolatedSurrogates(String text) {
    final int n = text.length();
    int pos = 0;
    while (pos < n) {
        final char unit = text.charAt(pos++);
        if (Character.isLowSurrogate(unit)) {
            // Low half without a leading high half
            return true;
        }
        if (Character.isHighSurrogate(unit)) {
            // The high half must be followed by a low half
            if (pos >= n || !Character.isLowSurrogate(text.charAt(pos))) {
                return true;
            }
            pos++;
        }
    }
    return false;
}

From source file:com.anysoftkeyboard.keyboards.views.AnyKeyboardViewBase.java

/**
 * Reports whether the label starts with a high-surrogate code unit
 * (0xd800..0xdbff), i.e. with the first half of a supplementary character.
 *
 * @param label the key label to inspect
 * @return {@code true} if the label is non-empty and its first char is a high
 *         surrogate, {@code false} otherwise
 */
private static boolean isLabelOfPictographic(CharSequence label) {
    // Character.isHighSurrogate covers exactly the range 0xd800..0xdbff
    return label.length() != 0 && Character.isHighSurrogate(label.charAt(0));
}

From source file:org.bimserver.ifc.step.serializer.IfcStepSerializer.java

/**
 * Prints a single primitive value in its serialized textual form.
 *
 * <ul>
 *   <li>{@code null} is printed as {@code $}.</li>
 *   <li>A value whose class simple name is {@code Tristate} is printed as one
 *       of the boolean constants, chosen by its {@code toString()}.</li>
 *   <li>A {@code Double} that is infinite or NaN is printed as {@code 0.0};
 *       otherwise its string form is printed, minus the last character when
 *       it ends with {@code DOT_0}.</li>
 *   <li>A {@code Boolean} is printed as the true/false constant.</li>
 *   <li>A {@code String} is printed between {@code SINGLE_QUOTE}s with quote
 *       and backslash doubled, chars 32..126 verbatim, and all other chars
 *       escaped with {@code \X\}, {@code \S\}, {@code \X2\} or {@code \X4\}
 *       directives.</li>
 *   <li>An {@code Enumerator} is printed surrounded by dots.</li>
 *   <li>Anything else is printed via {@code toString()}.</li>
 * </ul>
 *
 * @param val the value to print, may be {@code null}
 * @throws SerializerException if a string contains an unpaired surrogate, or
 *         if the UTF-32 charset is unavailable
 * @throws IOException declared by the method; presumably raised by
 *         {@code print} (not visible in this block)
 */
private void writePrimitive(Object val) throws SerializerException, IOException {
    if (val == null) {
        // Must be handled before anything dereferences val: the type dispatch
        // below starts with val.getClass().
        print("$");
        return;
    }
    if (val.getClass().getSimpleName().equals("Tristate")) {
        if (val.toString().equals("TRUE")) {
            print(BOOLEAN_TRUE);
        } else if (val.toString().equals("FALSE")) {
            print(BOOLEAN_FALSE);
        } else if (val.toString().equals("UNDEFINED")) {
            print(BOOLEAN_UNDEFINED);
        }
    } else if (val instanceof Double) {
        if (((Double) val).isInfinite() || (((Double) val).isNaN())) {
            LOGGER.info("Serializing infinite or NaN double as 0.0");
            print("0.0");
        } else {
            String string = val.toString();
            if (string.endsWith(DOT_0)) {
                // Drop only the final character of the DOT_0 suffix
                print(string.substring(0, string.length() - 1));
            } else {
                print(string);
            }
        }
    } else if (val instanceof Boolean) {
        Boolean bool = (Boolean) val;
        if (bool) {
            print(BOOLEAN_TRUE);
        } else {
            print(BOOLEAN_FALSE);
        }
    } else if (val instanceof String) {
        print(SINGLE_QUOTE);
        String stringVal = (String) val;
        for (int i = 0; i < stringVal.length(); i++) {
            char c = stringVal.charAt(i);
            if (c == '\'') {
                print("\'\'");
            } else if (c == '\\') {
                print("\\\\");
            } else if (c >= 32 && c <= 126) {
                // ISO 8859-1
                print("" + c);
            } else if (c < 255) {
                //  ISO 10646 and ISO 8859-1 are the same < 255 , using ISO_8859_1
                // NOTE(review): the bound is exclusive, so char 255 itself takes the
                // branch below - confirm that is intended.
                print("\\X\\" + new String(Hex.encodeHex(
                        Charsets.ISO_8859_1.encode(CharBuffer.wrap(new char[] { (char) c })).array()))
                                .toUpperCase());
            } else {
                if (useIso8859_1) {
                    // ISO 8859-1 with -128 offset
                    // NOTE(review): only chars >= 255 reach this point; confirm the
                    // -128 offset produces the intended byte for them.
                    ByteBuffer encode = Charsets.ISO_8859_1.encode(new String(new char[] { (char) (c - 128) }));
                    print("\\S\\" + (char) encode.get());
                } else {
                    // The following code has not been tested (2012-04-25)
                    // Use UCS-2 or UCS-4

                    // TODO when multiple sequential characters should be encoded in UCS-2 or UCS-4, we don't really need to add all those \X0\ \X2\ and \X4\ chars
                    if (Character.isLowSurrogate(c)) {
                        throw new SerializerException("Unexpected low surrogate range char");
                    } else if (Character.isHighSurrogate(c)) {
                        // We need UCS-4, this is probably never happening
                        if (i + 1 < stringVal.length()) {
                            char low = stringVal.charAt(i + 1);
                            if (!Character.isLowSurrogate(low)) {
                                throw new SerializerException(
                                        "High surrogate char should be followed by char in low surrogate range");
                            }
                            try {
                                // NOTE(review): ByteBuffer.array() exposes the whole backing
                                // array, not only the bytes up to limit(); verify the encoded
                                // buffer carries no trailing padding here.
                                print("\\X4\\" + new String(Hex.encodeHex(Charset.forName("UTF-32")
                                        .encode(new String(new char[] { c, low })).array())).toUpperCase()
                                        + "\\X0\\");
                            } catch (UnsupportedCharsetException e) {
                                throw new SerializerException(e);
                            }
                            // The low surrogate has been consumed together with the high one
                            i++;
                        } else {
                            throw new SerializerException(
                                    "High surrogate char should be followed by char in low surrogate range, but end of string reached");
                        }
                    } else {
                        // UCS-2 will do
                        print("\\X2\\" + new String(Hex
                                .encodeHex(Charsets.UTF_16BE.encode(CharBuffer.wrap(new char[] { c })).array()))
                                        .toUpperCase()
                                + "\\X0\\");
                    }
                }
            }
        }
        print(SINGLE_QUOTE);
    } else if (val instanceof Enumerator) {
        print("." + val + ".");
    } else {
        print(val.toString());
    }
}

From source file:CodePointInputMethod.java

/**
 * Tries to commit the text currently held in {@code buffer}. Depending on the
 * buffer length and the current {@code format}, the buffer content is parsed
 * via {@code getCodePoint} as one of:
 * <ul>
 *   <li>length 6, format other than {@code SPECIAL_ESCAPE}: a single char
 *       taken from positions 2..5;</li>
 *   <li>length 8, format {@code SPECIAL_ESCAPE}: a code point taken from
 *       positions 2..7;</li>
 *   <li>length 12, format {@code SURROGATE_PAIR}: a high and a low surrogate
 *       taken from positions 2..5 and 8..11.</li>
 * </ul>
 * On success the buffer is replaced by the parsed character(s) and the
 * committed text is sent. In every other case the method beeps.
 */
private void finishComposition() {
    switch (buffer.length()) {
    case 6:
        if (format != SPECIAL_ESCAPE) {
            final char single = (char) getCodePoint(buffer, 2, 5);
            if (Character.isValidCodePoint(single) && single != 0xFFFF) {
                buffer.setLength(0);
                buffer.append(single);
                sendCommittedText();
                return;
            }
        }
        break;
    case 8:
        if (format == SPECIAL_ESCAPE) {
            final int supplementary = getCodePoint(buffer, 2, 7);
            if (Character.isValidCodePoint(supplementary) && supplementary != 0xFFFF) {
                buffer.setLength(0);
                buffer.appendCodePoint(supplementary);
                sendCommittedText();
                return;
            }
        }
        break;
    case 12:
        if (format == SURROGATE_PAIR) {
            final char[] pair = { (char) getCodePoint(buffer, 2, 5), (char) getCodePoint(buffer, 8, 11) };
            if (Character.isHighSurrogate(pair[0]) && Character.isLowSurrogate(pair[1])) {
                buffer.setLength(0);
                buffer.append(pair);
                sendCommittedText();
                return;
            }
        }
        break;
    default:
        break;
    }

    // Nothing could be committed
    beep();
}