Example usage for java.lang.Character.toCodePoint

Introduction

On this page you can find example usages of java.lang.Character.toCodePoint.

Prototype

public static int toCodePoint(char high, char low)

Source Link

Document

Converts the specified surrogate pair to its supplementary code point value.

Usage

From source file:Main.java

/**
 * Combines a fixed surrogate pair into its supplementary code point with
 * {@link Character#toCodePoint(char, char)} and prints the result to
 * standard output.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final char highSurrogate = '\ud800';
    final char lowSurrogate = '\udc00';

    final int codePoint = Character.toCodePoint(highSurrogate, lowSurrogate);

    System.out.println("Supplementary code point value is " + codePoint);
}

From source file:Main.java

/**
 * Scans {@code chars} from the start and returns the index at which scanning
 * stops.
 *
 * <p>Scanning stops at the first position that holds one of the following:
 * <ul>
 * <li>a high surrogate that is the last element of the array,</li>
 * <li>a high surrogate that is not followed by a low surrogate,</li>
 * <li>a surrogate pair whose code point is rejected by
 * {@code isValidCodePoint},</li>
 * <li>a non-high-surrogate char rejected by {@code isValidCodePoint},</li>
 * <li>a char equal to one of the entries of {@code encodings}.</li>
 * </ul>
 *
 * @param chars     the characters to scan
 * @param encodings characters that also stop the scan; may be {@code null}
 * @return the index of the char (or of the first half of the pair) that
 *         stopped the scan, or {@code chars.length} if nothing did
 */
protected static int testEscape(char[] chars, char[] encodings) {
    final int total = chars.length;
    int pos = 0;
    while (pos < total) {
        final int unitStart = pos;
        final char first = chars[pos++];

        if (Character.isHighSurrogate(first)) {
            if (pos >= total) {
                // High surrogate with nothing after it.
                return unitStart;
            }
            final char second = chars[pos++];
            if (Character.isLowSurrogate(second)
                    && isValidCodePoint(Character.toCodePoint(first, second))) {
                continue;
            }
            // Unpaired high surrogate or rejected pair.
            return unitStart;
        }

        if (!isValidCodePoint(first)) {
            return unitStart;
        }
        if (encodings != null) {
            for (int k = 0; k < encodings.length; k++) {
                if (encodings[k] == first) {
                    return unitStart;
                }
            }
        }
    }
    return total;
}

From source file:Strings.java

/**
 * Tells whether the given character sequence is well-formed UTF-16.
 *
 * <p>The sequence is accepted when every high surrogate (see {@link
 * Character#isHighSurrogate(char)}) is immediately followed by a low
 * surrogate (see {@link Character#isLowSurrogate(char)}) and no low
 * surrogate appears on its own.
 *
 * <p>Only the encoding is checked. The method does <b>not</b> verify that
 * the encoded code points are assigned in any particular version of the
 * Unicode standard.
 *
 * @param cs Character sequence to test.
 * @return {@code true} if the sequence of characters is legal in UTF-16,
 * {@code false} otherwise.
 */
public static boolean isLegalUtf16(CharSequence cs) {
    int pos = 0;
    while (pos < cs.length()) {
        final char lead = cs.charAt(pos++);
        if (Character.isLowSurrogate(lead)) {
            // Low surrogate without a preceding high surrogate.
            return false;
        }
        if (Character.isHighSurrogate(lead)) {
            if (pos >= cs.length()) {
                // Sequence ends in the middle of a pair.
                return false;
            }
            final char trail = cs.charAt(pos++);
            if (!Character.isLowSurrogate(trail)) {
                return false;
            }
            if (!Character.isValidCodePoint(Character.toCodePoint(lead, trail))) {
                return false;
            }
        }
    }
    return true;
}

From source file:de.fau.cs.osr.utils.StringUtils.java

/**
 * Escapes a string for inclusion in HTML.
 *
 * <p>Space, newline and tab are copied unchanged. {@code <}, {@code &} and
 * the apostrophe are always replaced; {@code >} and the double quote are
 * replaced only when {@code forAttribute} is {@code true}. Other chars below
 * 0x20 and the char 0xFE are passed to {@code hexCharRef}. Surrogate pairs
 * whose code point has type CONTROL, PRIVATE_USE or UNASSIGNED are passed to
 * {@code hexCharRef} as well; all remaining characters are copied as is.
 *
 * @param text         the text to escape; {@code null} yields an empty string
 * @param forAttribute whether {@code >} and {@code "} are escaped too
 * @return the escaped text
 * @throws IllegalArgumentException if the text contains a high surrogate
 *         that is not followed by a low surrogate, or a low surrogate that
 *         is not preceded by a high surrogate
 */
public static String escHtml(String text, boolean forAttribute) {
    if (text == null) {
        return "";
    }

    final int length = text.length();
    final StringBuilder out = new StringBuilder(length * 4 / 3);

    int pos = 0;
    while (pos < length) {
        final char ch = text.charAt(pos++);

        if (ch == ' ' || ch == '\n' || ch == '\t') {
            out.append(ch);
        } else if (ch == '<') {
            out.append("&lt;");
        } else if (ch == '>') {
            out.append(forAttribute ? "&gt;" : ">");
        } else if (ch == '&') {
            out.append("&amp;");
        } else if (ch == '\'') {
            // The named entity for the apostrophe is not safe to use here,
            // so a numeric reference is emitted instead.
            out.append("&#39;");
        } else if (ch == '"') {
            out.append(forAttribute ? "&quot;" : "\"");
        } else if (ch < 0x20 || ch == 0xFE) {
            hexCharRef(out, ch);
        } else if (Character.isHighSurrogate(ch)) {
            // A high surrogate must be followed by a low surrogate.
            if (pos >= length || !Character.isLowSurrogate(text.charAt(pos))) {
                throw new IllegalArgumentException("String contains isolated surrogates!");
            }
            final char trail = text.charAt(pos++);
            final int codePoint = Character.toCodePoint(ch, trail);
            final int type = Character.getType(codePoint);
            if (type == Character.CONTROL
                    || type == Character.PRIVATE_USE
                    || type == Character.UNASSIGNED) {
                hexCharRef(out, codePoint);
            } else {
                out.append(ch);
                out.append(trail);
            }
        } else if (Character.isLowSurrogate(ch)) {
            // A low surrogate that was not consumed as part of a pair.
            throw new IllegalArgumentException("String contains isolated surrogates!");
        } else {
            out.append(ch);
        }
    }

    return out.toString();
}

From source file:mobisocial.musubi.ui.util.EmojiSpannableFactory.java

/**
 * Walks over {@code span} and attaches a sticker span to every character
 * sequence whose key is present in {@code mEmojiMap}.
 *
 * <p>Chars up to 127 are skipped. For every other char the method first
 * calls {@code waitForEmoji()}. A lookup key is then built: the char itself,
 * the code point of a surrogate pair, or, when that code point is not in the
 * map, the first code point shifted left by 16 bits and OR-ed with the code
 * point of the following surrogate pair. If the key is in the map and a
 * bitmap is found in {@code mStickerCache}, a span created by
 * {@code createStickerSpan} is set on the range {@code [i, i + 1)}.
 *
 * <p>A high surrogate that is the very last char of the text has no partner
 * to read; it is logged as bogus and scanning ends instead of reading past
 * the end of the text.
 *
 * @param span the text to decorate in place
 */
public void updateSpannable(Spannable span) {
    Spannable source = span;
    for (int i = 0; i < source.length(); i++) {
        char high = source.charAt(i);
        if (high <= 127) {
            // fast exit ascii
            continue;
        }

        // Block until we're initialized
        waitForEmoji();

        long codePoint = high;
        if (Character.isHighSurrogate(high)) {
            if (i + 1 >= source.length()) {
                // The text ends with an unpaired high surrogate: reading the
                // next char would throw IndexOutOfBoundsException.
                Log.d(TAG, "Bogus unicode");
                break;
            }
            char low = source.charAt(++i);
            codePoint = Character.toCodePoint(high, low);
            if (Character.isSurrogatePair(high, low)) {
                if (!mEmojiMap.containsKey(codePoint)) {
                    // Not a known single code point: try to combine it with
                    // the surrogate pair that follows. Two more chars are
                    // needed for that.
                    if (i >= source.length() - 2) {
                        continue;
                    }
                    high = source.charAt(++i);
                    if (!Character.isHighSurrogate(high)) {
                        Log.w(TAG, "bad unicode character? " + high);
                        continue;
                    }
                    low = source.charAt(++i);
                    if (!Character.isSurrogatePair(high, low)) {
                        Log.d(TAG, "Bogus unicode surrogate " + high + ", " + low);
                        continue;
                    }
                    int codePoint2 = Character.toCodePoint(high, low);
                    // Pack both code points into a single long key.
                    codePoint = ((long) codePoint << 16) | codePoint2;
                }
            } else {
                Log.d(TAG, "Bogus unicode");
            }
        }

        if (mEmojiMap.containsKey(codePoint)) {
            Bitmap b = mStickerCache.get(codePoint);
            if (b != null) {
                DynamicDrawableSpan im = createStickerSpan(b);
                span.setSpan(im, i, i + 1, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
            } else {
                Log.d(TAG, "failed to decode bitmap for codepoints: " + codePoint);
            }
        }
    }
}

From source file:net.sourceforge.pmd.util.StringUtil.java

/**
 * @param buf/*from   w  w w .  j a  v  a 2 s . c  om*/
 * @param src
 * @param supportUTF8 override the default setting, whether special characters should be replaced with entities (
 *                    <code>false</code>) or should be included as is ( <code>true</code>).
 *
 */
public static void appendXmlEscaped(StringBuilder buf, String src, boolean supportUTF8) {
    char c;
    int i = 0;
    while (i < src.length()) {
        c = src.charAt(i++);
        if (c > '~') {
            // 126
            if (!supportUTF8) {
                int codepoint = c;
                // surrogate characters are not allowed in XML
                if (Character.isHighSurrogate(c)) {
                    char low = src.charAt(i++);
                    codepoint = Character.toCodePoint(c, low);
                }
                buf.append("&#x").append(Integer.toHexString(codepoint)).append(';');
            } else {
                buf.append(c);
            }
        } else if (c == '&') {
            buf.append("&amp;");
        } else if (c == '"') {
            buf.append("&quot;");
        } else if (c == '<') {
            buf.append("&lt;");
        } else if (c == '>') {
            buf.append("&gt;");
        } else {
            buf.append(c);
        }
    }
}

From source file:se.sawano.java.security.otp.google.keyuri.UnicodeEscaper.java

/**
 * Decodes the Unicode code point that starts at {@code index}.
 *
 * <p>In contrast to {@link Character#codePointAt(CharSequence, int)} and
 * {@link String#codePointAt(int)}, malformed surrogate usage is reported
 * rather than silently passed through.
 *
 * <p>Outcomes, in the order they are checked:
 * <ol>
 * <li>{@code index >= end}: {@link IndexOutOfBoundsException} is thrown.
 * <li>The char at {@code index} is not a surrogate: it is returned.
 * <li>The char is a low surrogate: {@link IllegalArgumentException} is
 * thrown.
 * <li>The char is a high surrogate and is the last char before {@code end}:
 * its negated value is returned.
 * <li>The char is a high surrogate followed by a low surrogate: the code
 * point of the pair is returned.
 * <li>The char is a high surrogate followed by anything else:
 * {@link IllegalArgumentException} is thrown.
 * </ol>
 *
 * @param seq
 *         the sequence of characters from which to decode the code point
 * @param index
 *         the index of the first character to decode
 * @param end
 *         the index beyond the last valid character to decode
 *
 * @return the Unicode code point for the given index or the negated value of the trailing high surrogate character at the end of the sequence
 */
protected static int codePointAt(CharSequence seq, int index, int end) {
    notNull(seq);
    if (index >= end) {
        throw new IndexOutOfBoundsException("Index exceeds specified range");
    }

    final char first = seq.charAt(index);

    // Common case: not in the surrogate range at all.
    if (first < Character.MIN_HIGH_SURROGATE || first > Character.MAX_LOW_SURROGATE) {
        return first;
    }

    // In the surrogate range but above the high surrogates: a stray low surrogate.
    if (first > Character.MAX_HIGH_SURROGATE) {
        throw new IllegalArgumentException("Unexpected low surrogate character '" + first + "' with value "
                + (int) first + " at index " + index + " in '" + seq + "'");
    }

    final int nextIndex = index + 1;
    if (nextIndex == end) {
        // Trailing high surrogate: signal it with the negated char value.
        return -first;
    }

    final char second = seq.charAt(nextIndex);
    if (!Character.isLowSurrogate(second)) {
        throw new IllegalArgumentException("Expected low surrogate but got char '" + second + "' with value "
                + (int) second + " at index " + nextIndex + " in '" + seq + "'");
    }
    return Character.toCodePoint(first, second);
}

From source file:org.eclipse.rdf4j.rio.ntriples.NTriplesParser.java

/**
 * Reads the next Unicode code point.
 *
 * <p>One char is read from {@code reader}. If it is a high surrogate, a
 * second char is read and both are combined into a supplementary code point.
 * If the stream ends right after a high surrogate, that unpaired surrogate
 * is returned unchanged instead of being combined with the end-of-stream
 * marker.
 *
 * @return the next Unicode code point, or -1 if the end of the stream has been reached.
 * @throws IOException if reading from the underlying reader fails
 */
protected int readCodePoint() throws IOException {
    int next = reader.read();
    if (Character.isHighSurrogate((char) next)) {
        int low = reader.read();
        if (low < 0) {
            // End of stream inside a surrogate pair. Casting -1 to char would
            // yield 0xFFFF and produce a code point that is not in the input.
            // NOTE(review): assumes the next read() reports end of stream
            // again, so the caller still sees -1 afterwards.
            return next;
        }
        next = Character.toCodePoint((char) next, (char) low);
    }
    return next;
}

From source file:org.diorite.config.serialization.snakeyaml.emitter.Emitter.java

/**
 * Writes {@code text} as a double-quoted scalar: an opening quote, the text
 * with special characters replaced by backslash escapes, and a closing quote.
 *
 * <p>Runs of plain characters (0x20 to 0x7E, excluding the double quote and
 * the backslash) are written unchanged. Every other character is written
 * either through {@code ESCAPE_REPLACEMENTS}, as a hexadecimal escape, or as
 * is, depending on {@code allowUnicode} and {@code StreamReader.isPrintable}.
 * When {@code split} is {@code true} and the current column would exceed
 * {@code bestWidth}, the output is folded with a trailing backslash followed
 * by {@code writeIndent()}.
 *
 * @param text  the scalar value to write
 * @param split whether long output may be folded across lines
 * @throws IOException propagated from the writes performed by this method
 */
private void writeDoubleQuoted(String text, boolean split) throws IOException {
    this.writeIndicator("\"", true, false, false);
    // start: index of the first char that has not been written yet.
    // end: index of the char currently being examined.
    int start = 0;
    int end = 0;
    // The loop runs once more with end == text.length() (ch == null) so that
    // the last pending run of plain characters is flushed.
    while (end <= text.length()) {
        Character ch = null;
        if (end < text.length()) {
            ch = text.charAt(end);
        }
        // True at end of text, for the listed special characters, and for
        // anything outside the range 0x20..0x7E.
        if ((ch == null) || ("\"\\\u0085\u2028\u2029\uFEFF".indexOf(ch) != -1)
                || !(('\u0020' <= ch) && (ch <= '\u007E'))) {
            // Flush the plain characters collected so far.
            if (start < end) {
                int len = end - start;
                this.column += len;
                this.stream.write(text, start, len);
                start = end;
            }
            if (ch != null) {
                String data;
                if (ESCAPE_REPLACEMENTS.containsKey(ch)) {
                    data = "\\" + ESCAPE_REPLACEMENTS.get(ch);
                } else if (!this.allowUnicode || !StreamReader.isPrintable(ch)) {
                    // if !allowUnicode or the character is not printable,
                    // we must encode it
                    if (ch <= '\u00FF') {
                        // Two hex digits, left-padded with a zero.
                        String s = "0" + Integer.toString(ch, HEX_RADIX);
                        data = "\\x" + s.substring(s.length() - 2);
                    } else if ((ch >= '\uD800') && (ch <= '\uDBFF')) {
                        // High surrogate: combine it with the following char
                        // into one eight-digit escape when one is available.
                        // NOTE(review): the following char is not checked to
                        // be a low surrogate, and the condition below skips
                        // the pairing when that char is the last one of the
                        // text - confirm both are intended.
                        if ((end + 1) < text.length()) {
                            Character ch2 = text.charAt(++end);
                            String s = "000" + Long.toHexString(Character.toCodePoint(ch, ch2));
                            data = "\\U" + s.substring(s.length() - 8);
                        } else {
                            String s = "000" + Integer.toString(ch, HEX_RADIX);
                            data = "\\u" + s.substring(s.length() - 4);
                        }
                    } else {
                        // Any other char above 0xFF: four hex digits.
                        String s = "000" + Integer.toString(ch, HEX_RADIX);
                        data = "\\u" + s.substring(s.length() - 4);
                    }
                } else {
                    data = String.valueOf(ch);
                }
                this.column += data.length();
                this.stream.write(data);
                // The escaped char has been written; the next run starts after it.
                start = end + 1;
            }
        }
        // Line folding: only strictly inside the text, at a space or where
        // nothing is pending, and only if the column would pass bestWidth.
        if (((0 < end) && (end < (text.length() - 1))) && ((Objects.equals(ch, ' ')) || (start >= end))
                && ((this.column + (end - start)) > this.bestWidth) && split) {
            String data;
            if (start >= end) {
                data = "\\";
            } else {
                data = text.substring(start, end) + "\\";
            }
            if (start < end) {
                start = end;
            }
            this.column += data.length();
            this.stream.write(data);
            this.writeIndent();
            this.whitespace = false;
            this.indention = false;
            // If the continuation line starts with a space, a backslash is
            // written in front of it.
            if (text.charAt(start) == ' ') {
                data = "\\";
                this.column += data.length();
                this.stream.write(data);
            }
        }
        end += 1;
    }
    this.writeIndicator("\"", false, false, false);
}