Example usage for java.lang Character isHighSurrogate

Introduction

On this page you can find example usages of java.lang.Character.isHighSurrogate.

Prototype

public static boolean isHighSurrogate(char ch)

Source Link

Document

Determines if the given char value is a <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> Unicode high-surrogate code unit</a> (also known as <i>leading-surrogate code unit</i>).

Usage

From source file:Main.java

/**
 * Prints every UTF-16 high-surrogate code unit, one per line, formatted as a
 * backslash, the letter u and four lowercase hexadecimal digits.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    // Walk the code units as ints so the loop variable cannot wrap around.
    int code = Character.MIN_VALUE;
    while (code < Character.MAX_VALUE) {
        final char unit = (char) code;
        if (Character.isHighSurrogate(unit)) {
            System.out.println(String.format("\\u%04x", code));
        }
        code++;
    }
}

From source file:Main.java

/**
 * Computes a byte count for {@code s} in the given charset by encoding the
 * string in chunks of about 100 chars and summing the encoded sizes.
 *
 * <p>One extra byte is added for every chunk, exactly as the previous
 * implementation did, so the result is the encoded size plus the number of
 * chunks. NOTE(review): the purpose of the extra byte is not visible here;
 * confirm with callers before changing it.
 *
 * <p>A chunk is never cut between the two halves of a surrogate pair: when the
 * last char of a chunk is a high surrogate and the next char is its low
 * surrogate, the chunk is extended by one char. Splitting a pair would make
 * the encoder substitute replacement bytes for each half and distort the count.
 *
 * @param s        the string to measure; must not be null
 * @param encoding the charset used to encode {@code s}
 * @return the summed encoded chunk sizes plus one per chunk; 0 for an empty string
 */
public static long bytesRequiredToEncode(final String s, final Charset encoding) {
    final int encodeChunk = 100;
    final int length = s.length();
    long count = 0;
    int i = 0;
    while (i < length) {
        int end = Math.min(i + encodeChunk, length);
        // substring(i, end) stops before index end, so the pair to protect is (end - 1, end).
        if (end < length
                && Character.isHighSurrogate(s.charAt(end - 1))
                && Character.isLowSurrogate(s.charAt(end))) {
            end++;
        }
        count += encoding.encode(s.substring(i, end)).remaining() + 1;
        i = end;
    }
    return count;
}

From source file:Main.java

/**
 * Replaces instances of Emoji unicode characters with their Emoji-Cheat sheet key
 *
 * @param s/*from w w w  . ja  v  a 2s. c  o  m*/
 * @return
 */
public static String replaceUnicodeEmojis(String s) {
    if (TextUtils.isEmpty(s)) {
        return "";
    }
    for (int i = 0; i < s.length(); i++) {
        String key = s.substring(i, i + 1);
        if ((Character.isLowSurrogate(key.charAt(0)) || Character.isHighSurrogate(key.charAt(0)))
                && s.length() > i + 1) {
            key = s.substring(i, i + 2);
        }
        String emoji = UNICODE_TO_CHEAT_SHEET.get(key);
        if (null != emoji) {
            s = s.replace(key, emoji);
        }
    }
    return s;
}

From source file:Main.java

/**
 * Scans {@code chars} for the first position that cannot be kept as-is.
 *
 * <p>A position is rejected when it holds a high surrogate that is not followed
 * by a low surrogate forming a code point accepted by {@code isValidCodePoint},
 * when it holds any other char rejected by {@code isValidCodePoint}, or when it
 * holds a char that also appears in {@code encodings}.
 *
 * @param chars     the characters to scan
 * @param encodings characters that must be rejected even though they are valid;
 *                  may be {@code null} to reject none
 * @return the index of the first rejected position (for a surrogate sequence,
 *         the index of its high surrogate), or {@code chars.length} if every
 *         position is accepted
 */
protected static int testEscape(char[] chars, char[] encodings) {
    final int total = chars.length;
    int pos = 0;
    while (pos < total) {
        final int start = pos;
        final char lead = chars[pos++];

        if (!Character.isHighSurrogate(lead)) {
            if (!isValidCodePoint(lead)) {
                return start;
            }
            if (encodings != null) {
                for (char reserved : encodings) {
                    if (lead == reserved) {
                        return start;
                    }
                }
            }
            continue;
        }

        // High surrogate at the very end of the input: nothing can complete the pair.
        if (pos >= total) {
            return start;
        }

        final char trail = chars[pos++];
        if (Character.isLowSurrogate(trail) && isValidCodePoint(Character.toCodePoint(lead, trail))) {
            continue;
        }
        return start;
    }
    return total;
}

From source file:Main.java

/**
 * Truncates a string to at most {@code maxLen} chars, optionally ending it
 * with {@code "..."}.
 *
 * <p>When an ellipsis is requested and {@code maxLen} is greater than 3, the
 * text is cut to {@code maxLen - 3} chars so that the result including the
 * ellipsis still fits in {@code maxLen}. When {@code maxLen} is 3 or less the
 * ellipsis is dropped. The cut never separates a surrogate pair: if it would
 * fall between a high and a low surrogate, one char less is kept.
 *
 * @param str           the string to truncate; must not be null
 * @param maxLen        the maximum length of the result, in chars
 * @param doAddEllipsis whether to append {@code "..."} to a truncated result
 * @return {@code str} itself if it already fits, otherwise the truncated text
 * @throws StringIndexOutOfBoundsException if {@code maxLen} is negative and
 *         {@code str} therefore needs truncating
 */
public static String $$truncate(String str, int maxLen, boolean doAddEllipsis) {

    if (str.length() <= maxLen) {
        return str; // no need to truncate
    }

    int keep = maxLen;
    boolean appendEllipsis = doAddEllipsis;

    // Either make room for the ellipsis, or give up on it when maxLen is too small.
    if (appendEllipsis) {
        if (keep > 3) {
            keep -= 3;
        } else {
            appendEllipsis = false;
        }
    }

    // Make sure truncating at keep doesn't cut up a unicode surrogate pair.
    // keep > 0 guards charAt(keep - 1); keep < str.length() always holds here.
    if (keep > 0
            && Character.isHighSurrogate(str.charAt(keep - 1))
            && Character.isLowSurrogate(str.charAt(keep))) {
        keep -= 1;
    }

    final String truncated = str.substring(0, keep);
    return appendEllipsis ? truncated + "..." : truncated;
}

From source file:Main.java

/**
 * This method ensures that the output String has only valid XML unicode
 * characters as specified by the XML 1.0 standard. For reference, please
 * see/*from w w w.  j  a  v  a 2s.  co m*/
 * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
 * standard</a>. This method will return an empty String if the input is
 * null or empty.
 *
 * @param in The String whose non-valid characters we want to remove.
 * @return The in String, stripped of non-valid characters.
 */
public static String cleanInvalidXmlChars(String text) {

    if (null == text || text.isEmpty()) {
        return text;
    }

    final int len = text.length();
    char current = 0;
    int codePoint = 0;
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < len; i++) {
        current = text.charAt(i);
        boolean surrogate = false;
        if (Character.isHighSurrogate(current) && i + 1 < len && Character.isLowSurrogate(text.charAt(i + 1))) {
            surrogate = true;
            codePoint = text.codePointAt(i++);
        } else {
            codePoint = current;
        }
        if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
                || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
                || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
                || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {
            sb.append(current);

            if (surrogate) {
                sb.append(text.charAt(i));
            }
        } else {

            // 
            // Invalid Char at index transformed into hex 
            //System.err.println("Index=["+ i +"] Char=["+ String.format("%04x", (int)text.charAt(i)) +"] CodePoint=[" + codePoint + "]");
            //sb.append("hex"+String.format("%04x", (int)text.charAt(i)));
        }
    }

    return sb.toString();
}

From source file:Strings.java

/**
 * Returns {@code true} if the specified character sequence is a
 * valid sequence of UTF-16 {@code char} values.  A sequence is
 * legal if each high surrogate {@code char} value is followed by
 * a low surrogate value (as defined by {@link
 * Character#isHighSurrogate(char)} and {@link
 * Character#isLowSurrogate(char)})./*from w ww . java 2 s .c  o  m*/
 *
 * <p>This method does <b>not</b> check to see if the sequence of
 * code points defined by the UTF-16 consists only of code points
 * defined in the latest Unicode standard.  The method only tests
 * the validity of the UTF-16 encoding sequence.
 * 
 * @param cs Character sequence to test.
 * @return {@code true} if the sequence of characters is
 * legal in UTF-16.
 */
public static boolean isLegalUtf16(CharSequence cs) {
    for (int i = 0; i < cs.length(); ++i) {
        char high = cs.charAt(i);
        if (Character.isLowSurrogate(high))
            return false;
        if (!Character.isHighSurrogate(high))
            continue;
        ++i;
        if (i >= cs.length())
            return false;
        char low = cs.charAt(i);
        if (!Character.isLowSurrogate(low))
            return false;
        int codePoint = Character.toCodePoint(high, low);
        if (!Character.isValidCodePoint(codePoint))
            return false;
    }
    return true;
}

From source file:de.fau.cs.osr.utils.StringUtils.java

/**
 * Escapes a string for use in HTML text or, with {@code forAttribute} set, in
 * an attribute value.
 *
 * <p>Always rewritten: {@code <} becomes {@code &lt;}, {@code &} becomes
 * {@code &amp;} and the apostrophe becomes {@code &#39;}. Rewritten only when
 * {@code forAttribute} is {@code true}: {@code >} becomes {@code &gt;} and the
 * double quote becomes {@code &quot;}. Space, newline and tab are copied
 * unchanged. Any other char below 0x20, and the char 0xFE, is handed to
 * {@code hexCharRef}. A surrogate pair is handed to {@code hexCharRef} when
 * {@link Character#getType(int)} reports CONTROL, PRIVATE_USE or UNASSIGNED
 * for its code point, and is copied unchanged otherwise. Every remaining
 * non-surrogate char is copied unchanged.
 *
 * <p>NOTE(review): {@code hexCharRef} is defined outside this block; presumably
 * it appends a hexadecimal character reference to the builder.
 *
 * @param text         the text to escape; may be {@code null}
 * @param forAttribute whether {@code >} and the double quote are escaped too
 * @return the escaped text, or {@code ""} when {@code text} is {@code null}
 * @throws IllegalArgumentException if {@code text} contains a high surrogate
 *         not followed by a low surrogate, or a low surrogate on its own
 */
public static String escHtml(String text, boolean forAttribute) {
    // StringEscapeUtils.escapeHtml(in) does not escape '\'' but a lot of 
    // other stuff that doesn't need escaping.

    if (text == null)
        return "";

    int n = text.length();
    // Initial capacity leaves a third of headroom for the longer escape sequences.
    StringBuilder sb = new StringBuilder(n * 4 / 3);
    for (int i = 0; i < n; i++) {
        char ch = text.charAt(i);
        switch (ch) {
        // Listed explicitly so that newline and tab never reach the "< 0x20" test below.
        case ' ':
        case '\n':
        case '\t':
            sb.append(ch);
            break;
        case '<':
            sb.append("&lt;");
            break;
        case '>':
            sb.append(forAttribute ? "&gt;" : ">");
            break;
        case '&':
            sb.append("&amp;");
            break;
        case '\'':
            // &apos; cannot safely be used, see wikipedia
            sb.append("&#39;");
            break;
        case '"':
            sb.append(forAttribute ? "&quot;" : "\"");
            break;
        default:
            // ch >= 0 always holds for a char; the effective test is ch < 0x20 or ch == 0xFE.
            // NOTE(review): why 0xFE is singled out is not visible here.
            if ((ch >= 0 && ch < 0x20) || (ch == 0xFE)) {
                hexCharRef(sb, ch);
                // Leaves the switch; the for loop then moves on to the next char.
                break;
            } else if (Character.isHighSurrogate(ch)) {
                // Advance to the char that should be the low half of the pair.
                ++i;
                if (i < n) {
                    char ch2 = text.charAt(i);
                    if (Character.isLowSurrogate(ch2)) {
                        int codePoint = Character.toCodePoint(ch, ch2);
                        switch (Character.getType(codePoint)) {
                        case Character.CONTROL:
                        case Character.PRIVATE_USE:
                        case Character.UNASSIGNED:
                            hexCharRef(sb, codePoint);
                            break;

                        default:
                            sb.append(ch);
                            sb.append(ch2);
                            break;
                        }

                        // Pair handled; skip the throw below and go to the next char.
                        continue;
                    }
                }
            } else if (!Character.isLowSurrogate(ch)) {
                // Ordinary char that needs no escaping.
                sb.append(ch);
                continue;
            }

            // No low surrogate followed or only low surrogate
            throw new IllegalArgumentException("String contains isolated surrogates!");
        }
    }

    return sb.toString();
}

From source file:com.microsoft.windowsazure.mobileservices.zumoe2etestapp.framework.Util.java

/**
 * Builds a random string of {@code size} chars drawn from the range
 * {@code [minChar, maxChar)}, never containing a surrogate code unit.
 *
 * <p>Each candidate is {@code minChar + rndGen.nextInt(maxChar - minChar)} cast
 * to {@code char}; candidates that are high or low surrogates are discarded
 * and drawn again.
 *
 * @param rndGen  the source of randomness
 * @param size    the number of chars to generate; nothing is generated when
 *                it is zero or negative
 * @param minChar the smallest char value that may be produced (inclusive)
 * @param maxChar the upper bound of the char values (exclusive)
 * @return the generated string
 */
public static String createSimpleRandomString(Random rndGen, int size, int minChar, int maxChar) {
    final StringBuilder result = new StringBuilder();
    while (result.length() < size) {
        final char candidate = (char) (minChar + rndGen.nextInt(maxChar - minChar));
        // Keep only chars that are neither half of a surrogate pair.
        if (!Character.isLowSurrogate(candidate) && !Character.isHighSurrogate(candidate)) {
            result.append(candidate);
        }
    }
    return result.toString();
}

From source file:it.geosolutions.httpproxy.utils.Utils.java

/**
 * @param ch/*from w  w w  .  j  a v a  2s.  com*/
 * @return
 */
final static int escapeHtmlFull(int ch) {
    if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9') {
        // safe
        return ch;
    } else if (Character.isWhitespace(ch)) {
        if (ch != '\n' && ch != '\r' && ch != '\t')
            // safe
            return ch;
    } else if (Character.isDefined(ch)) {
        // safe
        return ch;
    } else if (Character.isISOControl(ch)) {
        // paranoid version:isISOControl which are not isWhitespace
        // removed !
        // do nothing do not include in output !
        return -1;
    } else if (Character.isHighSurrogate((char) ch)) {
        // do nothing do not include in output !
        return -1;
    } else if (Character.isLowSurrogate((char) ch)) {
        // wrong char[] sequence, //TODO: LOG !!!
        return -1;
    }

    return -1;
}