Example usage for java.lang.Character.isLowSurrogate

Introduction

On this page you can find example usages of java.lang.Character.isLowSurrogate.

Prototype

public static boolean isLowSurrogate(char ch)

Source Link

Document

Determines if the given char value is a <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> Unicode low-surrogate code unit</a> (also known as <i>trailing-surrogate code unit</i>).

Usage

From source file:de.fau.cs.osr.utils.StringUtils.java

/**
 * Escapes a string for inclusion in HTML.
 *
 * <p>This is used instead of {@code StringEscapeUtils.escapeHtml}, which does
 * not escape the apostrophe but escapes many characters that do not need it.
 *
 * <ul>
 * <li>Space, newline and tab are copied unchanged.</li>
 * <li>{@code <} and {@code &} are always replaced by entities; the apostrophe
 * is always written as the numeric reference {@code &#39;}.</li>
 * <li>{@code >} and the double quote are replaced by entities only when
 * {@code forAttribute} is {@code true}.</li>
 * <li>Other characters below 0x20, and the character 0xFE, are written via
 * {@code hexCharRef}.</li>
 * <li>A valid surrogate pair is written via {@code hexCharRef} when its code
 * point is of type CONTROL, PRIVATE_USE or UNASSIGNED; otherwise both code
 * units are copied unchanged.</li>
 * </ul>
 *
 * @param text
 *            the text to escape; {@code null} yields the empty string
 * @param forAttribute
 *            whether {@code >} and the double quote must be escaped as well
 * @return the escaped text
 * @throws IllegalArgumentException
 *             if the text contains a surrogate that is not part of a valid
 *             high/low pair
 */
public static String escHtml(String text, boolean forAttribute) {
    if (text == null) {
        return "";
    }

    final int length = text.length();
    final StringBuilder out = new StringBuilder(length * 4 / 3);

    int pos = 0;
    while (pos < length) {
        final char current = text.charAt(pos++);

        // Characters with a fixed replacement (or that are copied verbatim).
        switch (current) {
        case '<':
            out.append("&lt;");
            continue;
        case '&':
            out.append("&amp;");
            continue;
        case '\'':
            // &apos; cannot safely be used, hence the numeric reference
            out.append("&#39;");
            continue;
        case '>':
            out.append(forAttribute ? "&gt;" : ">");
            continue;
        case '"':
            out.append(forAttribute ? "&quot;" : "\"");
            continue;
        case ' ':
        case '\n':
        case '\t':
            out.append(current);
            continue;
        default:
            break;
        }

        if (current < 0x20 || current == 0xFE) {
            hexCharRef(out, current);
            continue;
        }

        if (Character.isHighSurrogate(current)) {
            // A high surrogate is only acceptable when a low surrogate follows.
            if (pos < length) {
                final char trailing = text.charAt(pos);
                if (Character.isLowSurrogate(trailing)) {
                    pos++;
                    final int codePoint = Character.toCodePoint(current, trailing);
                    final int type = Character.getType(codePoint);
                    if (type == Character.CONTROL || type == Character.PRIVATE_USE
                            || type == Character.UNASSIGNED) {
                        hexCharRef(out, codePoint);
                    } else {
                        out.append(current);
                        out.append(trailing);
                    }
                    continue;
                }
            }
            // No low surrogate followed
            throw new IllegalArgumentException("String contains isolated surrogates!");
        }

        if (Character.isLowSurrogate(current)) {
            // Low surrogate without a preceding high surrogate
            throw new IllegalArgumentException("String contains isolated surrogates!");
        }

        out.append(current);
    }

    return out.toString();
}

From source file:Main.java

/**
 * Finds where the longest NCName suffix of a character sequence begins.
 *
 * <p>The sequence is scanned backwards, one code point at a time. Low
 * surrogates are skipped so that each pair is examined once, at the index of
 * its high surrogate. Scanning stops at the first code point that is not an
 * NCName character; the result is the smallest scanned index holding an
 * NCName start character.
 *
 * @param s
 *        The character sequence.
 * @return The index of the longest suffix of the specified character
 *         sequence {@code s} that is an NCName, or -1 if the character
 *         sequence {@code s} does not have a suffix that is an NCName.
 *         Sequences starting with {@code "_:"} (blank node labels) are
 *         never split and always yield -1.
 */
public static int getNCNameSuffixIndex(CharSequence s) {
    final int length = s.length();

    // Blank node labels ("_:...") are left intact.
    if (length > 1 && s.charAt(0) == '_' && s.charAt(1) == ':') {
        return -1;
    }

    int suffixStart = -1;
    int pos = length;
    while (--pos >= 0) {
        if (Character.isLowSurrogate(s.charAt(pos))) {
            // Second half of a pair; handled when the high surrogate is reached.
            continue;
        }
        final int codePoint = Character.codePointAt(s, pos);
        if (isNCNameStartChar(codePoint)) {
            suffixStart = pos;
        }
        if (!isNCNameChar(codePoint)) {
            break;
        }
    }
    return suffixStart;
}

From source file:it.geosolutions.httpproxy.utils.Utils.java

/**
 * Decides whether a single code point may be copied to the output.
 *
 * <p>The checks run in this order:
 * <ol>
 * <li>ASCII letters and digits are returned unchanged.</li>
 * <li>Whitespace is returned unchanged, except for line feed, carriage
 * return and tab, which are rejected.</li>
 * <li>ISO control characters that are not whitespace are rejected.</li>
 * <li>Surrogate code units (high or low) are rejected.</li>
 * <li>Any other defined code point is returned unchanged.</li>
 * <li>Everything else (undefined code points) is rejected.</li>
 * </ol>
 *
 * <p>The control and surrogate checks must precede the
 * {@link Character#isDefined(int)} check: control characters and surrogates
 * are themselves "defined", so testing {@code isDefined} first would let them
 * through and make the dedicated checks unreachable.
 *
 * @param ch
 *            the code point to examine
 * @return {@code ch} if it may be emitted, or {@code -1} if it must not be
 *         included in the output
 */
final static int escapeHtmlFull(int ch) {
    if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9') {
        // safe
        return ch;
    }

    if (Character.isWhitespace(ch)) {
        // Plain whitespace is safe; line breaks and tabs are dropped.
        return (ch != '\n' && ch != '\r' && ch != '\t') ? ch : -1;
    }

    if (Character.isISOControl(ch)) {
        // paranoid version: ISO control characters that are not whitespace
        // are removed from the output
        return -1;
    }

    // Compare on the int value; a cast to char would truncate supplementary
    // code points and could misclassify them as surrogates.
    if (ch >= Character.MIN_SURROGATE && ch <= Character.MAX_SURROGATE) {
        // a lone surrogate code unit is never a valid character on its own
        return -1;
    }

    if (Character.isDefined(ch)) {
        // safe
        return ch;
    }

    return -1;
}

From source file:Main.java

/**
 * Check if the passed character is valid for XML content. Works for XML 1.0
 * and XML 1.1.<br>/*from  www.  j  av a 2  s .  c om*/
 * Note: makes no difference between the runtime JAXP solution and the
 * explicit Xerces version
 *
 * @param c
 *        The character to be checked.
 * @return <code>true</code> if the character is valid in XML,
 *         <code>false</code> otherwise.
 */
public static boolean isInvalidXMLCharacter(final char c) {
    // Based on: http://www.w3.org/TR/2006/REC-xml11-20060816/#charsets

    // Speed up by separating the most common use cases first
    if (c < 256) {
        // Character <= 0x00ff - use precomposed table
        return ILLEGAL_XML_CHARS[c];
    }

    // Character >= 0x0100
    // For completeness, the Unicode line separator character, #x2028, is
    // also supported.
    // Surrogate blocks (no Java IDs found)
    // High surrogate: 0xd800-0xdbff
    // Low surrogate: 0xdc00-0xdfff
    return c == '\u2028' || (c >= '\ufdd0' && c <= '\ufddf') || c == '\ufffe' || c == '\uffff'
            || Character.isHighSurrogate(c) || Character.isLowSurrogate(c);
}

From source file:com.ikon.util.FormatUtil.java

/**
 * Removes every Unicode surrogate code unit from a string.
 *
 * <p>Both high and low surrogates are dropped, whether or not they form a
 * valid pair; all other characters are kept in their original order.
 *
 * http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters#Surrogates
 *
 * @param text
 *            the text to filter; must not be {@code null}
 * @return the text without any surrogate code units
 */
public static String trimUnicodeSurrogates(String text) {
    final StringBuilder kept = new StringBuilder();

    for (final char unit : text.toCharArray()) {
        if (Character.isHighSurrogate(unit) || Character.isLowSurrogate(unit)) {
            continue;
        }
        kept.append(unit);
    }

    return kept.toString();
}

From source file:de.fau.cs.osr.utils.StringUtils.java

/**
 * Tells whether a string contains a surrogate that is not part of a valid
 * high/low surrogate pair.
 *
 * @param text
 *            the text to inspect; must not be {@code null}
 * @return {@code true} if a high surrogate is not immediately followed by a
 *         low surrogate, or a low surrogate appears without a preceding high
 *         surrogate; {@code false} otherwise
 */
public static boolean hasIsolatedSurrogates(String text) {
    final int end = text.length();
    int pos = 0;
    while (pos < end) {
        final char current = text.charAt(pos++);

        if (Character.isLowSurrogate(current)) {
            // A low surrogate reached here was not consumed as part of a pair.
            return true;
        }
        if (!Character.isHighSurrogate(current)) {
            continue;
        }

        // High surrogate: the very next code unit must be a low surrogate.
        if (pos >= end || !Character.isLowSurrogate(text.charAt(pos))) {
            return true;
        }
        pos++;
    }
    return false;
}

From source file:org.bimserver.ifc.step.serializer.IfcStepSerializer.java

/**
 * Writes a single primitive value using {@code print}.
 *
 * <ul>
 * <li>{@code null} is written as {@code $}.</li>
 * <li>Objects whose class simple name is {@code Tristate} are written as one
 * of the boolean constants, chosen by their {@code toString()} value.</li>
 * <li>Doubles: infinite and NaN values are logged and written as
 * {@code 0.0}; otherwise the last character is dropped when the text ends
 * with {@code DOT_0}.</li>
 * <li>Booleans are written as {@code BOOLEAN_TRUE} or
 * {@code BOOLEAN_FALSE}.</li>
 * <li>Strings are enclosed in {@code SINGLE_QUOTE}; apostrophes and
 * backslashes are doubled, characters 32..126 are written verbatim, other
 * characters below 255 as a {@code \X\} hex escape, and the rest either as
 * {@code \S\} (when {@code useIso8859_1} is set) or as {@code \X2\} /
 * {@code \X4\} hex escapes.</li>
 * <li>Enumerators are written between dots; anything else via
 * {@code toString()}.</li>
 * </ul>
 *
 * @param val
 *            the value to write, may be {@code null}
 * @throws SerializerException
 *             if a string contains an unpaired surrogate, or the UTF-32
 *             charset is unavailable
 * @throws IOException
 *             declared for the underlying output; presumably raised by
 *             {@code print} - confirm against its definition
 */
private void writePrimitive(Object val) throws SerializerException, IOException {
    if (val == null) {
        // Has to be handled first: every check below dereferences val.
        print("$");
        return;
    }

    if (val.getClass().getSimpleName().equals("Tristate")) {
        if (val.toString().equals("TRUE")) {
            print(BOOLEAN_TRUE);
        } else if (val.toString().equals("FALSE")) {
            print(BOOLEAN_FALSE);
        } else if (val.toString().equals("UNDEFINED")) {
            print(BOOLEAN_UNDEFINED);
        }
    } else if (val instanceof Double) {
        if (((Double) val).isInfinite() || (((Double) val).isNaN())) {
            LOGGER.info("Serializing infinite or NaN double as 0.0");
            print("0.0");
        } else {
            String string = val.toString();
            if (string.endsWith(DOT_0)) {
                print(string.substring(0, string.length() - 1));
            } else {
                print(string);
            }
        }
    } else if (val instanceof Boolean) {
        Boolean bool = (Boolean) val;
        if (bool) {
            print(BOOLEAN_TRUE);
        } else {
            print(BOOLEAN_FALSE);
        }
    } else if (val instanceof String) {
        print(SINGLE_QUOTE);
        String stringVal = (String) val;
        for (int i = 0; i < stringVal.length(); i++) {
            char c = stringVal.charAt(i);
            if (c == '\'') {
                print("\'\'");
            } else if (c == '\\') {
                print("\\\\");
            } else if (c >= 32 && c <= 126) {
                // ISO 8859-1
                print("" + c);
            } else if (c < 255) {
                //  ISO 10646 and ISO 8859-1 are the same < 255 , using ISO_8859_1
                print("\\X\\" + new String(Hex.encodeHex(
                        Charsets.ISO_8859_1.encode(CharBuffer.wrap(new char[] { (char) c })).array()))
                                .toUpperCase());
            } else {
                if (useIso8859_1) {
                    // ISO 8859-1 with -128 offset
                    ByteBuffer encode = Charsets.ISO_8859_1.encode(new String(new char[] { (char) (c - 128) }));
                    print("\\S\\" + (char) encode.get());
                } else {
                    // The following code has not been tested (2012-04-25)
                    // Use UCS-2 or UCS-4

                    // TODO when multiple sequential characters should be encoded in UCS-2 or UCS-4, we don't really need to add all those \X0\ \X2\ and \X4\ chars
                    if (Character.isLowSurrogate(c)) {
                        throw new SerializerException("Unexpected low surrogate range char");
                    } else if (Character.isHighSurrogate(c)) {
                        // We need UCS-4, this is probably never happening
                        if (i + 1 < stringVal.length()) {
                            char low = stringVal.charAt(i + 1);
                            if (!Character.isLowSurrogate(low)) {
                                throw new SerializerException(
                                        "High surrogate char should be followed by char in low surrogate range");
                            }
                            try {
                                ByteBuffer ucs4 = Charset.forName("UTF-32")
                                        .encode(new String(new char[] { c, low }));
                                // Copy only the encoded bytes: the backing array is sized
                                // from the char count and is longer than the single
                                // 4-byte code point, so array() would append zero bytes.
                                byte[] ucs4Bytes = new byte[ucs4.remaining()];
                                ucs4.get(ucs4Bytes);
                                print("\\X4\\" + new String(Hex.encodeHex(ucs4Bytes)).toUpperCase()
                                        + "\\X0\\");
                            } catch (UnsupportedCharsetException e) {
                                throw new SerializerException(e);
                            }
                            // The low surrogate has been consumed as part of the pair.
                            i++;
                        } else {
                            throw new SerializerException(
                                    "High surrogate char should be followed by char in low surrogate range, but end of string reached");
                        }
                    } else {
                        // UCS-2 will do
                        print("\\X2\\" + new String(Hex
                                .encodeHex(Charsets.UTF_16BE.encode(CharBuffer.wrap(new char[] { c })).array()))
                                        .toUpperCase()
                                + "\\X0\\");
                    }
                }
            }
        }
        print(SINGLE_QUOTE);
    } else if (val instanceof Enumerator) {
        print("." + val + ".");
    } else {
        print(val.toString());
    }
}

From source file:se.sawano.java.security.otp.google.keyuri.UnicodeEscaper.java

/**
 * Returns the Unicode code point of the character at the given index.
 *
 * <p>Unlike {@link Character#codePointAt(CharSequence, int)} or {@link String#codePointAt(int)}
 * this method will never fail silently when encountering an invalid surrogate pair.
 *
 * <p>The behaviour of this method is as follows:
 * <ol>
 * <li>If {@code index >= end}, {@link IndexOutOfBoundsException} is thrown.
 * <li><b>If the character at the specified index is not a surrogate, it is returned.</b>
 * <li>If the first character was a high surrogate value, then an attempt is made to read the next
 * character.
 * <ol>
 * <li><b>If the end of the sequence was reached, the negated value of the trailing high
 * surrogate is returned.</b>
 * <li><b>If the next character was a valid low surrogate, the code point value of the
 * high/low surrogate pair is returned.</b>
 * <li>If the next character was not a low surrogate value, then {@link
 * IllegalArgumentException} is thrown.
 * </ol>
 * <li>If the first character was a low surrogate value, {@link IllegalArgumentException} is
 * thrown.
 * </ol>
 *
 * @param seq
 *         the sequence of characters from which to decode the code point
 * @param index
 *         the index of the first character to decode
 * @param end
 *         the index beyond the last valid character to decode
 *
 * @return the Unicode code point for the given index or the negated value of the trailing high surrogate character at the end of the sequence
 */
protected static int codePointAt(CharSequence seq, int index, int end) {
    notNull(seq);

    if (index >= end) {
        throw new IndexOutOfBoundsException("Index exceeds specified range");
    }

    final char first = seq.charAt(index);

    // Fast path: anything outside the surrogate range is its own code point.
    if (first < Character.MIN_HIGH_SURROGATE || first > Character.MAX_LOW_SURROGATE) {
        return first;
    }

    // Inside the surrogate range but above the high surrogates: a stray low surrogate.
    if (first > Character.MAX_HIGH_SURROGATE) {
        throw new IllegalArgumentException("Unexpected low surrogate character '" + first + "' with value "
                + (int) first + " at index " + index + " in '" + seq + "'");
    }

    final int next = index + 1;
    if (next == end) {
        // High surrogate is the last character in range: report it negated.
        return -first;
    }

    final char second = seq.charAt(next);
    if (!Character.isLowSurrogate(second)) {
        throw new IllegalArgumentException("Expected low surrogate but got char '" + second + "' with value "
                + (int) second + " at index " + next + " in '" + seq + "'");
    }
    return Character.toCodePoint(first, second);
}

From source file:CodePointInputMethod.java

/**
 * Tries to turn the composed escape text in {@code buffer} into the
 * character(s) it denotes and commit the result.
 *
 * <p>Three shapes are recognised, selected by buffer length and
 * {@code format}:
 * <ul>
 * <li>length 6, any format other than {@code SPECIAL_ESCAPE}: one UTF-16
 * code unit read via {@code getCodePoint(buffer, 2, 5)};</li>
 * <li>length 8 with {@code SPECIAL_ESCAPE}: one code point read via
 * {@code getCodePoint(buffer, 2, 7)};</li>
 * <li>length 12 with {@code SURROGATE_PAIR}: two code units that must form
 * a high/low surrogate pair.</li>
 * </ul>
 * On success the buffer is replaced by the decoded text and
 * {@code sendCommittedText()} is called; in every other case {@code beep()}
 * is called and the buffer is left untouched.
 */
private void finishComposition() {
    final int length = buffer.length();
    boolean accepted = false;

    if (length == 6 && format != SPECIAL_ESCAPE) {
        final char unit = (char) getCodePoint(buffer, 2, 5);
        if (Character.isValidCodePoint(unit) && unit != 0xFFFF) {
            buffer.setLength(0);
            buffer.append(unit);
            accepted = true;
        }
    } else if (length == 8 && format == SPECIAL_ESCAPE) {
        final int value = getCodePoint(buffer, 2, 7);
        if (Character.isValidCodePoint(value) && value != 0xFFFF) {
            buffer.setLength(0);
            buffer.appendCodePoint(value);
            accepted = true;
        }
    } else if (length == 12 && format == SURROGATE_PAIR) {
        final char[] pair = { (char) getCodePoint(buffer, 2, 5), (char) getCodePoint(buffer, 8, 11) };
        if (Character.isHighSurrogate(pair[0]) && Character.isLowSurrogate(pair[1])) {
            buffer.setLength(0);
            buffer.append(pair);
            accepted = true;
        }
    }

    if (accepted) {
        sendCommittedText();
    } else {
        // Nothing usable was composed.
        beep();
    }
}

From source file:nl.tue.ddss.ifcrdf.model.IfcStepSerializer.java

/**
 * Writes the primitive value held by a resource using {@code print}.
 *
 * <ul>
 * <li>{@code null} is written as {@code $}.</li>
 * <li>Logical values are written as one of the boolean constants, chosen by
 * the resource's {@code HASLOGICAL} property.</li>
 * <li>Reals and numbers: infinite and NaN values are logged and written as
 * {@code 0.0}; otherwise the last character is dropped when the text ends
 * with {@code DOT_0}.</li>
 * <li>Integers are written from their {@code HASINTEGER} literal.</li>
 * <li>Booleans are written as {@code BOOLEAN_TRUE} or
 * {@code BOOLEAN_FALSE}.</li>
 * <li>Strings are enclosed in {@code SINGLE_QUOTE}; apostrophes and
 * backslashes are doubled, characters 32..126 are written verbatim, other
 * characters below 255 as a {@code \X\} hex escape, and the rest either as
 * {@code \S\} (when {@code useIso8859_1} is set) or as {@code \X2\} /
 * {@code \X4\} hex escapes.</li>
 * <li>Enumerations are written as the resource's local name between dots;
 * anything else via {@code toString()}.</li>
 * </ul>
 *
 * @param val
 *            the resource to write, may be {@code null}
 * @throws IOException
 *             declared for the underlying output; presumably raised by
 *             {@code print} - confirm against its definition
 * @throws SerializerException
 *             if a string contains an unpaired surrogate, or the UTF-32
 *             charset is unavailable
 */
private void writePrimitive(Resource val) throws IOException, SerializerException {
    if (val == null) {
        // Handled up front so the "$" output does not depend on how the
        // type-check helpers below treat a null resource.
        print("$");
        return;
    }

    if (isLogical(val)) {
        if (val.hasProperty(HASLOGICAL, EXPRESS_TRUE)) {
            print(BOOLEAN_TRUE);
        } else if (val.hasProperty(HASLOGICAL, EXPRESS_FALSE)) {
            print(BOOLEAN_FALSE);
        } else if (val.hasProperty(HASLOGICAL, EXPRESS_UNDEFINED)) {
            print(BOOLEAN_UNDEFINED);
        }
    } else if (isReal(val) || isNumber(val)) {
        Double valDouble = val.getProperty(HASDOUBLE).getObject().asLiteral().getDouble();
        if ((valDouble).isInfinite() || ((valDouble).isNaN())) {
            LOGGER.info("Serializing infinite or NaN double as 0.0");
            print("0.0");
        } else {
            String string = valDouble.toString();
            if (string.endsWith(DOT_0)) {
                print(string.substring(0, string.length() - 1));
            } else {
                print(string);
            }
        }
    } else if (isInteger(val)) {
        Integer valInteger = val.getProperty(HASINTEGER).getObject().asLiteral().getInt();
        String string = valInteger.toString();
        // NOTE(review): if DOT_0 is ".0" this can never match the text of an
        // Integer - confirm against the constant's definition.
        if (string.endsWith(DOT_0)) {
            print(string.substring(0, string.length() - 2));
        } else {
            print(string);
        }
    } else if (isBoolean(val)) {
        if (val.hasLiteral(HASBOOLEAN, true)) {
            print(BOOLEAN_TRUE);
        } else if (val.hasLiteral(HASBOOLEAN, false)) {
            print(BOOLEAN_FALSE);
        }
    } else if (isString(val)) {
        print(SINGLE_QUOTE);
        String stringVal = val.getProperty(HASSTRING).getObject().asLiteral().getString();
        for (int i = 0; i < stringVal.length(); i++) {
            char c = stringVal.charAt(i);
            if (c == '\'') {
                print("\'\'");
            } else if (c == '\\') {
                print("\\\\");
            } else if (c >= 32 && c <= 126) {
                // ISO 8859-1
                print("" + c);
            } else if (c < 255) {
                // ISO 10646 and ISO 8859-1 are the same < 255 , using
                // ISO_8859_1
                print("\\X\\" + new String(Hex.encodeHex(
                        Charsets.ISO_8859_1.encode(CharBuffer.wrap(new char[] { (char) c })).array()))
                                .toUpperCase());
            } else {
                if (useIso8859_1) {
                    // ISO 8859-1 with -128 offset
                    ByteBuffer encode = Charsets.ISO_8859_1.encode(new String(new char[] { (char) (c - 128) }));
                    print("\\S\\" + (char) encode.get());
                } else {
                    // The following code has not been tested (2012-04-25)
                    // Use UCS-2 or UCS-4

                    // TODO when multiple sequential characters should be
                    // encoded in UCS-2 or UCS-4, we don't really need to
                    // add all those \X0\ \X2\ and \X4\ chars
                    if (Character.isLowSurrogate(c)) {
                        throw new SerializerException("Unexpected low surrogate range char");
                    } else if (Character.isHighSurrogate(c)) {
                        // We need UCS-4, this is probably never happening
                        if (i + 1 < stringVal.length()) {
                            char low = stringVal.charAt(i + 1);
                            if (!Character.isLowSurrogate(low)) {
                                throw new SerializerException(
                                        "High surrogate char should be followed by char in low surrogate range");
                            }
                            try {
                                ByteBuffer ucs4 = Charset.forName("UTF-32")
                                        .encode(new String(new char[] { c, low }));
                                // Copy only the encoded bytes: the backing array is sized
                                // from the char count and is longer than the single
                                // 4-byte code point, so array() would append zero bytes.
                                byte[] ucs4Bytes = new byte[ucs4.remaining()];
                                ucs4.get(ucs4Bytes);
                                print("\\X4\\" + new String(Hex.encodeHex(ucs4Bytes)).toUpperCase()
                                        + "\\X0\\");
                            } catch (UnsupportedCharsetException e) {
                                throw new SerializerException(e);
                            }
                            // The low surrogate has been consumed as part of the pair.
                            i++;
                        } else {
                            throw new SerializerException(
                                    "High surrogate char should be followed by char in low surrogate range, but end of string reached");
                        }
                    } else {
                        // UCS-2 will do
                        print("\\X2\\" + new String(Hex
                                .encodeHex(Charsets.UTF_16BE.encode(CharBuffer.wrap(new char[] { c })).array()))
                                        .toUpperCase()
                                + "\\X0\\");
                    }
                }
            }
        }
        print(SINGLE_QUOTE);
    } else if (isEnumeration(val)) {
        String enumVal = val.getLocalName();
        print("." + enumVal + ".");
    } else {
        print(val.toString());
    }
}