Example usage for java.lang.Character.charCount

Introduction

This page collects example usages of java.lang.Character.charCount.

Prototype

public static int charCount(int codePoint)

Source Link

Document

Determines the number of char values needed to represent the specified character (Unicode code point).

Usage

From source file:Main.java

/**
 * Looks for {@code substring} at the start of a token inside {@code value}.
 * <p>
 * This resembles {@link String#contains(CharSequence)} with two main differences:
 * <p>
 * 1) Only token prefixes are searched.  A token is defined as any combination of letters or
 * numbers.
 * <p>
 * 2) The starting index of the match is returned rather than a boolean.
 * <p>
 * Comparison is done code point by code point.  Only the code points read from
 * {@code value} are lower-cased; the code points of {@code substring} are compared exactly
 * as given.
 *
 * @param value The string to search.
 * @param substring The substring to look for.
 * @return The starting index where the substring is found. {@literal -1} if substring is not
 *         found in value.
 */
@VisibleForTesting
static int contains(String value, String substring) {
    final int valueLength = value.length();
    final int needleChars = substring.length();
    if (valueLength < needleChars) {
        return -1;
    }

    // i18n support: decode the needle into code points a single time.
    // The array is sized for the worst case (one code point per char), so the real
    // number of code points is tracked separately in needleCount.
    final int[] needle = new int[needleChars];
    int needleCount = 0; // may be smaller than needleChars
    int pos = 0;
    while (pos < needleChars) {
        final int cp = Character.codePointAt(substring, pos);
        needle[needleCount] = cp;
        needleCount++;
        pos += Character.charCount(cp);
    }

    // Try a match at index 0 and then at every index reported by findNextTokenStart.
    int start = 0;
    while (start < valueLength) {
        int matched = 0;
        int cursor = start;
        while (cursor < valueLength && matched < needleCount) {
            final int lowered = Character.toLowerCase(value.codePointAt(cursor));
            if (lowered != needle[matched]) {
                break;
            }
            cursor += Character.charCount(lowered);
            matched++;
        }
        if (matched == needleCount) {
            return start;
        }
        start = findNextTokenStart(value, start);
    }
    return -1;
}

From source file:Main.java

/**
 * Determines if a character sequence is a QName.
 * <p>
 * A QName is either:
 * <ul>
 * <li>an NCName (LocalName), or</li>
 * <li>an NCName followed by a colon and by another NCName
 * (PrefixName:LocalName)</li>
 * </ul>
 *
 * Source: http://www.w3.org/TR/xml-names/#NT-QName
 *
 * @param s
 *           The character sequence to test.
 * @return Returns <code>true</code> if the character sequence
 *         <code>s</code> is a QName, or <code>false</code> otherwise.
 *         In particular a sequence that ends with the colon (a prefix
 *         with no local name) is rejected.
 */
public static boolean isQName(CharSequence s) {
    if (isEmpty(s)) {
        return false;
    }
    boolean foundColon = false;
    boolean inNCName = false;
    for (int i = 0; i < s.length();) {
        int c = Character.codePointAt(s, i);
        if (c == ':') { //$NON-NLS-1$
            // At most one colon is allowed, and it must follow a non-empty NCName.
            if (foundColon || !inNCName) {
                return false;
            }
            foundColon = true;
            // The next code point has to start a fresh NCName.
            inNCName = false;
        } else if (!inNCName) {
            if (!isXmlNameStartChar(c)) {
                return false;
            }
            inNCName = true;
        } else if (!isXmlNameChar(c)) {
            return false;
        }
        // Advance by one or two chars depending on the code point just read.
        i += Character.charCount(c);
    }
    // inNCName is false here when the last code point was the colon, i.e. the
    // local name is missing ("prefix:"); that is not a valid QName.
    return inNCName;
}

From source file:Main.java

/**
 * Mangle a string so that it can be represented in an XML document.
 * /*from w  w  w  . j  ava 2  s . c o  m*/
 * There are three kinds of code points in XML:
 * - Those that can be represented normally,
 * - Those that have to be escaped (for example, & must be represented 
 *     as &amp;)
 * - Those that cannot be represented at all in XML.
 *
 * The built-in SAX functions will handle the first two types for us just
 * fine.  However, sometimes we come across a code point of the third type.
 * In this case, we have to mangle the string in order to represent it at
 * all.  We also mangle backslash to avoid confusing a backslash in the
 * string with part our escape sequence.
 * 
 * The encoding used here is as follows: an illegal code point is
 * represented as '\ABCD;', where ABCD is the hexadecimal value of 
 * the code point.
 *
 * @param str     The input string.
 *
 * @return        The mangled string.
 */
public static String mangleXmlString(String str, boolean createEntityRefs) {
    final StringBuilder bld = new StringBuilder();
    final int length = str.length();
    for (int offset = 0; offset < length;) {
        final int cp = str.codePointAt(offset);
        final int len = Character.charCount(cp);
        if (codePointMustBeMangled(cp)) {
            bld.append(mangleCodePoint(cp));
        } else {
            String entityRef = null;
            if (createEntityRefs) {
                entityRef = codePointToEntityRef(cp);
            }
            if (entityRef != null) {
                bld.append(entityRef);
            } else {
                for (int i = 0; i < len; i++) {
                    bld.append(str.charAt(offset + i));
                }
            }
        }
        offset += len;
    }
    return bld.toString();
}

From source file:Main.java

/**
 * Determines if a character sequence is an NCName (Non-Colonised Name). An
 * NCName is a string which starts with an NCName start character and is
 * followed by zero or more NCName characters.
 * /*from  ww  w .  j av  a 2  s. com*/
 * @param s
 *        The character sequence to be tested.
 * @return {@code true} if {@code s} is an NCName, otherwise {@code false}.
 */
public static boolean isNCName(@Nullable CharSequence s) {
    if (isNullOrEmpty(s)) {
        return false;
    }
    assert s != null;
    int firstCodePoint = Character.codePointAt(s, 0);
    if (!isNCNameStartChar(firstCodePoint)) {
        return false;
    }
    for (int i = Character.charCount(firstCodePoint); i < s.length();) {
        int codePoint = Character.codePointAt(s, i);
        if (!isNCNameChar(codePoint)) {
            return false;
        }
        i += Character.charCount(codePoint);
    }
    return true;
}

From source file:org.mariotaku.twidere.util.CodePointArray.java

/**
 * Decodes {@code cs} into Unicode code points.
 * <p>
 * The backing array is allocated with one slot per char of the input, which
 * is an upper bound; the number of slots actually filled is stored in
 * {@code length}.
 *
 * @param cs the character sequence to decode
 */
public CodePointArray(@NonNull final CharSequence cs) {
    final int charTotal = cs.length();
    final int[] decoded = new int[charTotal];
    int filled = 0;
    int pos = 0;
    while (pos < charTotal) {
        final int cp = Character.codePointAt(cs, pos);
        decoded[filled] = cp;
        filled++;
        // Step over one or two chars, depending on the code point just read.
        pos += Character.charCount(cp);
    }
    codePoints = decoded;
    this.length = filled;
}

From source file:com.evolveum.midpoint.common.Utils.java

/**
 * Returns a copy of the input that keeps only the code points accepted by
 * {@code isValidXmlCodepoint}; every other code point is dropped.
 *
 * This is not really used now. It was done as a kind of prototype for
 * filters. But it may come in handy, and it in fact tests that the pattern
 * is doing what is expected, so it may be useful.
 *
 * @param bad
 *            string with bad chars
 * @return string without bad chars
 */
public static String cleanupUtf(String bad) {
    final int total = bad.length();
    final StringBuilder cleaned = new StringBuilder(total);

    int pos = 0;
    while (pos < total) {
        final int codePoint = bad.codePointAt(pos);
        if (isValidXmlCodepoint(codePoint)) {
            cleaned.appendCodePoint(codePoint);
        }
        // Move past the one or two chars that made up this code point.
        pos += Character.charCount(codePoint);
    }

    return cleaned.toString();
}

From source file:Main.java

/**
 * Escapes a character sequence so that it is valid XML.
 * <p>
 * The five characters {@code <}, {@code >}, {@code "}, {@code &} and
 * {@code '} are replaced by the constants {@code LT}, {@code GT},
 * {@code QUOT}, {@code AMP} and {@code APOS} respectively; every other code
 * point is copied through unchanged.
 *
 * @param s
 *        The character sequence.
 * @return The escaped version of the character sequence.
 */
public static String escapeXML(CharSequence s) {
    final int total = s.length();
    final StringBuilder escaped = new StringBuilder(total * 2);
    int pos = 0;
    while (pos < total) {
        final int cp = Character.codePointAt(s, pos);
        switch (cp) {
            case '<': // less than
                escaped.append(LT);
                break;
            case '>': // greater than
                escaped.append(GT);
                break;
            case '\"': // double quote
                escaped.append(QUOT);
                break;
            case '&': // ampersand
                escaped.append(AMP);
                break;
            case '\'': // apostrophe
                escaped.append(APOS);
                break;
            default:
                escaped.appendCodePoint(cp);
                break;
        }
        pos += Character.charCount(cp);
    }
    return escaped.toString();
}

From source file:org.mariotaku.twidere.util.CodePointArray.java

/**
 * Finds the first occurrence of {@code codePoint} among the stored code
 * points and reports where it would sit in the equivalent char sequence.
 * <p>
 * {@code start} is compared against the position in the code point array,
 * whereas the returned value is the accumulated char count of all code
 * points that precede the match.
 *
 * @param codePoint the code point to look for
 * @param start     the lowest code point position at which a match is accepted
 * @return the char offset of the match, or {@code -1} if there is none
 */
public int indexOfText(int codePoint, int start) {
    int charOffset = 0;
    int position = 0;
    while (position < length) {
        final int candidate = codePoints[position];
        if (position >= start && candidate == codePoint) {
            return charOffset;
        }
        // Every code point, matched or skipped, contributes its char width.
        charOffset += Character.charCount(candidate);
        position++;
    }
    return -1;
}

From source file:Main.java

/**
 * Determines if a character sequence is a QName. A QName is either an
 * NCName (LocalName), or an NCName followed by a colon followed by another
 * NCName (where the first NCName is referred to as the 'Prefix Name' and
 * the second NCName is referred to as the 'Local Name' - i.e.
 * PrefixName:LocalName).
 *
 * @param s
 *        The character sequence to be tested.
 * @return {@code true} if {@code s} is a QName, otherwise {@code false}.
 *         A sequence that ends with the colon (prefix without a local
 *         name) is not a QName and yields {@code false}.
 */
public static boolean isQName(CharSequence s) {
    if (isNullOrEmpty(s)) {
        return false;
    }
    boolean foundColon = false;
    boolean inNCName = false;
    for (int i = 0; i < s.length();) {
        int codePoint = Character.codePointAt(s, i);
        if (codePoint == ':') {
            // Only one colon is permitted, and it must come after a non-empty NCName.
            if (foundColon || !inNCName) {
                return false;
            }
            foundColon = true;
            // Whatever follows must begin a new NCName.
            inNCName = false;
        } else if (!inNCName) {
            if (!isXMLNameStartCharacter(codePoint)) {
                return false;
            }
            inNCName = true;
        } else if (!isXMLNameChar(codePoint)) {
            return false;
        }
        // Advance by one or two chars depending on the code point just read.
        i += Character.charCount(codePoint);
    }
    // inNCName is false here when the colon was the final code point, which
    // means the local name is missing ("prefix:"); reject that input.
    return inNCName;
}

From source file:jp.furplag.util.commons.StringUtils.java

/**
 * return the Array of Unicode code points in the string.
 *
 * @param str the string, may be null./*  w  w  w.j a  v  a2s . co m*/
 * @return Array of codepoints.
 */
public static int[] getCodePoints(final String str) {
    char[] chars = defaultString(str).toCharArray();
    int[] ret = new int[Character.codePointCount(chars, 0, chars.length)];
    int index = 0;
    for (int i = 0, codePoint; i < chars.length; i += Character.charCount(codePoint)) {
        codePoint = Character.codePointAt(chars, i);
        ret[index++] = codePoint;
    }

    return ret;
}