Example usage for java.lang.Character.charCount

Introduction

This page collects example usages of java.lang.Character.charCount.

Prototype

public static int charCount(int codePoint)

Source Link

Document

Determines the number of char values needed to represent the specified character (Unicode code point).

Usage

From source file:Main.java

/**
 * Demonstrates {@link Character#charCount(int)} on the code point U+12345 and
 * prints a message that depends on whether one or two {@code char} values are
 * needed to represent it.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final int codePoint = 0x12345;

    final String oneCharMessage = "It is not a valid supplementary character";
    final String twoCharMessage = "It is a valid supplementary character";

    // charCount yields 2 for code points at or above U+10000, otherwise 1.
    switch (Character.charCount(codePoint)) {
    case 1:
        System.out.println(oneCharMessage);
        break;
    case 2:
        System.out.println(twoCharMessage);
        break;
    default:
        // Any other count prints nothing.
        break;
    }
}

From source file:Main.java

/**
 * Returns a copy of {@code str} with every supplementary code point (those
 * needing two {@code char} values) removed. Strings for which
 * {@code TextUtils.isEmpty} is true, and strings whose code point count equals
 * their length, are returned unchanged.
 *
 * @param str the text to filter
 * @return the input itself when nothing needs removing, otherwise the filtered text
 */
public static final String filterUCS4(String str) {
    // NOTE(review): TextUtils.isEmpty presumably treats null and "" alike - confirm.
    if (TextUtils.isEmpty(str)) {
        return str;
    }

    final int length = str.length();
    // Equal counts mean no surrogate pair is present, so there is nothing to strip.
    if (str.codePointCount(0, length) == length) {
        return str;
    }

    final StringBuilder kept = new StringBuilder();
    for (int pos = 0; pos < length;) {
        final int cp = str.codePointAt(pos);
        pos += Character.charCount(cp);
        if (!Character.isSupplementaryCodePoint(cp)) {
            kept.appendCodePoint(cp);
        }
    }
    return kept.toString();
}

From source file:Main.java

/**
 * Inserts {@code <wbr>} markers into {@code value} so that no run of counted
 * characters exceeds {@code maxCharsBetweenWordBreaks}. Characters inside an
 * HTML tag are not counted, a possible HTML entity ({@code &...;}) counts as a
 * single character, and a space resets the count.
 *
 * @param value the text, possibly containing HTML tags and entities
 * @param maxCharsBetweenWordBreaks maximum counted characters between breaks
 * @return the text with {@code <wbr>} inserted where needed
 */
public static String $$insertWordBreaks(String value, int maxCharsBetweenWordBreaks) {

    final StringBuilder out = new StringBuilder();

    boolean insideTag = false; // currently between '<' and '>'
    boolean maybeInsideEntity = false; // saw '&' and no terminator yet
    int runLength = 0; // counted characters since the last break or space

    int pos = 0;
    while (pos < value.length()) {
        final int cp = value.codePointAt(pos);
        pos += Character.charCount(cp);

        // Limit reached and the upcoming character is not a space: emit a break first.
        if (runLength >= maxCharsBetweenWordBreaks && cp != ' ') {
            out.append("<wbr>");
            runLength = 0;
        }

        if (insideTag) {
            // Only '>' ends a tag; everything else keeps us inside it.
            insideTag = cp != '>';
        } else if (maybeInsideEntity) {
            if (cp == ';') {
                // Entity finished: the whole entity counts as one character.
                maybeInsideEntity = false;
                runLength++;
            } else if (cp == '<') {
                // Not an entity after all; a tag starts here.
                maybeInsideEntity = false;
                insideTag = true;
            } else if (cp == ' ') {
                // Not an entity after all; the space resets the run.
                maybeInsideEntity = false;
                runLength = 0;
            }
        } else if (cp == '<') {
            insideTag = true;
        } else if (cp == '&') {
            maybeInsideEntity = true;
        } else if (cp == ' ') {
            runLength = 0;
        } else {
            runLength++;
        }

        // The original character is always emitted, after any inserted break.
        out.appendCodePoint(cp);
    }

    return out.toString();
}

From source file:Main.java

/**
 * Trims leading and trailing delimiters from a search query. Anything other
 * than a letter or digit is considered a delimiter; delimiters at the start
 * and end are removed since they are not relevant to search.
 *
 * <p>The scan works on whole Unicode code points in both directions, so a
 * letter or digit encoded as a surrogate pair is kept intact even when it is
 * the last retained character.
 *
 * @param query The query string to clean. Must not be null.
 * @return The cleaned query. Empty string if all characters are cleaned out.
 */
public static String cleanStartAndEndOfSearchQuery(String query) {
    final int length = query.length();

    // Skip leading delimiters, one code point at a time.
    int start = 0;
    while (start < length) {
        int codePoint = query.codePointAt(start);
        if (Character.isLetterOrDigit(codePoint)) {
            break;
        }
        start += Character.charCount(codePoint);
    }

    if (start == length) {
        // All characters are delimiters.
        return "";
    }

    // Skip trailing delimiters. 'end' is exclusive so that a surrogate pair
    // ending the result is included in full; the letter/digit found at 'start'
    // guarantees the loop stops before crossing it.
    int end = length;
    while (end > start) {
        int codePoint = query.codePointBefore(end);
        if (Character.isLetterOrDigit(codePoint)) {
            break;
        }
        end -= Character.charCount(codePoint);
    }

    return query.substring(start, end);
}

From source file:Main.java

/**
 * Ensures that the output String has only valid XML unicode characters as specified by the
 * XML 1.0 standard, replacing every other code point with a single space. For reference,
 * please see <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the standard</a>.
 * This method will return an empty String if the input is null or empty.
 *
 * @param s - The String whose non-valid characters we want to replace.
 * @return The in String, where non-valid characters are replaced by spaces.
 * @author Nuno Freire
 */
public static String removeInvalidXMLCharacters(String s) {
    // Honour the documented contract: null (or empty) input yields "".
    if (s == null || s.isEmpty()) {
        return "";
    }

    StringBuilder out = new StringBuilder(s.length()); // Used to hold the output.
    int i = 0;
    while (i < s.length()) {
        int codePoint = s.codePointAt(i); // This is the unicode code of the character.
        boolean valid = (codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
                || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
                || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
                || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF));
        if (valid) {
            out.appendCodePoint(codePoint);
        } else {
            // Unpaired surrogates also land here: they fall in the excluded D800-DFFF gap.
            out.append(' ');
        }
        // Advance by the number of code units (java chars) this code point occupies.
        i += Character.charCount(codePoint);
    }
    return out.toString();
}

From source file:Main.java

/**
 * Converts a stream of plaintext into valid XML. Output stream must convert
 * stream to UTF-8 when saving to disk./*w  w w .j ava  2 s  .co  m*/
 */
public static String makeValidXML(String plaintext) {
    StringBuilder out = new StringBuilder();
    String text = removeXMLInvalidChars(plaintext);
    for (int cp, i = 0; i < text.length(); i += Character.charCount(cp)) {
        cp = text.codePointAt(i);
        out.append(escapeXMLChars(cp));
    }
    return out.toString();
}

From source file:Main.java

/**
 * Tests whether the character sequence <code>cs</code> is an NCName
 * (Non-Colon Name): an NCName start character followed by zero or more
 * NCName characters.
 *
 * Source: http://www.w3.org/TR/xml-names/#NT-NCName
 *
 * @param cs
 *           The character sequence to test.
 * @return <code>true</code> if the input character sequence is an NCName,
 *         <code>false</code> otherwise (including when <code>isEmpty(cs)</code>
 *         reports it as empty).
 */
public static boolean isNCName(CharSequence cs) {
    if (isEmpty(cs)) {
        return false;
    }

    // The first code point has its own, stricter rule.
    int pos = 0;
    int codePoint = Character.codePointAt(cs, pos);
    if (!isNCNameStartChar(codePoint)) {
        return false;
    }
    pos += Character.charCount(codePoint);

    // Every remaining code point must be an NCName character.
    while (pos < cs.length()) {
        codePoint = Character.codePointAt(cs, pos);
        if (!isNCNameChar(codePoint)) {
            return false;
        }
        pos += Character.charCount(codePoint);
    }
    return true;
}

From source file:Main.java

/**
 * Ensures that the output String has only valid XML unicode characters as
 * specified by the XML 1.0 standard. For reference, please see
 * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
 * standard</a>. This method will return an empty String if the input is
 * null or empty.
 *
 * @param s The String whose non-valid characters we want to remove.
 * @return The in String, stripped of non-valid characters.
 */
public static String stripNonValidXMLCharacters(String s) {
    // Honour the documented contract: null (or empty) input yields "".
    if (s == null || s.isEmpty()) {
        return "";
    }

    StringBuilder out = new StringBuilder(s.length()); // Used to hold the output.

    int i = 0;
    while (i < s.length()) {
        // One unicode character may be represented by two code units (a surrogate pair).
        int codePoint = s.codePointAt(i);
        boolean valid = (codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
                || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
                || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
                || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF));
        if (valid) {
            out.appendCodePoint(codePoint);
        }
        // Advance by the number of code units (java chars) this code point occupies.
        i += Character.charCount(codePoint);
    }

    return out.toString();
}

From source file:org.marketcetera.util.test.UnicodeDataTest.java

/**
 * Asserts that {@code str} matches each of the supplied representations: its
 * char array, its sequence of Unicode code points, and its byte encodings in
 * the default charset and in the UTF8, UTF16BE, UTF16LE, UTF32BE and UTF32LE
 * charsets.
 */
private static void singleValid(String str, char[] chars, int[] ucps, byte[] nat, byte[] utf8, byte[] utf16be,
        byte[] utf16le, byte[] utf32be, byte[] utf32le) {
    assertArrayEquals(str.toCharArray(), chars);

    // Walk the string by code point; charIdx moves in chars, cpIdx in code points.
    for (int charIdx = 0, cpIdx = 0; charIdx < str.length(); cpIdx++) {
        int ucp = str.codePointAt(charIdx);
        assertEquals("At code point position " + cpIdx, ucp, ucps[cpIdx]);
        charIdx += Character.charCount(ucp);
    }

    assertArrayEquals(str.getBytes(), nat);
    assertArrayEquals(str.getBytes(UTF8), utf8);
    assertArrayEquals(str.getBytes(UTF16BE), utf16be);
    assertArrayEquals(str.getBytes(UTF16LE), utf16le);
    assertArrayEquals(str.getBytes(UTF32BE), utf32be);
    assertArrayEquals(str.getBytes(UTF32LE), utf32le);
}

From source file:Main.java

/**
 * Returns a copy of {@code str} in which every code point rejected by
 * {@code isValidXMLChar} is replaced with a single space.
 *
 * @param str the text to sanitize
 * @return the sanitized text
 */
public static String removeXMLInvalidChars(String str) {
    final int length = str.length();
    final StringBuilder cleaned = new StringBuilder(length);

    int pos = 0;
    while (pos < length) {
        final int original = str.codePointAt(pos);
        final int emitted = isValidXMLChar(original) ? original : ' ';
        cleaned.appendCodePoint(emitted);
        // The step is taken from the emitted code point, so a rejected code
        // point always advances by one char.
        // NOTE(review): if isValidXMLChar can reject a surrogate pair, its low
        // surrogate is examined again on the next pass - confirm this is intended.
        pos += Character.charCount(emitted);
    }
    return cleaned.toString();
}