Example usages of java.lang.Character.charCount(int)

Introduction

This page collects example usages of java.lang.Character.charCount(int) taken from open-source projects.

Prototype

public static int charCount(int codePoint)

Source Link

Document

Determines the number of char values needed to represent the specified character (Unicode code point).

Usage

From source file:com.microsoft.windowsazure.mobileservices.MobileServiceTableBase.java

/**
 * Validates if a given string contains a control character.
 * @param s/*from  ww  w  . j ava 2 s .c  o  m*/
 * @return
 */
protected boolean containsControlCharacter(String s) {
    boolean result = false;

    final int length = s.length();

    for (int offset = 0; offset < length;) {
        final int codepoint = s.codePointAt(offset);

        if (Character.isISOControl(codepoint)) {
            result = true;
            break;
        }

        offset += Character.charCount(codepoint);
    }

    return result;
}

From source file:org.omegat.util.StaticUtils.java

/**
 * Splits a command line string into its arguments, treating double and
 * single quotes the way Bash does.
 *
 * <p>Rules applied while scanning the trimmed input code point by code point:
 * <ul>
 * <li>A quote character opens a quoted section when none is open, and the
 *     same quote character closes it; the quote characters themselves are
 *     not part of the argument.</li>
 * <li>Outside quotes, a backslash followed by whitespace yields that
 *     whitespace literally; inside double quotes, a backslash followed by a
 *     double quote yields the double quote. Any other backslash is kept
 *     as-is.</li>
 * <li>Unquoted whitespace separates arguments; empty arguments are
 *     discarded.</li>
 * </ul>
 *
 * @param cmd Command string
 * @return Array of arguments; a single empty string if {@code cmd} is blank
 */
public static String[] parseCLICommand(String cmd) {
    final String input = cmd.trim();
    if (input.isEmpty()) {
        return new String[] { "" };
    }

    final char unquoted = '\0';
    final int end = input.length();
    final List<String> args = new ArrayList<String>();
    StringBuilder current = new StringBuilder();
    char quote = unquoted;

    int pos = 0;
    while (pos < end) {
        final int cp = input.codePointAt(pos);
        // From here on, pos points at the code point following cp.
        pos += Character.charCount(cp);
        final boolean outsideQuotes = quote == unquoted;

        if (cp == quote) {
            // Closing quote of the currently open section.
            quote = unquoted;
        } else if (outsideQuotes && (cp == '"' || cp == '\'')) {
            quote = (char) cp;
        } else if (cp == '\\' && pos < end) {
            final int next = input.codePointAt(pos);
            final boolean escapesWhitespace = outsideQuotes && Character.isWhitespace(next);
            final boolean escapesDoubleQuote = quote == '"' && next == '"';
            if (escapesWhitespace || escapesDoubleQuote) {
                // Emit the escaped character and consume it as well.
                current.appendCodePoint(next);
                pos += Character.charCount(next);
            } else {
                current.appendCodePoint(cp);
            }
        } else if (outsideQuotes && Character.isWhitespace(cp)) {
            // Argument separator; runs of whitespace produce no empty arguments.
            if (current.length() > 0) {
                args.add(current.toString());
                current = new StringBuilder();
            }
        } else {
            current.appendCodePoint(cp);
        }
    }

    // Flush the argument still being built when the input ended.
    if (current.length() > 0) {
        args.add(current.toString());
    }
    return args.toArray(new String[args.size()]);
}

From source file:com.android.quicksearchbox.ShortcutRepositoryImplLog.java

/**
 * Given a string x, this method returns the least string y such that x is not a prefix of y.
 * This is useful to implement prefix filtering by comparison, since the only strings z that
 * have x as a prefix are such that z is greater than or equal to x and z is less than y.
 *
 * <p>The result is built by replacing the last code point with its successor. If the last
 * code point is U+10FFFF, which has no successor, it is dropped and the code point before it
 * is incremented instead, like a carry in addition.
 *
 * @param str A non-empty string. The contract above is not honored for an empty input string,
 *        since all strings have the empty string as a prefix; the empty string is returned
 *        unchanged.
 * @return the successor string described above
 * @throws IllegalArgumentException if {@code str} consists only of U+10FFFF code points, in
 *         which case no greater string without that prefix exists
 */
private static String nextString(String str) {
    int end = str.length();
    if (end == 0) {
        return str;
    }
    while (end > 0) {
        // The last code point of the remaining prefix. Within the Basic Multilingual Plane,
        // this is the same as str.charAt(end - 1).
        final int codePoint = str.codePointBefore(end);
        // Character.charCount(codePoint) is always 1 (in the BMP) or 2.
        final int start = end - Character.charCount(codePoint);
        if (codePoint != Character.MAX_CODE_POINT) {
            return new StringBuilder(end).append(str, 0, start) // everything before the code point
                    .appendCodePoint(codePoint + 1) // its successor, guaranteed <= U+10FFFF
                    .toString();
        }
        // U+10FFFF cannot be incremented: drop it and carry into the preceding code point.
        end = start;
    }
    throw new IllegalArgumentException("No successor exists for a string made only of U+10FFFF");
}

From source file:com.microsoft.windowsazure.mobileservices.MobileServiceTableBase.java

/**
 * Checks whether a string contains any of these special characters:
 * "(U+0022), +(U+002B), /(U+002F), ?(U+003F), \(U+005C), `(U+0060).
 *
 * @param s the string to inspect; must not be {@code null}
 * @return {@code true} as soon as one of the listed characters is found,
 *         {@code false} if the string contains none of them
 */
protected boolean containsSpecialCharacter(String s) {
    int index = 0;
    while (index < s.length()) {
        final int cp = s.codePointAt(index);
        switch (cp) {
        case '"':  // U+0022 quotation mark
        case '+':  // U+002B plus sign
        case '/':  // U+002F solidus
        case '?':  // U+003F question mark
        case '\\': // U+005C reverse solidus
        case '`':  // U+0060 grave accent
            return true;
        default:
            // Not special: move past this code point (one or two chars).
            index += Character.charCount(cp);
        }
    }
    return false;
}

From source file:org.cosmo.common.util.Util.java

/**
* This method ensures that the output String has only valid XML unicode characters as specified by the
* XML 1.0 standard. For reference, please see the {@code Char} production in section 2.2 of the
* XML 1.0 recommendation. This method will return an empty String if the input is null or empty.
*
* <p>The input is processed one code point at a time, so a character stored as a surrogate pair
* (for example {@code "\ud801\udc00"}) is kept or dropped as a whole, and an unpaired surrogate
* is dropped.
*
* @author Donoiu Cristian, GPL
* @param s The String whose non-valid characters we want to remove; may be null.
* @return The in String, stripped of non-valid characters; never null.
*/
public static String removeInvalidXMLCharacters(String s) {
    if (s == null || s.isEmpty()) {
        return "";
    }
    final StringBuilder out = new StringBuilder(s.length()); // Used to hold the output.
    int i = 0;
    while (i < s.length()) {
        final int codePoint = s.codePointAt(i); // This is the unicode code of the character.
        if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
                || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
                || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
                || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {
            out.appendCodePoint(codePoint);
        }
        // Increment with the number of code units (java chars) needed to represent a Unicode char.
        i += Character.charCount(codePoint);
    }
    return out.toString();
}

From source file:org.pentaho.reporting.libraries.xmlns.writer.XmlWriterSupport.java

/**
 * Writes a string to the given writer with XML-sensitive characters escaped.
 *
 * <p>The markup characters {@code < > " & '} are replaced by their predefined
 * entities. Tab is always written as-is. Line feed and carriage return are
 * written as-is, or as the numeric references {@code &#10;} and {@code &#13;}
 * when {@code transformNewLine} is set. All other code points below 0x20 are
 * dropped. Any remaining code point the encoder cannot represent is written
 * as a hexadecimal character reference.
 *
 * @param writer the destination of the escaped text
 * @param s the text to write; nothing is written if {@code null}
 * @param encoder used to test whether a code point can be written directly;
 *        if {@code null}, every code point is written directly
 * @param transformNewLine whether line feed and carriage return are written
 *        as numeric character references
 * @throws IOException if writing to {@code writer} fails
 */
private static void writeTextNormalized(final Writer writer, final String s, final CharsetEncoder encoder,
        final boolean transformNewLine) throws IOException {

    if (s == null) {
        return;
    }

    final StringBuilder strB = new StringBuilder(s.length());
    for (int offset = 0; offset < s.length();) {
        final int cp = s.codePointAt(offset);

        switch (cp) {
        case 9: // \t
            strB.appendCodePoint(cp);
            break;
        case 10: // \n
            if (transformNewLine) {
                strB.append("&#10;");
                break;
            }
            strB.appendCodePoint(cp);
            break;
        case 13: // \r
            if (transformNewLine) {
                strB.append("&#13;");
                break;
            }
            strB.appendCodePoint(cp);
            break;
        case 60: // <
            strB.append("&lt;");
            break;
        case 62: // >
            strB.append("&gt;");
            break;
        case 34: // "
            strB.append("&quot;");
            break;
        case 38: // &
            strB.append("&amp;");
            break;
        case 39: // '
            strB.append("&apos;");
            break;
        default:
            if (cp >= 0x20) {
                final String cpStr = new String(new int[] { cp }, 0, 1);
                if ((encoder != null) && !encoder.canEncode(cpStr)) {
                    // A character reference must end with ';' to be well-formed XML.
                    strB.append("&#x").append(Integer.toHexString(cp)).append(';');
                } else {
                    strB.appendCodePoint(cp);
                }
            }
            // Other control characters below 0x20 are intentionally not written.
            break;
        }

        // Advance by one char (BMP) or two chars (supplementary code point).
        offset += Character.charCount(cp);
    }

    writer.write(strB.toString());
}

From source file:com.flexoodb.common.FlexUtils.java

/**
 * Returns a copy of the given string that keeps only the code points allowed
 * by the checks below: tab, line feed, carriage return, U+0020..U+D7FF,
 * U+E000..U+FFFD and U+10000..U+10FFFF. Everything else is dropped.
 *
 * @param s the string to filter; must not be {@code null}
 * @return the filtered string, possibly empty
 */
public static String removeInvalidXMLCharacters(String s) {
    final StringBuilder cleaned = new StringBuilder();

    for (int pos = 0; pos < s.length();) {
        final int cp = s.codePointAt(pos);
        // Move past this code point now; it occupies one or two chars.
        pos += Character.charCount(cp);

        final boolean allowedControl = cp == 0x9 || cp == 0xA || cp == 0xD;
        final boolean allowedBmp = (cp >= 0x20 && cp <= 0xD7FF) || (cp >= 0xE000 && cp <= 0xFFFD);
        final boolean allowedSupplementary = cp >= 0x10000 && cp <= 0x10FFFF;

        if (allowedControl || allowedBmp || allowedSupplementary) {
            cleaned.appendCodePoint(cp);
        }
    }

    return cleaned.toString();
}

From source file:com.flexoodb.common.FlexUtils.java

/**
 * Returns a copy of the given string containing only its code points below
 * 128; every other code point, including each supplementary character as a
 * whole, is dropped.
 *
 * @param s the string to filter; must not be {@code null}
 * @return the filtered string, possibly empty
 */
public static String removeNonASCII(String s) {
    final StringBuilder ascii = new StringBuilder();

    for (int pos = 0; pos < s.length();) {
        final int cp = s.codePointAt(pos);
        if (cp < 128) {
            // Below 128 the code point always fits in a single char.
            ascii.append((char) cp);
        }
        pos += Character.charCount(cp);
    }

    return ascii.toString();
}

From source file:bfile.util.StringUtils.java

/**
 * <p>Capitalizes a String changing the first character to title case as
 * per {@link Character#toTitleCase(int)}. No other characters are changed.</p>
 *
 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#capitalize(String)}.
 * A {@code null} input String returns {@code null}.</p>
 *
 * <pre>/*from w w  w.  j  a v a 2s  . c om*/
 * StringUtils.capitalize(null)  = null
 * StringUtils.capitalize("")    = ""
 * StringUtils.capitalize("cat") = "Cat"
 * StringUtils.capitalize("cAt") = "CAt"
 * StringUtils.capitalize("'cat'") = "'cat'"
 * </pre>
 *
 * @param str the String to capitalize, may be null
 * @return the capitalized String, {@code null} if null String input
 * @see org.apache.commons.lang3.text.WordUtils#capitalize(String)
 * @see #uncapitalize(String)
 * @since 2.0
 */
public static String capitalize(final String str) {
    int strLen;
    if (str == null || (strLen = str.length()) == 0) {
        return str;
    }

    final int firstCodepoint = str.codePointAt(0);
    final int newCodePoint = Character.toTitleCase(firstCodepoint);
    if (firstCodepoint == newCodePoint) {
        // already capitalized
        return str;
    }

    int newCodePoints[] = new int[strLen]; // cannot be longer than the char array
    int outOffset = 0;
    newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint
    for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen;) {
        final int codepoint = str.codePointAt(inOffset);
        newCodePoints[outOffset++] = codepoint; // copy the remaining ones
        inOffset += Character.charCount(codepoint);
    }
    return new String(newCodePoints, 0, outOffset);
}

From source file:bfile.util.StringUtils.java

/**
 * <p>Uncapitalizes a String, changing the first character to lower case as
 * per {@link Character#toLowerCase(int)}. No other characters are changed.</p>
 *
 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#uncapitalize(String)}.
 * A {@code null} input String returns {@code null}.</p>
 *
 * <pre>
 * StringUtils.uncapitalize(null)  = null
 * StringUtils.uncapitalize("")    = ""
 * StringUtils.uncapitalize("cat") = "cat"
 * StringUtils.uncapitalize("Cat") = "cat"
 * StringUtils.uncapitalize("CAT") = "cAT"
 * </pre>
 *
 * @param str the String to uncapitalize, may be null
 * @return the uncapitalized String, {@code null} if null String input
 * @see org.apache.commons.lang3.text.WordUtils#uncapitalize(String)
 * @see #capitalize(String)
 * @since 2.0
 */
public static String uncapitalize(final String str) {
    if (str == null || str.isEmpty()) {
        return str;
    }

    final int head = str.codePointAt(0);
    final int lowered = Character.toLowerCase(head);
    if (head == lowered) {
        // first code point is unchanged by lower-casing, so return the input itself
        return str;
    }

    // Swap in the lower-cased first code point, then copy every remaining char untouched.
    final StringBuilder result = new StringBuilder(str.length());
    result.appendCodePoint(lowered);
    result.append(str, Character.charCount(head), str.length());
    return result.toString();
}