List of usage examples for java.lang.Character.charCount
public static int charCount(int codePoint)
From source file:com.microsoft.windowsazure.mobileservices.MobileServiceTableBase.java
/** * Validates if a given string contains a control character. * @param s/*from ww w . j ava 2 s .c o m*/ * @return */ protected boolean containsControlCharacter(String s) { boolean result = false; final int length = s.length(); for (int offset = 0; offset < length;) { final int codepoint = s.codePointAt(offset); if (Character.isISOControl(codepoint)) { result = true; break; } offset += Character.charCount(codepoint); } return result; }
From source file:org.omegat.util.StaticUtils.java
/** * Parse a command line string into arguments, interpreting * double and single quotes as Bash does. * @param cmd Command string// w w w . j ava2s . c o m * @return Array of arguments */ public static String[] parseCLICommand(String cmd) { cmd = cmd.trim(); if (cmd.isEmpty()) { return new String[] { "" }; } StringBuilder arg = new StringBuilder(); List<String> result = new ArrayList<String>(); final char noQuote = '\0'; char currentQuote = noQuote; for (int cp, i = 0; i < cmd.length(); i += Character.charCount(cp)) { cp = cmd.codePointAt(i); if (cp == currentQuote) { currentQuote = noQuote; } else if (cp == '"' && currentQuote == noQuote) { currentQuote = '"'; } else if (cp == '\'' && currentQuote == noQuote) { currentQuote = '\''; } else if (cp == '\\' && i + 1 < cmd.length()) { int ncp = cmd.codePointAt(cmd.offsetByCodePoints(i, 1)); if ((currentQuote == noQuote && Character.isWhitespace(ncp)) || (currentQuote == '"' && ncp == '"')) { arg.appendCodePoint(ncp); i += Character.charCount(ncp); } else { arg.appendCodePoint(cp); } } else { if (Character.isWhitespace(cp) && currentQuote == noQuote) { if (arg.length() > 0) { result.add(arg.toString()); arg = new StringBuilder(); } else { // Discard } } else { arg.appendCodePoint(cp); } } } // Catch last arg if (arg.length() > 0) { result.add(arg.toString()); } return result.toArray(new String[result.size()]); }
From source file:com.android.quicksearchbox.ShortcutRepositoryImplLog.java
/** * Given a string x, this method returns the least string y such that x is not a prefix of y. * This is useful to implement prefix filtering by comparison, since the only strings z that * have x as a prefix are such that z is greater than or equal to x and z is less than y. * * @param str A non-empty string. The contract above is not honored for an empty input string, * since all strings have the empty string as a prefix. *///from w w w . j ava2s.c o m private static String nextString(String str) { int len = str.length(); if (len == 0) { return str; } // The last code point in the string. Within the Basic Multilingual Plane, // this is the same as str.charAt(len-1) int codePoint = str.codePointBefore(len); // This should be safe from overflow, since the largest code point // representable in UTF-16 is U+10FFFF. int nextCodePoint = codePoint + 1; // The index of the start of the last code point. // Character.charCount(codePoint) is always 1 (in the BMP) or 2 int lastIndex = len - Character.charCount(codePoint); return new StringBuilder(len).append(str, 0, lastIndex) // append everything but the last code point .appendCodePoint(nextCodePoint) // instead of the last code point, use successor .toString(); }
From source file:com.microsoft.windowsazure.mobileservices.MobileServiceTableBase.java
/**
 * Reports whether a string contains any of these special characters:
 * quotation mark (U+0022), plus sign (U+002B), solidus (U+002F),
 * question mark (U+003F), reverse solidus (U+005C) or grave accent (U+0060).
 *
 * @param s the string to inspect; must not be {@code null}
 * @return {@code true} as soon as one of the listed code points is found,
 *         {@code false} if the string contains none of them
 */
protected boolean containsSpecialCharacter(String s) {
    final int end = s.length();
    int index = 0;
    while (index < end) {
        final int current = s.codePointAt(index);
        switch (current) {
            case 0x0022: // quotation mark
            case 0x002B: // plus sign
            case 0x002F: // solidus
            case 0x003F: // question mark
            case 0x005C: // reverse solidus
            case 0x0060: // grave accent
                return true;
            default:
                // Not special: move past this code point (one or two chars).
                index += Character.charCount(current);
                break;
        }
    }
    return false;
}
From source file:org.cosmo.common.util.Util.java
/** * This method ensures that the output String has only valid XML unicode characters as specified by the * XML 1.0 standard. For reference, please see the * standard. This method will return an empty String if the input is null or empty. * * @author Donoiu Cristian, GPL//w ww . j a v a2 s. com * @param The String whose non-valid characters we want to remove. * @return The in String, stripped of non-valid characters. */ public static String removeInvalidXMLCharacters(String s) { StringBuilder out = new StringBuilder(); // Used to hold the output. int codePoint; // Used to reference the current character. //String ss = "\ud801\udc00"; // This is actualy one unicode character, represented by two code units!!!. //System.out.println(ss.codePointCount(0, ss.length()));// See: 1 int i = 0; while (i < s.length()) { //System.out.println("i=" + i); codePoint = s.codePointAt(i); // This is the unicode code of the character. if ((codePoint == 0x9) || // Consider testing larger ranges first to improve speed. (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); // Increment with the number of code units(java chars) needed to represent a Unicode char. } return out.toString(); }
From source file:org.pentaho.reporting.libraries.xmlns.writer.XmlWriterSupport.java
private static void writeTextNormalized(final Writer writer, final String s, final CharsetEncoder encoder, final boolean transformNewLine) throws IOException { if (s == null) { return;//from w ww . j a va 2 s. com } final StringBuilder strB = new StringBuilder(s.length()); for (int offset = 0; offset < s.length();) { final int cp = s.codePointAt(offset); switch (cp) { case 9: // \t strB.appendCodePoint(cp); break; case 10: // \n if (transformNewLine) { strB.append(" "); break; } strB.appendCodePoint(cp); break; case 13: // \r if (transformNewLine) { strB.append(" "); break; } strB.appendCodePoint(cp); break; case 60: // < strB.append("<"); break; case 62: // > strB.append(">"); break; case 34: // " strB.append("""); break; case 38: // & strB.append("&"); break; case 39: // ' strB.append("'"); break; default: if (cp >= 0x20) { final String cpStr = new String(new int[] { cp }, 0, 1); if ((encoder != null) && !encoder.canEncode(cpStr)) { strB.append("&#x" + Integer.toHexString(cp)); } else { strB.appendCodePoint(cp); } } } offset += Character.charCount(cp); } writer.write(strB.toString()); }
From source file:com.flexoodb.common.FlexUtils.java
public static String removeInvalidXMLCharacters(String s) { StringBuilder out = new StringBuilder(); int codePoint; int i = 0;/*from w w w. ja v a2 s . co m*/ while (i < s.length()) { // This is the unicode code of the character. codePoint = s.codePointAt(i); if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); } return out.toString(); }
From source file:com.flexoodb.common.FlexUtils.java
public static String removeNonASCII(String s) { StringBuilder out = new StringBuilder(); int codePoint; int i = 0;/* w w w. j a v a 2 s.com*/ while (i < s.length()) { // This is the unicode code of the character. codePoint = s.codePointAt(i); if (codePoint < 128) { out.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); } return out.toString(); }
From source file:bfile.util.StringUtils.java
/** * <p>Capitalizes a String changing the first character to title case as * per {@link Character#toTitleCase(int)}. No other characters are changed.</p> * * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#capitalize(String)}. * A {@code null} input String returns {@code null}.</p> * * <pre>/*from w w w. j a v a 2s . c om*/ * StringUtils.capitalize(null) = null * StringUtils.capitalize("") = "" * StringUtils.capitalize("cat") = "Cat" * StringUtils.capitalize("cAt") = "CAt" * StringUtils.capitalize("'cat'") = "'cat'" * </pre> * * @param str the String to capitalize, may be null * @return the capitalized String, {@code null} if null String input * @see org.apache.commons.lang3.text.WordUtils#capitalize(String) * @see #uncapitalize(String) * @since 2.0 */ public static String capitalize(final String str) { int strLen; if (str == null || (strLen = str.length()) == 0) { return str; } final int firstCodepoint = str.codePointAt(0); final int newCodePoint = Character.toTitleCase(firstCodepoint); if (firstCodepoint == newCodePoint) { // already capitalized return str; } int newCodePoints[] = new int[strLen]; // cannot be longer than the char array int outOffset = 0; newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen;) { final int codepoint = str.codePointAt(inOffset); newCodePoints[outOffset++] = codepoint; // copy the remaining ones inOffset += Character.charCount(codepoint); } return new String(newCodePoints, 0, outOffset); }
From source file:bfile.util.StringUtils.java
/** * <p>Uncapitalizes a String, changing the first character to lower case as * per {@link Character#toLowerCase(int)}. No other characters are changed.</p> * * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#uncapitalize(String)}. * A {@code null} input String returns {@code null}.</p> * * <pre>//ww w . ja v a 2 s .c om * StringUtils.uncapitalize(null) = null * StringUtils.uncapitalize("") = "" * StringUtils.uncapitalize("cat") = "cat" * StringUtils.uncapitalize("Cat") = "cat" * StringUtils.uncapitalize("CAT") = "cAT" * </pre> * * @param str the String to uncapitalize, may be null * @return the uncapitalized String, {@code null} if null String input * @see org.apache.commons.lang3.text.WordUtils#uncapitalize(String) * @see #capitalize(String) * @since 2.0 */ public static String uncapitalize(final String str) { int strLen; if (str == null || (strLen = str.length()) == 0) { return str; } final int firstCodepoint = str.codePointAt(0); final int newCodePoint = Character.toLowerCase(firstCodepoint); if (firstCodepoint == newCodePoint) { // already capitalized return str; } int newCodePoints[] = new int[strLen]; // cannot be longer than the char array int outOffset = 0; newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen;) { final int codepoint = str.codePointAt(inOffset); newCodePoints[outOffset++] = codepoint; // copy the remaining ones inOffset += Character.charCount(codepoint); } return new String(newCodePoints, 0, outOffset); }