List of usage examples for java.lang.Character.charCount
public static int charCount(int codePoint)
From source file:Main.java
/** * Similar to String.contains() with two main differences: * <p>//ww w .j a v a 2 s. c om * 1) Only searches token prefixes. A token is defined as any combination of letters or * numbers. * <p> * 2) Returns the starting index where the substring is found. * * @param value The string to search. * @param substring The substring to look for. * @return The starting index where the substring is found. {@literal -1} if substring is not * found in value. */ @VisibleForTesting static int contains(String value, String substring) { if (value.length() < substring.length()) { return -1; } // i18n support // Generate the code points for the substring once. // There will be a maximum of substring.length code points. But may be fewer. // Since the array length is not an accurate size, we need to keep a separate variable. final int[] substringCodePoints = new int[substring.length()]; int substringLength = 0; // may not equal substring.length()!! for (int i = 0; i < substring.length();) { final int codePoint = Character.codePointAt(substring, i); substringCodePoints[substringLength] = codePoint; substringLength++; i += Character.charCount(codePoint); } for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) { int numMatch = 0; for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) { int valueCp = Character.toLowerCase(value.codePointAt(j)); int substringCp = substringCodePoints[numMatch]; if (valueCp != substringCp) { break; } j += Character.charCount(valueCp); } if (numMatch == substringLength) { return i; } } return -1; }
From source file:Main.java
/** * Determines if a character sequence is a QName. * <p>// ww w .j a va 2s . c om * A QName is either: * <ul> * <li>an NCName (LocalName), or</li> * <li>an NCName followed by a colon and by another NCName * (PrefixName:LocalName)</li> * </ul> * * Source: http://www.w3.org/TR/xml-names/#NT-QName * * @param s * The character sequence to test. * @return Returns <code>true</code> if the character sequence * <code>cs</code> is a QName, or <code>false</code> otherwise. */ public static boolean isQName(CharSequence s) { if (isEmpty(s)) { return false; } boolean foundColon = false; boolean inNCName = false; for (int i = 0; i < s.length();) { int c = Character.codePointAt(s, i); if (c == ':') { //$NON-NLS-1$ if (foundColon) { return false; } foundColon = true; if (!inNCName) { return false; } inNCName = false; } else { if (!inNCName) { if (!isXmlNameStartChar(c)) { return false; } inNCName = true; } else { if (!isXmlNameChar(c)) { return false; } } } i += Character.charCount(c); } return true; }
From source file:Main.java
/** * Mangle a string so that it can be represented in an XML document. * /*from w w w . j ava 2 s . c o m*/ * There are three kinds of code points in XML: * - Those that can be represented normally, * - Those that have to be escaped (for example, & must be represented * as &) * - Those that cannot be represented at all in XML. * * The built-in SAX functions will handle the first two types for us just * fine. However, sometimes we come across a code point of the third type. * In this case, we have to mangle the string in order to represent it at * all. We also mangle backslash to avoid confusing a backslash in the * string with part our escape sequence. * * The encoding used here is as follows: an illegal code point is * represented as '\ABCD;', where ABCD is the hexadecimal value of * the code point. * * @param str The input string. * * @return The mangled string. */ public static String mangleXmlString(String str, boolean createEntityRefs) { final StringBuilder bld = new StringBuilder(); final int length = str.length(); for (int offset = 0; offset < length;) { final int cp = str.codePointAt(offset); final int len = Character.charCount(cp); if (codePointMustBeMangled(cp)) { bld.append(mangleCodePoint(cp)); } else { String entityRef = null; if (createEntityRefs) { entityRef = codePointToEntityRef(cp); } if (entityRef != null) { bld.append(entityRef); } else { for (int i = 0; i < len; i++) { bld.append(str.charAt(offset + i)); } } } offset += len; } return bld.toString(); }
From source file:Main.java
/** * Determines if a character sequence is an NCName (Non-Colonised Name). An * NCName is a string which starts with an NCName start character and is * followed by zero or more NCName characters. * /*from ww w . j av a 2 s. com*/ * @param s * The character sequence to be tested. * @return {@code true} if {@code s} is an NCName, otherwise {@code false}. */ public static boolean isNCName(@Nullable CharSequence s) { if (isNullOrEmpty(s)) { return false; } assert s != null; int firstCodePoint = Character.codePointAt(s, 0); if (!isNCNameStartChar(firstCodePoint)) { return false; } for (int i = Character.charCount(firstCodePoint); i < s.length();) { int codePoint = Character.codePointAt(s, i); if (!isNCNameChar(codePoint)) { return false; } i += Character.charCount(codePoint); } return true; }
From source file:org.mariotaku.twidere.util.CodePointArray.java
/**
 * Decodes the given character sequence into an array of Unicode code points.
 * <p>
 * The backing array is allocated with one slot per {@code char} of the input,
 * which is an upper bound; the number of slots actually filled is stored in
 * {@code length}.
 *
 * @param cs the character sequence to decode
 */
public CodePointArray(@NonNull final CharSequence cs) {
    final int charLength = cs.length();
    final int[] decoded = new int[charLength];
    int count = 0;
    int pos = 0;
    while (pos < charLength) {
        final int cp = Character.codePointAt(cs, pos);
        decoded[count] = cp;
        count++;
        pos += Character.charCount(cp);
    }
    codePoints = decoded;
    this.length = count;
}
From source file:com.evolveum.midpoint.common.Utils.java
/**
 * Removes from the string every code point that {@code isValidXmlCodepoint}
 * rejects (non-printable UTF characters).
 *
 * This is not really used now. It was done as a kind of prototype for
 * filters. But it may come in handy, and it in fact tests that the pattern
 * is doing what is expected, so it may be useful.
 *
 * @param bad
 *            string with bad chars
 * @return string without bad chars
 */
public static String cleanupUtf(String bad) {
    final StringBuilder cleaned = new StringBuilder(bad.length());
    int pos = 0;
    while (pos < bad.length()) {
        final int codePoint = bad.codePointAt(pos);
        if (isValidXmlCodepoint(codePoint)) {
            cleaned.appendCodePoint(codePoint);
        }
        pos += Character.charCount(codePoint);
    }
    return cleaned.toString();
}
From source file:Main.java
/** * Escapes a character sequence so that it is valid XML. * // www . j av a 2 s .com * @param s * The character sequence. * @return The escaped version of the character sequence. */ public static String escapeXML(CharSequence s) { // double quote -- quot // ampersand -- amp // less than -- lt // greater than -- gt // apostrophe -- apos StringBuilder sb = new StringBuilder(s.length() * 2); for (int i = 0; i < s.length();) { int codePoint = Character.codePointAt(s, i); if (codePoint == '<') { sb.append(LT); } else if (codePoint == '>') { sb.append(GT); } else if (codePoint == '\"') { sb.append(QUOT); } else if (codePoint == '&') { sb.append(AMP); } else if (codePoint == '\'') { sb.append(APOS); } else { sb.appendCodePoint(codePoint); } i += Character.charCount(codePoint); } return sb.toString(); }
From source file:org.mariotaku.twidere.util.CodePointArray.java
public int indexOfText(int codePoint, int start) { int index = 0; for (int i = 0; i < length; i++) { final int current = codePoints[i]; if (current == codePoint && i >= start) return index; index += Character.charCount(current); }//from w w w. j a va 2 s .c om return -1; }
From source file:Main.java
/**
 * Determines if a character sequence is a QName. A QName is either an
 * NCName (LocalName), or an NCName followed by a colon followed by another
 * NCName (where the first NCName is referred to as the 'Prefix Name' and
 * the second NCName is referred to as the 'Local Name' - i.e.
 * PrefixName:LocalName).
 *
 * @param s
 *        The character sequence to be tested.
 * @return {@code true} if {@code s} is a QName, otherwise {@code false}.
 *         A sequence that starts or ends with a colon, or that contains
 *         more than one colon, is not a QName.
 */
public static boolean isQName(CharSequence s) {
    if (isNullOrEmpty(s)) {
        return false;
    }
    boolean foundColon = false;
    boolean inNCName = false;
    for (int i = 0; i < s.length();) {
        int codePoint = Character.codePointAt(s, i);
        if (codePoint == ':') {
            // Only one colon is allowed, and it must directly follow an NCName.
            if (foundColon || !inNCName) {
                return false;
            }
            foundColon = true;
            inNCName = false;
        } else if (!inNCName) {
            // First code point of an NCName (either the prefix or the local name).
            if (!isXMLNameStartCharacter(codePoint)) {
                return false;
            }
            inNCName = true;
        } else if (!isXMLNameChar(codePoint)) {
            return false;
        }
        i += Character.charCount(codePoint);
    }
    // If the last code point was the colon we are no longer inside an NCName,
    // i.e. the local name is missing ("prefix:"), which is not a valid QName.
    return inNCName;
}
From source file:jp.furplag.util.commons.StringUtils.java
/** * return the Array of Unicode code points in the string. * * @param str the string, may be null./* w w w.j a v a2s . co m*/ * @return Array of codepoints. */ public static int[] getCodePoints(final String str) { char[] chars = defaultString(str).toCharArray(); int[] ret = new int[Character.codePointCount(chars, 0, chars.length)]; int index = 0; for (int i = 0, codePoint; i < chars.length; i += Character.charCount(codePoint)) { codePoint = Character.codePointAt(chars, i); ret[index++] = codePoint; } return ret; }