Usage examples for java.lang.Character.isHighSurrogate(char)
public static boolean isHighSurrogate(char ch)
From source file:Main.java
public static void main(String[] args) { for (char ch = Character.MIN_VALUE; ch < Character.MAX_VALUE; ch++) { if (Character.isHighSurrogate(ch)) { String s = String.format("\\u%04x", (int) ch); System.out.println(s); }// w w w . ja v a 2 s . c o m } }
From source file:Main.java
public static long bytesRequiredToEncode(final String s, final Charset encoding) { int ENCODE_CHUNK = 100; long count = 0; for (int i = 0; i < s.length();) { int end = i + ENCODE_CHUNK; if (end >= s.length()) { end = s.length();// w ww. j a v a2 s . c om } else if (Character.isHighSurrogate(s.charAt(end))) { end++; } count += encoding.encode(s.substring(i, end)).remaining() + 1; i = end; } return count; }
From source file:Main.java
/** * Replaces instances of Emoji unicode characters with their Emoji-Cheat sheet key * * @param s/*from w w w . ja v a 2s. c o m*/ * @return */ public static String replaceUnicodeEmojis(String s) { if (TextUtils.isEmpty(s)) { return ""; } for (int i = 0; i < s.length(); i++) { String key = s.substring(i, i + 1); if ((Character.isLowSurrogate(key.charAt(0)) || Character.isHighSurrogate(key.charAt(0))) && s.length() > i + 1) { key = s.substring(i, i + 2); } String emoji = UNICODE_TO_CHEAT_SHEET.get(key); if (null != emoji) { s = s.replace(key, emoji); } } return s; }
From source file:Main.java
protected static int testEscape(char[] chars, char[] encodings) { int index = 0; int length = chars.length; while (index < length) { char c1 = chars[index++]; if (Character.isHighSurrogate(c1)) { if (index < length) { char c2 = chars[index++]; if (Character.isLowSurrogate(c2)) { int cp = Character.toCodePoint(c1, c2); if (isValidCodePoint(cp)) { continue; }/*w ww . j av a 2 s.c o m*/ } return index - 2; } return index - 1; } else { if (isValidCodePoint(c1)) { if (encodings != null) { for (char ch : encodings) { if (c1 == ch) { return index - 1; } } } continue; } return index - 1; } } return length; }
From source file:Main.java
public static String $$truncate(String str, int maxLen, boolean doAddEllipsis) { if (str.length() <= maxLen) { return str; // no need to truncate }/* w ww. j av a2s. c o m*/ // If doAddEllipsis, either reduce maxLen to compensate, or else if maxLen is too small, just // turn off doAddEllipsis. if (doAddEllipsis) { if (maxLen > 3) { maxLen -= 3; } else { doAddEllipsis = false; } } // Make sure truncating at maxLen doesn't cut up a unicode surrogate pair. if (Character.isHighSurrogate(str.charAt(maxLen - 1)) && Character.isLowSurrogate(str.charAt(maxLen))) { maxLen -= 1; } // Truncate. str = str.substring(0, maxLen); // Add ellipsis. if (doAddEllipsis) { str += "..."; } return str; }
From source file:Main.java
/** * This method ensures that the output String has only valid XML unicode * characters as specified by the XML 1.0 standard. For reference, please * see/*from w w w. j a v a 2s. co m*/ * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the * standard</a>. This method will return an empty String if the input is * null or empty. * * @param in The String whose non-valid characters we want to remove. * @return The in String, stripped of non-valid characters. */ public static String cleanInvalidXmlChars(String text) { if (null == text || text.isEmpty()) { return text; } final int len = text.length(); char current = 0; int codePoint = 0; StringBuilder sb = new StringBuilder(); for (int i = 0; i < len; i++) { current = text.charAt(i); boolean surrogate = false; if (Character.isHighSurrogate(current) && i + 1 < len && Character.isLowSurrogate(text.charAt(i + 1))) { surrogate = true; codePoint = text.codePointAt(i++); } else { codePoint = current; } if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { sb.append(current); if (surrogate) { sb.append(text.charAt(i)); } } else { // // Invalid Char at index transformed into hex //System.err.println("Index=["+ i +"] Char=["+ String.format("%04x", (int)text.charAt(i)) +"] CodePoint=[" + codePoint + "]"); //sb.append("hex"+String.format("%04x", (int)text.charAt(i))); } } return sb.toString(); }
From source file:Strings.java
/** * Returns {@code true} if the specified character sequence is a * valid sequence of UTF-16 {@code char} values. A sequence is * legal if each high surrogate {@code char} value is followed by * a low surrogate value (as defined by {@link * Character#isHighSurrogate(char)} and {@link * Character#isLowSurrogate(char)})./*from w ww . java 2 s .c o m*/ * * <p>This method does <b>not</b> check to see if the sequence of * code points defined by the UTF-16 consists only of code points * defined in the latest Unicode standard. The method only tests * the validity of the UTF-16 encoding sequence. * * @param cs Character sequence to test. * @return {@code true} if the sequence of characters is * legal in UTF-16. */ public static boolean isLegalUtf16(CharSequence cs) { for (int i = 0; i < cs.length(); ++i) { char high = cs.charAt(i); if (Character.isLowSurrogate(high)) return false; if (!Character.isHighSurrogate(high)) continue; ++i; if (i >= cs.length()) return false; char low = cs.charAt(i); if (!Character.isLowSurrogate(low)) return false; int codePoint = Character.toCodePoint(high, low); if (!Character.isValidCodePoint(codePoint)) return false; } return true; }
From source file:de.fau.cs.osr.utils.StringUtils.java
public static String escHtml(String text, boolean forAttribute) { // StringEscapeUtils.escapeHtml(in) does not escape '\'' but a lot of // other stuff that doesn't need escaping. if (text == null) return ""; int n = text.length(); StringBuilder sb = new StringBuilder(n * 4 / 3); for (int i = 0; i < n; i++) { char ch = text.charAt(i); switch (ch) { case ' ': case '\n': case '\t': sb.append(ch);// w w w . j av a 2 s . c om break; case '<': sb.append("<"); break; case '>': sb.append(forAttribute ? ">" : ">"); break; case '&': sb.append("&"); break; case '\'': // ' cannot safely be used, see wikipedia sb.append("'"); break; case '"': sb.append(forAttribute ? """ : "\""); break; default: if ((ch >= 0 && ch < 0x20) || (ch == 0xFE)) { hexCharRef(sb, ch); break; } else if (Character.isHighSurrogate(ch)) { ++i; if (i < n) { char ch2 = text.charAt(i); if (Character.isLowSurrogate(ch2)) { int codePoint = Character.toCodePoint(ch, ch2); switch (Character.getType(codePoint)) { case Character.CONTROL: case Character.PRIVATE_USE: case Character.UNASSIGNED: hexCharRef(sb, codePoint); break; default: sb.append(ch); sb.append(ch2); break; } continue; } } } else if (!Character.isLowSurrogate(ch)) { sb.append(ch); continue; } // No low surrogate followed or only low surrogate throw new IllegalArgumentException("String contains isolated surrogates!"); } } return sb.toString(); }
From source file:com.microsoft.windowsazure.mobileservices.zumoe2etestapp.framework.Util.java
public static String createSimpleRandomString(Random rndGen, int size, int minChar, int maxChar) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < size; i++) { int charRand; char c;/*from ww w. j a v a 2 s . c o m*/ do { charRand = rndGen.nextInt(maxChar - minChar); c = (char) (minChar + charRand); } while (Character.isLowSurrogate(c) || Character.isHighSurrogate(c)); sb.append(c); } return sb.toString(); }
From source file:it.geosolutions.httpproxy.utils.Utils.java
/** * @param ch/*from w w w . j a v a 2s. com*/ * @return */ final static int escapeHtmlFull(int ch) { if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9') { // safe return ch; } else if (Character.isWhitespace(ch)) { if (ch != '\n' && ch != '\r' && ch != '\t') // safe return ch; } else if (Character.isDefined(ch)) { // safe return ch; } else if (Character.isISOControl(ch)) { // paranoid version:isISOControl which are not isWhitespace // removed ! // do nothing do not include in output ! return -1; } else if (Character.isHighSurrogate((char) ch)) { // do nothing do not include in output ! return -1; } else if (Character.isLowSurrogate((char) ch)) { // wrong char[] sequence, //TODO: LOG !!! return -1; } return -1; }