List of usage examples for java.lang.Character.toCodePoint
public static int toCodePoint(char high, char low)
From source file:Main.java
/**
 * Demonstrates {@link Character#toCodePoint(char, char)} by combining the surrogate pair
 * U+D800 / U+DC00 and printing the resulting supplementary code point.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final char highSurrogate = '\ud800';
    final char lowSurrogate = '\udc00';
    final int codePoint = Character.toCodePoint(highSurrogate, lowSurrogate);
    System.out.println("Supplementary code point value is " + codePoint);
}
From source file:Main.java
protected static int testEscape(char[] chars, char[] encodings) { int index = 0; int length = chars.length; while (index < length) { char c1 = chars[index++]; if (Character.isHighSurrogate(c1)) { if (index < length) { char c2 = chars[index++]; if (Character.isLowSurrogate(c2)) { int cp = Character.toCodePoint(c1, c2); if (isValidCodePoint(cp)) { continue; }//from w w w . j av a2s. c om } return index - 2; } return index - 1; } else { if (isValidCodePoint(c1)) { if (encodings != null) { for (char ch : encodings) { if (c1 == ch) { return index - 1; } } } continue; } return index - 1; } } return length; }
From source file:Strings.java
/**
 * Tells whether the given character sequence is well-formed UTF-16.
 *
 * <p>The sequence is well-formed when every high surrogate (see
 * {@link Character#isHighSurrogate(char)}) is immediately followed by a low surrogate (see
 * {@link Character#isLowSurrogate(char)}) and no low surrogate appears on its own.
 *
 * <p>Only the encoding is checked. The method does <b>not</b> verify that the decoded code
 * points are assigned in any particular version of the Unicode standard.
 *
 * @param cs character sequence to test
 * @return {@code true} if the sequence is legal UTF-16, {@code false} otherwise
 */
public static boolean isLegalUtf16(CharSequence cs) {
    int pos = 0;
    while (pos < cs.length()) {
        final char lead = cs.charAt(pos++);
        if (Character.isLowSurrogate(lead)) {
            // A low surrogate that was not consumed as part of a pair is isolated.
            return false;
        }
        if (Character.isHighSurrogate(lead)) {
            if (pos >= cs.length()) {
                return false;
            }
            final char trail = cs.charAt(pos++);
            if (!Character.isLowSurrogate(trail)) {
                return false;
            }
            if (!Character.isValidCodePoint(Character.toCodePoint(lead, trail))) {
                return false;
            }
        }
    }
    return true;
}
From source file:de.fau.cs.osr.utils.StringUtils.java
public static String escHtml(String text, boolean forAttribute) { // StringEscapeUtils.escapeHtml(in) does not escape '\'' but a lot of // other stuff that doesn't need escaping. if (text == null) return ""; int n = text.length(); StringBuilder sb = new StringBuilder(n * 4 / 3); for (int i = 0; i < n; i++) { char ch = text.charAt(i); switch (ch) { case ' ': case '\n': case '\t': sb.append(ch);/* ww w .j ava2s . c o m*/ break; case '<': sb.append("<"); break; case '>': sb.append(forAttribute ? ">" : ">"); break; case '&': sb.append("&"); break; case '\'': // ' cannot safely be used, see wikipedia sb.append("'"); break; case '"': sb.append(forAttribute ? """ : "\""); break; default: if ((ch >= 0 && ch < 0x20) || (ch == 0xFE)) { hexCharRef(sb, ch); break; } else if (Character.isHighSurrogate(ch)) { ++i; if (i < n) { char ch2 = text.charAt(i); if (Character.isLowSurrogate(ch2)) { int codePoint = Character.toCodePoint(ch, ch2); switch (Character.getType(codePoint)) { case Character.CONTROL: case Character.PRIVATE_USE: case Character.UNASSIGNED: hexCharRef(sb, codePoint); break; default: sb.append(ch); sb.append(ch2); break; } continue; } } } else if (!Character.isLowSurrogate(ch)) { sb.append(ch); continue; } // No low surrogate followed or only low surrogate throw new IllegalArgumentException("String contains isolated surrogates!"); } } return sb.toString(); }
From source file:mobisocial.musubi.ui.util.EmojiSpannableFactory.java
public void updateSpannable(Spannable span) { Spannable source = span;/*from w w w. j av a 2 s . c o m*/ for (int i = 0; i < source.length(); i++) { char high = source.charAt(i); if (high <= 127) { // fast exit ascii continue; } // Block until we're initialized waitForEmoji(); long codePoint = high; if (Character.isHighSurrogate(high)) { char low = source.charAt(++i); codePoint = Character.toCodePoint(high, low); if (Character.isSurrogatePair(high, low)) { // from BMP if (!mEmojiMap.containsKey(codePoint)) { if (i >= source.length() - 2) { continue; } high = source.charAt(++i); if (!Character.isHighSurrogate(high)) { Log.w(TAG, "bad unicode character? " + high); continue; } low = source.charAt(++i); if (!Character.isSurrogatePair(high, low)) { Log.d(TAG, "Bogus unicode surrogate " + high + ", " + low); continue; } int codePoint2 = Character.toCodePoint(high, low); //String label = String.format("U+%X U+%X", codePoint, codePoint2); codePoint = ((long) codePoint << 16) | codePoint2; } } else { Log.d(TAG, "Bogus unicode"); } } if (mEmojiMap.containsKey(codePoint)) { Bitmap b = mStickerCache.get(codePoint); if (b != null) { DynamicDrawableSpan im = createStickerSpan(b); span.setSpan(im, i, i + 1, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); } else { Log.d(TAG, "failed to decode bitmap for codepoints: " + codePoint); } } } }
From source file:net.sourceforge.pmd.util.StringUtil.java
/** * @param buf/*from w w w . j a v a 2 s . c om*/ * @param src * @param supportUTF8 override the default setting, whether special characters should be replaced with entities ( * <code>false</code>) or should be included as is ( <code>true</code>). * */ public static void appendXmlEscaped(StringBuilder buf, String src, boolean supportUTF8) { char c; int i = 0; while (i < src.length()) { c = src.charAt(i++); if (c > '~') { // 126 if (!supportUTF8) { int codepoint = c; // surrogate characters are not allowed in XML if (Character.isHighSurrogate(c)) { char low = src.charAt(i++); codepoint = Character.toCodePoint(c, low); } buf.append("&#x").append(Integer.toHexString(codepoint)).append(';'); } else { buf.append(c); } } else if (c == '&') { buf.append("&"); } else if (c == '"') { buf.append("""); } else if (c == '<') { buf.append("<"); } else if (c == '>') { buf.append(">"); } else { buf.append(c); } } }
From source file:se.sawano.java.security.otp.google.keyuri.UnicodeEscaper.java
/** * Returns the Unicode code point of the character at the given index. * * <p>Unlike {@link Character#codePointAt(CharSequence, int)} or {@link String#codePointAt(int)} * this method will never fail silently when encountering an invalid surrogate pair. * * <p>The behaviour of this method is as follows: * <ol>// ww w. j a va 2s. c om * <li>If {@code index >= end}, {@link IndexOutOfBoundsException} is thrown. * <li><b>If the character at the specified index is not a surrogate, it is returned.</b> * <li>If the first character was a high surrogate value, then an attempt is made to read the next * character. * <ol> * <li><b>If the end of the sequence was reached, the negated value of the trailing high * surrogate is returned.</b> * <li><b>If the next character was a valid low surrogate, the code point value of the * high/low surrogate pair is returned.</b> * <li>If the next character was not a low surrogate value, then {@link * IllegalArgumentException} is thrown. * </ol> * <li>If the first character was a low surrogate value, {@link IllegalArgumentException} is * thrown. 
* </ol> * * @param seq * the sequence of characters from which to decode the code point * @param index * the index of the first character to decode * @param end * the index beyond the last valid character to decode * * @return the Unicode code point for the given index or the negated value of the trailing high surrogate character at the end of the sequence */ protected static int codePointAt(CharSequence seq, int index, int end) { notNull(seq); if (index < end) { char c1 = seq.charAt(index++); if (c1 < Character.MIN_HIGH_SURROGATE || c1 > Character.MAX_LOW_SURROGATE) { // Fast path (first test is probably all we need to do) return c1; } else if (c1 <= Character.MAX_HIGH_SURROGATE) { // If the high surrogate was the last character, return its inverse if (index == end) { return -c1; } // Otherwise look for the low surrogate following it char c2 = seq.charAt(index); if (Character.isLowSurrogate(c2)) { return Character.toCodePoint(c1, c2); } throw new IllegalArgumentException("Expected low surrogate but got char '" + c2 + "' with value " + (int) c2 + " at index " + index + " in '" + seq + "'"); } else { throw new IllegalArgumentException("Unexpected low surrogate character '" + c1 + "' with value " + (int) c1 + " at index " + (index - 1) + " in '" + seq + "'"); } } throw new IndexOutOfBoundsException("Index exceeds specified range"); }
From source file:org.eclipse.rdf4j.rio.ntriples.NTriplesParser.java
/**
 * Reads the next Unicode code point.
 *
 * <p>When the next character is a high surrogate, one more character is read and combined
 * with it. If the stream ends right after the high surrogate, the surrogate is returned
 * unchanged instead of being combined with the end-of-stream marker.
 *
 * @return the next Unicode code point, or -1 if the end of the stream has been reached.
 * @throws IOException if reading from the underlying reader fails
 */
protected int readCodePoint() throws IOException {
    int next = reader.read();
    if (next != -1 && Character.isHighSurrogate((char) next)) {
        int low = reader.read();
        if (low == -1) {
            // End of stream after a lone high surrogate: nothing to combine with.
            return next;
        }
        // NOTE(review): the second character is not checked to be a low surrogate; it has
        // already been consumed here, so rejecting it would need push-back support.
        next = Character.toCodePoint((char) next, (char) low);
    }
    return next;
}
From source file:org.diorite.config.serialization.snakeyaml.emitter.Emitter.java
/**
 * Writes {@code text} as a double-quoted scalar.
 *
 * <p>Runs of plain printable ASCII are copied to the stream as they are. Every other character
 * (and the quote and backslash themselves) ends the current run and is written either through
 * {@code ESCAPE_REPLACEMENTS}, as a hexadecimal escape, or verbatim when unicode output is
 * allowed and the character is printable. When {@code split} is set and the current column
 * would exceed {@code bestWidth}, the line is broken with a trailing backslash.
 *
 * @param text  the scalar value to write
 * @param split whether long lines may be folded
 * @throws IOException if writing to the underlying stream fails
 */
private void writeDoubleQuoted(String text, boolean split) throws IOException {
    this.writeIndicator("\"", true, false, false);

    final int length = text.length();
    int pending = 0; // first character not yet written
    // The loop runs once more with cursor == length so that the trailing run gets flushed.
    for (int cursor = 0; cursor <= length; cursor++) {
        final Character ch = (cursor < length) ? text.charAt(cursor) : null;

        final boolean endsRun = (ch == null)
                || ("\"\\\u0085\u2028\u2029\uFEFF".indexOf(ch) != -1)
                || (ch < '\u0020')
                || (ch > '\u007E');
        if (endsRun) {
            if (pending < cursor) {
                final int runLength = cursor - pending;
                this.column += runLength;
                this.stream.write(text, pending, runLength);
                pending = cursor;
            }
            if (ch != null) {
                final String escaped;
                if (ESCAPE_REPLACEMENTS.containsKey(ch)) {
                    escaped = "\\" + ESCAPE_REPLACEMENTS.get(ch);
                } else if (this.allowUnicode && StreamReader.isPrintable(ch)) {
                    escaped = String.valueOf(ch);
                } else if (ch <= '\u00FF') {
                    // Not allowed or not printable: two-digit hex escape.
                    final String hex = "0" + Integer.toString(ch, HEX_RADIX);
                    escaped = "\\x" + hex.substring(hex.length() - 2);
                } else if ((ch >= '\uD800') && (ch <= '\uDBFF') && ((cursor + 1) < length)) {
                    // High surrogate with a following character: eight-digit escape of the pair.
                    final char low = text.charAt(++cursor);
                    final String hex = "000" + Long.toHexString(Character.toCodePoint(ch, low));
                    escaped = "\\U" + hex.substring(hex.length() - 8);
                } else {
                    // Any other character, including a trailing high surrogate: four-digit escape.
                    final String hex = "000" + Integer.toString(ch, HEX_RADIX);
                    escaped = "\\u" + hex.substring(hex.length() - 4);
                }
                this.column += escaped.length();
                this.stream.write(escaped);
                pending = cursor + 1;
            }
        }

        final boolean insideText = (0 < cursor) && (cursor < (length - 1));
        final boolean atBreakPoint = ((ch != null) && (ch == ' ')) || (pending >= cursor);
        final boolean tooWide = (this.column + (cursor - pending)) > this.bestWidth;
        if (split && insideText && atBreakPoint && tooWide) {
            final String folded;
            if (pending < cursor) {
                folded = text.substring(pending, cursor) + "\\";
                pending = cursor;
            } else {
                folded = "\\";
            }
            this.column += folded.length();
            this.stream.write(folded);
            this.writeIndent();
            this.whitespace = false;
            this.indention = false;
            if (text.charAt(pending) == ' ') {
                // A leading space on the continuation line must be protected by a backslash.
                final String guard = "\\";
                this.column += guard.length();
                this.stream.write(guard);
            }
        }
    }

    this.writeIndicator("\"", false, false, false);
}