List of usage examples for java.lang.Character.isHighSurrogate
public static boolean isHighSurrogate(char ch)
From source file:org.eclipse.rdf4j.rio.ntriples.NTriplesParser.java
/** * Reads the next Unicode code point./*from ww w.j av a 2s .c o m*/ * * @return the next Unicode code point, or -1 if the end of the stream has been reached. * @throws IOException */ protected int readCodePoint() throws IOException { int next = reader.read(); if (Character.isHighSurrogate((char) next)) { next = Character.toCodePoint((char) next, (char) reader.read()); } return next; }
From source file:nl.tue.ddss.ifcrdf.model.IfcStepSerializer.java
/**
 * Prints the textual form of the primitive value carried by {@code val}.
 *
 * <p>
 * The kind of value is decided by the {@code is*} predicates, tried in this order: logical,
 * real/number, integer, boolean, string, enumeration. Anything else is printed through
 * {@code toString()}, or as {@code "$"} when {@code val} is {@code null}.
 *
 * <p>
 * NOTE(review): the escape directives emitted for strings ({@code \X\}, {@code \S\},
 * {@code \X2\}, {@code \X4\}, {@code \X0\}) look like ISO 10303-21 (STEP) string encoding -
 * confirm against the enclosing serializer.
 *
 * @param val the resource whose value is printed. NOTE(review): {@code null} is only handled by
 *            the final branch; whether the {@code is*} predicates accept {@code null} is not
 *            visible here - confirm.
 * @throws IOException presumably propagated from {@code print} - its definition is not visible here
 * @throws SerializerException if a string value contains a low surrogate that is not preceded by a
 *             high surrogate, a high surrogate that is not followed by a low surrogate, or if the
 *             "UTF-32" charset is unavailable
 */
private void writePrimitive(Resource val) throws IOException, SerializerException {
    if (isLogical(val)) {
        // Three-valued logical: nothing is printed when none of the three properties is present.
        if (val.hasProperty(HASLOGICAL, EXPRESS_TRUE)) {
            print(BOOLEAN_TRUE);
        } else if (val.hasProperty(HASLOGICAL, EXPRESS_FALSE)) {
            print(BOOLEAN_FALSE);
        } else if (val.hasProperty(HASLOGICAL, EXPRESS_UNDEFINED)) {
            print(BOOLEAN_UNDEFINED);
        }
    } else if (isReal(val) || isNumber(val)) {
        Double valDouble = val.getProperty(HASDOUBLE).getObject().asLiteral().getDouble();
        if ((valDouble).isInfinite() || ((valDouble).isNaN())) {
            // Non-finite values are replaced by a literal zero and the substitution is logged.
            LOGGER.info("Serializing infinite or NaN double as 0.0");
            print("0.0");
        } else {
            String string = valDouble.toString();
            if (string.endsWith(DOT_0)) {
                // Drops only the last character; presumably DOT_0 is ".0", turning "1.0" into
                // "1." - confirm the constant.
                print(string.substring(0, string.length() - 1));
            } else {
                print(string);
            }
        }
    } else if (isInteger(val)) {
        Integer valInteger = val.getProperty(HASINTEGER).getObject().asLiteral().getInt();
        String string = valInteger.toString();
        // NOTE(review): Integer.toString() never contains a decimal point, so if DOT_0 is ".0"
        // the first branch below can never be taken - confirm the constant.
        if (string.endsWith(DOT_0)) {
            print(string.substring(0, string.length() - 2));
        } else {
            print(string);
        }
    } else if (isBoolean(val)) {
        // Nothing is printed when neither literal is present.
        if (val.hasLiteral(HASBOOLEAN, true)) {
            print(BOOLEAN_TRUE);
        } else if (val.hasLiteral(HASBOOLEAN, false)) {
            print(BOOLEAN_FALSE);
        }
    } else if (isString(val)) {
        // Strings are wrapped in SINGLE_QUOTE and escaped one UTF-16 char at a time.
        print(SINGLE_QUOTE);
        String stringVal = val.getProperty(HASSTRING).getObject().asLiteral().getString();
        for (int i = 0; i < stringVal.length(); i++) {
            char c = stringVal.charAt(i);
            if (c == '\'') {
                // A quote inside the string is doubled.
                print("\'\'");
            } else if (c == '\\') {
                // A backslash inside the string is doubled.
                print("\\\\");
            } else if (c >= 32 && c <= 126) {
                // ISO 8859-1
                // Printable ASCII is written verbatim.
                print("" + c);
            } else if (c < 255) {
                // ISO 10646 and ISO 8859-1 are the same < 255 , using
                // ISO_8859_1
                // Reached for chars below 32 and for 127..254; 255 itself goes to the branch below.
                // NOTE(review): array() exposes the buffer's whole backing array - verify that it
                // holds exactly the encoded byte(s).
                print("\\X\\" + new String(Hex.encodeHex(
                        Charsets.ISO_8859_1.encode(CharBuffer.wrap(new char[] { (char) c })).array()))
                                .toUpperCase());
            } else {
                if (useIso8859_1) {
                    // ISO 8859-1 with -128 offset
                    // NOTE(review): only chars >= 255 reach this point, so (c - 128) is >= 127;
                    // values above 255 are not representable in ISO 8859-1 - confirm this is intended.
                    ByteBuffer encode = Charsets.ISO_8859_1.encode(new String(new char[] { (char) (c - 128) }));
                    print("\\S\\" + (char) encode.get());
                } else {
                    // The following code has not been tested (2012-04-25)
                    // Use UCS-2 or UCS-4
                    // TODO when multiple sequential characters should be
                    // encoded in UCS-2 or UCS-4, we don't really need to
                    // add all those \X0\ \X2\ and \X4\ chars
                    if (Character.isLowSurrogate(c)) {
                        // A low surrogate seen here was not consumed together with a preceding
                        // high surrogate.
                        throw new SerializerException("Unexpected low surrogate range char");
                    } else if (Character.isHighSurrogate(c)) {
                        // We need UCS-4, this is probably never happening
                        if (i + 1 < stringVal.length()) {
                            char low = stringVal.charAt(i + 1);
                            if (!Character.isLowSurrogate(low)) {
                                throw new SerializerException(
                                        "High surrogate char should be followed by char in low surrogate range");
                            }
                            try {
                                // NOTE(review): array() returns the whole backing array, which may
                                // be longer than the encoded UTF-32 bytes - verify the hex output.
                                print("\\X4\\" + new String(Hex.encodeHex(Charset.forName("UTF-32")
                                        .encode(new String(new char[] { c, low })).array())).toUpperCase() + "\\X0\\");
                            } catch (UnsupportedCharsetException e) {
                                throw new SerializerException(e);
                            }
                            // Skip the low surrogate that was just written as part of the pair.
                            i++;
                        } else {
                            throw new SerializerException(
                                    "High surrogate char should be followed by char in low surrogate range, but end of string reached");
                        }
                    } else {
                        // UCS-2 will do
                        print("\\X2\\" + new String(Hex
                                .encodeHex(Charsets.UTF_16BE.encode(CharBuffer.wrap(new char[] { c })).array()))
                                        .toUpperCase() + "\\X0\\");
                    }
                }
            }
        }
        print(SINGLE_QUOTE);
    } else if (isEnumeration(val)) {
        // Enumeration values are printed as their local name between two dots.
        String enumVal = val.getLocalName();
        print("." + enumVal + ".");
    } else {
        print(val == null ? "$" : val.toString());
    }
}
From source file:it.geosdi.era.server.servlet.HTTPProxy.java
public static int escapeHtmlFull(int ch) { if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9') { // safe//from w w w. j av a 2s . c om return ch; } else if (Character.isWhitespace(ch)) { if (ch != '\n' && ch != '\r' && ch != '\t') // safe return ch; } else if (Character.isDefined(ch)) { // safe return ch; } else if (Character.isISOControl(ch)) { // paranoid version:isISOControl which are not isWhitespace // removed ! // do nothing do not include in output ! return -1; } else if (Character.isHighSurrogate((char) ch)) { // do nothing do not include in output ! return -1; } else if (Character.isLowSurrogate((char) ch)) { // wrong char[] sequence, //TODO: LOG !!! return -1; } return -1; }
From source file:adept.io.Reader.java
/** * Removes surrogate pairs/* w w w. jav a 2 s . c o m*/ * * @param text * @return */ public static String checkSurrogates(String text) { StringBuffer buffer = new StringBuffer(); char[] chars = text.toCharArray(); for (Character c : chars) { if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { System.out.println("WARNING -- invalid xml character " + c + " removed"); } else { buffer.append(c); } } return buffer.toString(); }
From source file:com.sjdf.platform.xss.StringUtils.java
/** * <p>/*from ww w. j ava 2 s . co m*/ * Search a CharSequence to find the first index of any character in the * given set of characters. * </p> * <p/> * <p> * A {@code null} String will return {@code -1}. A {@code null} or zero * length search array will return {@code -1}. * </p> * <p/> * <pre> * StringUtils.indexOfAny(null, *) = -1 * StringUtils.indexOfAny("", *) = -1 * StringUtils.indexOfAny(*, null) = -1 * StringUtils.indexOfAny(*, []) = -1 * StringUtils.indexOfAny("zzabyycdxx",['z','a']) = 0 * StringUtils.indexOfAny("zzabyycdxx",['b','y']) = 3 * StringUtils.indexOfAny("aba", ['z']) = -1 * </pre> * * @param cs the CharSequence to check, may be null * @param searchChars the chars to search for, may be null * @return the index of any of the chars, -1 if no match or null input * @since 3.0 Changed signature from indexOfAny(String, char[]) to * indexOfAny(CharSequence, char...) */ public static int indexOfAny(CharSequence cs, char... searchChars) { if (isEmpty(cs) || searchChars == null) { return INDEX_NOT_FOUND; } int csLen = cs.length(); int csLast = csLen - 1; int searchLen = searchChars.length; int searchLast = searchLen - 1; for (int i = 0; i < csLen; i++) { char ch = cs.charAt(i); for (int j = 0; j < searchLen; j++) { if (searchChars[j] == ch) { if (i < csLast && j < searchLast && Character.isHighSurrogate(ch)) { // ch is a supplementary character if (searchChars[j + 1] == cs.charAt(i + 1)) { return i; } } else { return i; } } } } return INDEX_NOT_FOUND; }
From source file:com.sjdf.platform.xss.StringUtils.java
/** * <p>/* w ww. j a v a 2 s.c o m*/ * Checks if the CharSequence contains any character in the given set of * characters. * </p> * <p/> * <p> * A {@code null} CharSequence will return {@code false}. A {@code null} or * zero length search array will return {@code false}. * </p> * <p/> * <pre> * StringUtils.containsAny(null, *) = false * StringUtils.containsAny("", *) = false * StringUtils.containsAny(*, null) = false * StringUtils.containsAny(*, []) = false * StringUtils.containsAny("zzabyycdxx",['z','a']) = true * StringUtils.containsAny("zzabyycdxx",['b','y']) = true * StringUtils.containsAny("aba", ['z']) = false * </pre> * * @param cs the CharSequence to check, may be null * @param searchChars the chars to search for, may be null * @return the {@code true} if any of the chars are found, {@code false} if * no match or null input * @since 3.0 Changed signature from containsAny(String, char[]) to * containsAny(CharSequence, char...) */ public static boolean containsAny(CharSequence cs, char... searchChars) { if (isEmpty(cs) || searchChars == null) { return false; } int csLength = cs.length(); int searchLength = searchChars.length; int csLast = csLength - 1; int searchLast = searchLength - 1; for (int i = 0; i < csLength; i++) { char ch = cs.charAt(i); for (int j = 0; j < searchLength; j++) { if (searchChars[j] == ch) { if (Character.isHighSurrogate(ch)) { if (j == searchLast) { // missing low surrogate, fine, like // String.indexOf(String) return true; } if (i < csLast && searchChars[j + 1] == cs.charAt(i + 1)) { return true; } } else { // ch is in the Basic Multilingual Plane return true; } } } } return false; }
From source file:org.apache.commons.lang3.StringUtils.java
/** * <p>Search a CharSequence to find the first index of any * character in the given set of characters.</p> * * <p>A {@code null} String will return {@code -1}. * A {@code null} or zero length search array will return {@code -1}.</p> * * <pre>// w w w .j a v a 2s . co m * StringUtils.indexOfAny(null, *) = -1 * StringUtils.indexOfAny("", *) = -1 * StringUtils.indexOfAny(*, null) = -1 * StringUtils.indexOfAny(*, []) = -1 * StringUtils.indexOfAny("zzabyycdxx",['z','a']) = 0 * StringUtils.indexOfAny("zzabyycdxx",['b','y']) = 3 * StringUtils.indexOfAny("aba", ['z']) = -1 * </pre> * * @param cs the CharSequence to check, may be null * @param searchChars the chars to search for, may be null * @return the index of any of the chars, -1 if no match or null input * @since 2.0 * @since 3.0 Changed signature from indexOfAny(String, char[]) to indexOfAny(CharSequence, char...) */ public static int indexOfAny(CharSequence cs, char... searchChars) { if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) { return INDEX_NOT_FOUND; } int csLen = cs.length(); int csLast = csLen - 1; int searchLen = searchChars.length; int searchLast = searchLen - 1; for (int i = 0; i < csLen; i++) { char ch = cs.charAt(i); for (int j = 0; j < searchLen; j++) { if (searchChars[j] == ch) { if (i < csLast && j < searchLast && Character.isHighSurrogate(ch)) { // ch is a supplementary character if (searchChars[j + 1] == cs.charAt(i + 1)) { return i; } } else { return i; } } } } return INDEX_NOT_FOUND; }
From source file:com.sjdf.platform.xss.StringUtils.java
/**
 * <p>
 * Returns the index of the first char in {@code cs} that does not occur in
 * {@code searchChars}.
 * </p>
 *
 * <p>
 * A {@code null} or empty CharSequence yields {@code -1}, as does a {@code null} search
 * array.
 * </p>
 *
 * <pre>
 * StringUtils.indexOfAnyBut(null, *)                              = -1
 * StringUtils.indexOfAnyBut("", *)                                = -1
 * StringUtils.indexOfAnyBut(*, null)                              = -1
 * StringUtils.indexOfAnyBut("zzabyycdxx", new char[] {'z', 'a'} ) = 3
 * StringUtils.indexOfAnyBut("aba", new char[] {'z'} )             = 0
 * StringUtils.indexOfAnyBut("aba", new char[] {'a', 'b'} )        = -1
 * </pre>
 *
 * @param cs          the CharSequence to check, may be null
 * @param searchChars the chars to search for, may be null
 * @return the index of the first char not in the set, -1 if there is none or on null input
 * @since 3.0 Changed signature from indexOfAnyBut(String, char[]) to
 * indexOfAnyBut(CharSequence, char...)
 */
public static int indexOfAnyBut(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || searchChars == null) {
        return INDEX_NOT_FOUND;
    }
    final int textLen = cs.length();
    final int textLast = textLen - 1;
    final int setLen = searchChars.length;
    final int setLast = setLen - 1;
    for (int pos = 0; pos < textLen; pos++) {
        final char current = cs.charAt(pos);
        boolean inSet = false;
        for (int k = 0; k < setLen; k++) {
            if (searchChars[k] != current) {
                continue;
            }
            // A high surrogate with a successor on both sides only counts as a member when the
            // following chars agree too.
            final boolean pairOnBothSides = pos < textLast && k < setLast
                    && Character.isHighSurrogate(current);
            if (!pairOnBothSides || searchChars[k + 1] == cs.charAt(pos + 1)) {
                inSet = true;
                break;
            }
        }
        if (!inSet) {
            return pos;
        }
    }
    return INDEX_NOT_FOUND;
}
From source file:org.apache.commons.lang3.StringUtils.java
/** * <p>Checks if the CharSequence contains any character in the given * set of characters.</p>/*ww w .j a v a2s . com*/ * * <p>A {@code null} CharSequence will return {@code false}. * A {@code null} or zero length search array will return {@code false}.</p> * * <pre> * StringUtils.containsAny(null, *) = false * StringUtils.containsAny("", *) = false * StringUtils.containsAny(*, null) = false * StringUtils.containsAny(*, []) = false * StringUtils.containsAny("zzabyycdxx",['z','a']) = true * StringUtils.containsAny("zzabyycdxx",['b','y']) = true * StringUtils.containsAny("aba", ['z']) = false * </pre> * * @param cs the CharSequence to check, may be null * @param searchChars the chars to search for, may be null * @return the {@code true} if any of the chars are found, * {@code false} if no match or null input * @since 2.4 * @since 3.0 Changed signature from containsAny(String, char[]) to containsAny(CharSequence, char...) */ public static boolean containsAny(CharSequence cs, char... searchChars) { if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) { return false; } int csLength = cs.length(); int searchLength = searchChars.length; int csLast = csLength - 1; int searchLast = searchLength - 1; for (int i = 0; i < csLength; i++) { char ch = cs.charAt(i); for (int j = 0; j < searchLength; j++) { if (searchChars[j] == ch) { if (Character.isHighSurrogate(ch)) { if (j == searchLast) { // missing low surrogate, fine, like String.indexOf(String) return true; } if (i < csLast && searchChars[j + 1] == cs.charAt(i + 1)) { return true; } } else { // ch is in the Basic Multilingual Plane return true; } } } } return false; }
From source file:org.apache.commons.lang3.StringUtils.java
/**
 * <p>Finds the position of the first char of a CharSequence that is not a member
 * of the given set of characters.</p>
 *
 * <p>{@code -1} is returned for a {@code null} or empty CharSequence and for a
 * {@code null} or zero length search array.</p>
 *
 * <pre>
 * StringUtils.indexOfAnyBut(null, *)                              = -1
 * StringUtils.indexOfAnyBut("", *)                                = -1
 * StringUtils.indexOfAnyBut(*, null)                              = -1
 * StringUtils.indexOfAnyBut(*, [])                                = -1
 * StringUtils.indexOfAnyBut("zzabyycdxx", new char[] {'z', 'a'} ) = 3
 * StringUtils.indexOfAnyBut("aba", new char[] {'z'} )             = 0
 * StringUtils.indexOfAnyBut("aba", new char[] {'a', 'b'} )        = -1
 * </pre>
 *
 * @param cs  the CharSequence to check, may be null
 * @param searchChars  the chars to search for, may be null
 * @return the index of the first char not in the set, -1 if there is none or on null input
 * @since 2.0
 * @since 3.0 Changed signature from indexOfAnyBut(String, char[]) to indexOfAnyBut(CharSequence, char...)
 */
public static int indexOfAnyBut(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
        return INDEX_NOT_FOUND;
    }
    final int textLength = cs.length();
    final int setLength = searchChars.length;
    for (int i = 0; i < textLength; i++) {
        final char candidate = cs.charAt(i);
        boolean excluded = false;
        int j = 0;
        // Scan the set until the candidate is recognised as a member or the set is exhausted.
        while (!excluded && j < setLength) {
            if (searchChars[j] == candidate) {
                final boolean mustCheckNext = i < textLength - 1 && j < setLength - 1
                        && Character.isHighSurrogate(candidate);
                excluded = !mustCheckNext || searchChars[j + 1] == cs.charAt(i + 1);
            }
            j++;
        }
        if (!excluded) {
            return i;
        }
    }
    return INDEX_NOT_FOUND;
}