List of usage examples for java.lang.String.codePointAt
public int codePointAt(int index)
From source file:dk.statsbiblioteket.util.xml.XMLUnicodeStreamWriter.java
/** * Escapes all code points > 65535 (multi-char) to &#xHHHHH;-representation, intended for XML. * The usual problematic characters {@code <>&"} are also escaped. * @return a representation directly usable for XML. *///from w w w .j ava 2 s . c om private String extendedUnicodeEscape(String in, final boolean isAttribute) { // TODO: When upgrading to Java 1.8, use the codePointStream sb.setLength(0); int index = 0; while (index < in.length()) { int codePoint = in.codePointAt(index); if (isAttribute && codePoint == '"') { sb.append("""); } else if (codePoint == '<') { sb.append("<"); } else if (codePoint == '>') { sb.append(">"); } else if (codePoint == '&') { sb.append("&"); } else if (codePoint > 65535) { sb.append("&#x").append(Integer.toHexString(codePoint)).append(";"); } else { sb.append((char) codePoint); } index += codePoint < 65536 ? 1 : 2; // What about 3-character representations? } return sb.toString(); }
From source file:co.nubetech.hiho.mapreduce.lib.db.apache.TextSplitter.java
/** * Return a BigDecimal representation of string 'str' suitable for use * in a numerically-sorting order./*from www.j a va 2 s . c o m*/ */ BigDecimal stringToBigDecimal(String str) { BigDecimal result = BigDecimal.ZERO; BigDecimal curPlace = ONE_PLACE; // start with 1/65536 to compute the first digit. int len = Math.min(str.length(), MAX_CHARS); for (int i = 0; i < len; i++) { int codePoint = str.codePointAt(i); result = result.add(tryDivide(new BigDecimal(codePoint), curPlace)); // advance to the next less significant place. e.g., 1/(65536^2) for the second char. curPlace = curPlace.multiply(ONE_PLACE); } return result; }
From source file:uk.ac.ucl.cs.cmic.giftcloud.uploadapp.ConfigurationDialog.java
public static boolean isValidAETitle(String aet) { // Per PS 3.5: Default Character Repertoire excluding character code 5CH (the BACKSLASH \? in ISO-IR 6), and control characters LF, FF, CR and ESC. 16 bytes maximum boolean good = true; if (aet == null) { good = false;//from www . ja v a 2s . co m } else if (aet.length() == 0) { good = false; } else if (aet.length() > 16) { good = false; } else if (aet.trim().length() == 0) { // all whitespace is illegal good = false; } else if (aet.contains("\\")) { good = false; } else { int l = aet.length(); for (int i = 0; i < l; ++i) { int codePoint = aet.codePointAt(i); try { Character.UnicodeBlock codeBlock = Character.UnicodeBlock.of(codePoint); if (codeBlock != Character.UnicodeBlock.BASIC_LATIN) { good = false; } else if (Character.isISOControl(codePoint)) { good = false; } } catch (IllegalArgumentException e) { // if not a valid code point good = false; } } } return good; }
From source file:com.cloudera.sqoop.mapreduce.db.TextSplitter.java
/** * Return a BigDecimal representation of string 'str' suitable for use in a * numerically-sorting order./* w w w. j a va 2 s .c o m*/ */ BigDecimal stringToBigDecimal(String str) { // Start with 1/65536 to compute the first digit. BigDecimal curPlace = ONE_PLACE; BigDecimal result = BigDecimal.ZERO; int len = Math.min(str.length(), MAX_CHARS); for (int i = 0; i < len; i++) { int codePoint = str.codePointAt(i); result = result.add(tryDivide(new BigDecimal(codePoint), curPlace)); // advance to the next less significant place. e.g., 1/(65536^2) for the // second char. curPlace = curPlace.multiply(ONE_PLACE); } return result; }
From source file:org.apache.sqoop.mapreduce.db.TextSplitter.java
/** * Return a BigDecimal representation of string 'str' suitable for use in a * numerically-sorting order./*from w w w . j a v a 2 s.co m*/ */ public BigDecimal stringToBigDecimal(String str) { // Start with 1/65536 to compute the first digit. BigDecimal curPlace = ONE_PLACE; BigDecimal result = BigDecimal.ZERO; int len = Math.min(str.length(), MAX_CHARS); for (int i = 0; i < len; i++) { int codePoint = str.codePointAt(i); result = result.add(tryDivide(new BigDecimal(codePoint), curPlace)); // advance to the next less significant place. e.g., 1/(65536^2) for the // second char. curPlace = curPlace.multiply(ONE_PLACE); } return result; }
From source file:co.nubetech.apache.hadoop.TextSplitter.java
/** * Return a BigDecimal representation of string 'str' suitable for use in a * numerically-sorting order./* www . j a va 2 s. co m*/ */ BigDecimal stringToBigDecimal(String str) { BigDecimal result = BigDecimal.ZERO; BigDecimal curPlace = ONE_PLACE; // start with 1/65536 to compute the // first digit. int len = Math.min(str.length(), MAX_CHARS); for (int i = 0; i < len; i++) { int codePoint = str.codePointAt(i); result = result.add(tryDivide(new BigDecimal(codePoint), curPlace)); // advance to the next less significant place. e.g., 1/(65536^2) for // the second char. curPlace = curPlace.multiply(ONE_PLACE); } return result; }
From source file:org.kitodo.production.plugin.opac.pica.Query.java
/** * Query constructor. Constructs a query from a String. For the query * semantics, see/*from www . java 2s .co m*/ * {@link org.goobi.production.plugin.CataloguePlugin.QueryBuilder}. * * @param queryString * Query string to parse * @throws IllegalArgumentException * if the query is syntactically incomplete (i.e. unterminated * String literal), contains fieldless tokens or bracket * expressions */ Query(String queryString) { int state = 0; String operator = null; StringBuilder field = new StringBuilder(); StringBuilder term = new StringBuilder(32); for (int index = 0; index < queryString.length(); index++) { int codePoint = queryString.codePointAt(index); switch (state) { case 0: switch (codePoint) { case ' ': continue; case '"': throw new IllegalArgumentException(FIELDLESS); case '(': throw new IllegalArgumentException(BRACKET); case '-': operator = NOT; break; default: field.appendCodePoint(codePoint); } state = 1; break; case 1: switch (codePoint) { case ' ': throw new IllegalArgumentException(FIELDLESS); case ':': state = 2; break; default: field.appendCodePoint(codePoint); } break; case 2: switch (codePoint) { case ' ': continue; case '"': state = 4; break; case '(': throw new IllegalArgumentException(BRACKET); default: term.appendCodePoint(codePoint); state = 3; } break; case 3: if (codePoint == ' ') { if (term.length() == 0) { throw new IllegalArgumentException(INCOMPLETE); } addQuery(operator, term.toString(), field.toString()); operator = AND; field = new StringBuilder(); term = new StringBuilder(32); state = 5; } else { term.appendCodePoint(codePoint); } break; case 4: if (codePoint == '"') { addQuery(operator, term.toString(), field.toString()); operator = AND; field = new StringBuilder(); term = new StringBuilder(32); state = 5; } else { term.appendCodePoint(codePoint); } break; case 5: switch (codePoint) { case ' ': continue; case '-': operator = NOT; break; case '|': operator = OR; break; default: field.appendCodePoint(codePoint); } state = 1; 
break; default: throw new UnreachableCodeException(); } } if (state == 3) { addQuery(operator, term.toString(), field.toString()); } if (state != 3 && state != 5) { throw new IllegalArgumentException(INCOMPLETE); } }
From source file:org.kitodo.dataaccess.storage.memory.GraphPath.java
/** * Parses an object from a graph path string. * * @param string/*from w w w .j a v a2 s .c o m*/ * string to parse * @return the number of code points consumed and the object parsed */ private final Pair<Integer, Node> parseObject(String string, Map<String, String> prefixes) { int length = string.length(); Node result = new MemoryNode(); int index = 0; NodeReference currentPredicate = null; do { while ((index < length) && (string.codePointAt(index) <= ' ')) { index++; } if ((index >= length) || (string.codePointAt(index) == ']')) { return Pair.of(index, result); } else if (string.codePointAt(index) == ',') { index++; currentPredicate = null; } else if (string.codePointAt(index) == '[') { index++; Pair<Integer, Node> recursion = parseObject(string.substring(index), prefixes); index += recursion.getKey(); index++; result.put(currentPredicate != null ? currentPredicate : ANY_PREDICATE, recursion.getValue()); } else { if (currentPredicate == null) { int predicatesStart = index; int codePoint; while ((index < length) && ((codePoint = string.codePointAt(index)) > ' ')) { index += Character.charCount(codePoint); } String predicate = string.substring(predicatesStart, index); currentPredicate = predicate.equals(ANY_PREDICATE_CHAR) ? ANY_PREDICATE : MemoryStorage.INSTANCE.createNodeReference(applyPrefixes(prefixes, predicate)); } else { int literalStart = index; int cp; while ((index < length) && ((cp = string.codePointAt(index)) > ' ') && (cp != ',') && (cp != ']')) { index += Character.charCount(cp); } String value = applyPrefixes(prefixes, string.substring(literalStart, index)); result.put(currentPredicate, MemoryLiteral.createLeaf(value, null)); } } } while (index < length); return Pair.of(length, result); }
From source file:com.norconex.collector.http.redirect.impl.GenericRedirectURLProvider.java
private String resolveRedirectURL(final String redirectURL, final String nonAsciiCharset) { String url = redirectURL; // Is string containing only ASCII as it should? boolean isAscii = true; final int length = url.length(); for (int offset = 0; offset < length;) { final int codepoint = url.codePointAt(offset); if (codepoint > ASCII_MAX_CODEPOINT) { isAscii = false;//from ww w . j av a2s. com break; } offset += Character.charCount(codepoint); } if (isAscii) { return url; } else { LOG.warn("Redirect URI made of 7-bit clean ASCII. " + "It probably is not encoded properly. " + "Will try to fix. Redirect URL: " + redirectURL); } // try to fix if non ascii charset is non UTF8. if (StringUtils.isNotBlank(nonAsciiCharset)) { String charset = CharsetUtils.clean(nonAsciiCharset); if (!CharEncoding.UTF_8.equals(charset)) { try { url = new String(url.getBytes(charset)); return url; } catch (UnsupportedEncodingException e) { LOG.warn("Could not fix badly encoded URL with charset \"" + charset + "\". Redirect URL: " + redirectURL, e); } } } // If all fails, fall back to UTF8 try { url = new String(url.getBytes(CharEncoding.UTF_8)); return url; } catch (UnsupportedEncodingException e) { LOG.warn("Could not fix badly encoded URL with charset " + "\"UTF-8\". Redirect URL: " + redirectURL, e); } return url; }
From source file:org.kitodo.dataaccess.storage.memory.GraphPath.java
/**
 * Creates a node representing the graph path string.
 *
 * <p>
 * The string is read left to right. A {@code '['} starts an object, which is
 * parsed by {@code parseObject} and stored under {@code RDF.OBJECT} on the
 * current position. Anything else starts a new location step: an optional
 * direction directive is read, then a whitespace-delimited token whose
 * {@code '|'}-separated parts are stored as predicates of the step.
 *
 * @param string
 *            string to parse
 * @param prefixes
 *            a mapping of prefixes to namespaces which was used to shorten
 *            the string
 * @throws IllegalArgumentException
 *             if the string contains one of the unsupported directives
 */
public GraphPath(String string, Map<String, String> prefixes) {
    super(GRAPH_PATH);
    final int end = string.length();
    Node position = this;
    int pos = 0;
    while (pos < end) {
        while (pos < end && string.codePointAt(pos) <= ' ') {
            pos++;
        }
        final int lead = pos < end ? string.codePointAt(pos) : -1;
        if (lead == '[') {
            pos++;
            Pair<Integer, Node> object = parseObject(string.substring(pos), prefixes);
            pos += object.getKey();
            // Step over the closing bracket of the object.
            pos++;
            position.put(RDF.OBJECT, object.getValue());
            continue;
        }

        Node step = new MemoryNode(LOCATION_STEP);
        NodeReference direction = RDF.NIL;
        if (lead == '<') {
            throw new IllegalArgumentException("Directive '<' not supported.");
        } else if (lead == '>') {
            pos++;
            final int second = pos < end ? string.codePointAt(pos) : -1;
            if (second == '>') {
                pos++;
                if (pos < end && string.codePointAt(pos) == '>') {
                    throw new IllegalArgumentException("Directive '>|' not supported.");
                }
                throw new IllegalArgumentException("Directive '>>' not supported.");
            }
            if (second == '|') {
                throw new IllegalArgumentException("Directive '>|' not supported.");
            }
            direction = TO;
        } else if (lead == '|') {
            // The '|' itself is not consumed here; it becomes part of the predicates token.
            if (pos + 1 < end && string.codePointAt(pos + 1) == '<') {
                throw new IllegalArgumentException("Directive '|<' not supported.");
            }
        } else {
            direction = TO;
        }

        while (pos < end && string.codePointAt(pos) <= ' ') {
            pos++;
        }
        position.put(direction, step);
        position = step;

        // The predicates token runs up to the next whitespace.
        final int tokenStart = pos;
        while (pos < end) {
            final int cp = string.codePointAt(pos);
            if (cp <= ' ') {
                break;
            }
            pos += Character.charCount(cp);
        }
        String predicates = string.substring(tokenStart, pos);
        if (predicates.equals(ANY_PREDICATE_CHAR)) {
            continue;
        }
        for (String predicate : predicates.split("\\|")) {
            position.put(RDF.PREDICATE, applyPrefixes(prefixes, predicate));
        }
    }
}