Example usage for java.lang.String.codePointAt

List of usage examples for java.lang String codePointAt

Introduction

On this page you can find example usages of java.lang.String.codePointAt.

Prototype

public int codePointAt(int index) 

Source Link

Document

Returns the character (Unicode code point) at the specified index.

Usage

From source file:dk.statsbiblioteket.util.xml.XMLUnicodeStreamWriter.java

/**
 * Produces an XML-safe rendition of the input. Code points above 65535 (the ones that take two
 * chars in a String) are written as {@code &#xHHHHH;} character references, and {@code <},
 * {@code >} and {@code &} are replaced by their entity names. The quote sign {@code "} is only
 * replaced when escaping for an attribute.
 * <p>
 * The buffer {@code sb} is cleared at the start of every call and reused for the output.
 *
 * @param in          the text to escape.
 * @param isAttribute true if {@code "} should be written as {@code &quot;}.
 * @return a representation directly usable for XML.
 */
private String extendedUnicodeEscape(String in, final boolean isAttribute) {
    // TODO: When upgrading to Java 1.8, use the codePointStream
    sb.setLength(0);
    final int length = in.length();
    for (int pos = 0; pos < length; ) {
        final int cp = in.codePointAt(pos);
        // A code point takes one char inside the BMP and two chars (a surrogate pair) above it;
        // there is no longer representation in a Java String.
        pos += Character.charCount(cp);
        switch (cp) {
            case '"':
                sb.append(isAttribute ? "&quot;" : "\"");
                break;
            case '<':
                sb.append("&lt;");
                break;
            case '>':
                sb.append("&gt;");
                break;
            case '&':
                sb.append("&amp;");
                break;
            default:
                if (cp > 65535) {
                    sb.append("&#x").append(Integer.toHexString(cp)).append(";");
                } else {
                    sb.append((char) cp);
                }
        }
    }
    return sb.toString();
}

From source file:co.nubetech.hiho.mapreduce.lib.db.apache.TextSplitter.java

/**
 * Return a BigDecimal representation of string 'str' suitable for use
 * in a numerically-sorting order./*from   www.j a va 2  s  .  c  o  m*/
 */
BigDecimal stringToBigDecimal(String str) {
    BigDecimal result = BigDecimal.ZERO;
    BigDecimal curPlace = ONE_PLACE; // start with 1/65536 to compute the first digit.

    int len = Math.min(str.length(), MAX_CHARS);

    for (int i = 0; i < len; i++) {
        int codePoint = str.codePointAt(i);
        result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
        // advance to the next less significant place. e.g., 1/(65536^2) for the second char.
        curPlace = curPlace.multiply(ONE_PLACE);
    }

    return result;
}

From source file:uk.ac.ucl.cs.cmic.giftcloud.uploadapp.ConfigurationDialog.java

/**
 * Checks whether a string is acceptable as an AE title.
 * <p>
 * A title is accepted when it is non-null, 1 to 16 chars long, not made up solely of characters
 * that {@link String#trim()} strips, free of backslashes, and when every char belongs to the
 * Basic Latin block and is not an ISO control character.
 *
 * @param aet the candidate AE title, may be null
 * @return true if the title passes all checks, false otherwise
 */
public static boolean isValidAETitle(String aet) {
    // Per PS 3.5: Default Character Repertoire excluding character code 5CH (the BACKSLASH in ISO-IR 6), and control characters LF, FF, CR and ESC. 16 bytes maximum
    if (aet == null || aet.isEmpty() || aet.length() > 16) {
        return false;
    }
    if (aet.trim().isEmpty()) { // all whitespace is illegal
        return false;
    }
    if (aet.indexOf('\\') >= 0) {
        return false;
    }
    for (int pos = 0; pos < aet.length(); pos++) {
        final int cp = aet.codePointAt(pos);
        try {
            final boolean basicLatin = Character.UnicodeBlock.of(cp) == Character.UnicodeBlock.BASIC_LATIN;
            if (!basicLatin || Character.isISOControl(cp)) {
                return false;
            }
        } catch (IllegalArgumentException e) { // if not a valid code point
            return false;
        }
    }
    return true;
}

From source file:com.cloudera.sqoop.mapreduce.db.TextSplitter.java

/**
 * Return a BigDecimal representation of string 'str' suitable for use in a
 * numerically-sorting order./* w w  w.  j  a  va 2 s  .c o m*/
 */
BigDecimal stringToBigDecimal(String str) {
    // Start with 1/65536 to compute the first digit.
    BigDecimal curPlace = ONE_PLACE;
    BigDecimal result = BigDecimal.ZERO;

    int len = Math.min(str.length(), MAX_CHARS);

    for (int i = 0; i < len; i++) {
        int codePoint = str.codePointAt(i);
        result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
        // advance to the next less significant place. e.g., 1/(65536^2) for the
        // second char.
        curPlace = curPlace.multiply(ONE_PLACE);
    }

    return result;
}

From source file:org.apache.sqoop.mapreduce.db.TextSplitter.java

/**
 * Return a BigDecimal representation of string 'str' suitable for use in a
 * numerically-sorting order./*from w w w  .  j  a  v a 2 s.co m*/
 */
public BigDecimal stringToBigDecimal(String str) {
    // Start with 1/65536 to compute the first digit.
    BigDecimal curPlace = ONE_PLACE;
    BigDecimal result = BigDecimal.ZERO;

    int len = Math.min(str.length(), MAX_CHARS);

    for (int i = 0; i < len; i++) {
        int codePoint = str.codePointAt(i);
        result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
        // advance to the next less significant place. e.g., 1/(65536^2) for the
        // second char.
        curPlace = curPlace.multiply(ONE_PLACE);
    }

    return result;
}

From source file:co.nubetech.apache.hadoop.TextSplitter.java

/**
 * Return a BigDecimal representation of string 'str' suitable for use in a
 * numerically-sorting order./* www  . j a va  2 s. co m*/
 */
BigDecimal stringToBigDecimal(String str) {
    BigDecimal result = BigDecimal.ZERO;
    BigDecimal curPlace = ONE_PLACE; // start with 1/65536 to compute the
    // first digit.

    int len = Math.min(str.length(), MAX_CHARS);

    for (int i = 0; i < len; i++) {
        int codePoint = str.codePointAt(i);
        result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
        // advance to the next less significant place. e.g., 1/(65536^2) for
        // the second char.
        curPlace = curPlace.multiply(ONE_PLACE);
    }

    return result;
}

From source file:org.kitodo.production.plugin.opac.pica.Query.java

/**
 * Query constructor. Constructs a query from a String. For the query
 * semantics, see
 * {@link org.goobi.production.plugin.CataloguePlugin.QueryBuilder}.
 *
 * <p>
 * The string is read code point by code point by a small state machine:
 * 0 = start, 1 = inside a field name, 2 = after the colon, 3 = inside an
 * unquoted term, 4 = inside a quoted term, 5 = between two queries. Every
 * completed field/term pair is handed to {@code addQuery}.
 *
 * @param queryString
 *            Query string to parse
 * @throws IllegalArgumentException
 *             if the query is syntactically incomplete (i.e. unterminated
 *             String literal), contains fieldless tokens or bracket
 *             expressions
 */
Query(String queryString) {
    int state = 0;
    String operator = null;
    StringBuilder field = new StringBuilder();
    StringBuilder term = new StringBuilder(32);
    int codePoint;
    // Advance by the number of chars the code point occupies. Stepping by one
    // char would visit the low surrogate of a supplementary character a second
    // time and append it as a stray char.
    for (int index = 0; index < queryString.length(); index += Character.charCount(codePoint)) {
        codePoint = queryString.codePointAt(index);
        switch (state) {
        case 0:
            // start of input: leading blanks, an optional '-' or the first
            // code point of a field name
            switch (codePoint) {
            case ' ':
                continue;
            case '"':
                throw new IllegalArgumentException(FIELDLESS);
            case '(':
                throw new IllegalArgumentException(BRACKET);
            case '-':
                operator = NOT;
                break;
            default:
                field.appendCodePoint(codePoint);
            }
            state = 1;
            break;
        case 1:
            // inside the field name, which ends at ':'
            switch (codePoint) {
            case ' ':
                throw new IllegalArgumentException(FIELDLESS);
            case ':':
                state = 2;
                break;
            default:
                field.appendCodePoint(codePoint);
            }
            break;
        case 2:
            // after ':': blanks are skipped, '"' opens a quoted term,
            // anything else starts an unquoted term
            switch (codePoint) {
            case ' ':
                continue;
            case '"':
                state = 4;
                break;
            case '(':
                throw new IllegalArgumentException(BRACKET);
            default:
                term.appendCodePoint(codePoint);
                state = 3;
            }
            break;
        case 3:
            // unquoted term, terminated by a blank
            if (codePoint == ' ') {
                if (term.length() == 0) {
                    throw new IllegalArgumentException(INCOMPLETE);
                }
                addQuery(operator, term.toString(), field.toString());
                operator = AND;
                field = new StringBuilder();
                term = new StringBuilder(32);
                state = 5;
            } else {
                term.appendCodePoint(codePoint);
            }
            break;
        case 4:
            // quoted term, terminated by the closing '"'
            if (codePoint == '"') {
                addQuery(operator, term.toString(), field.toString());
                operator = AND;
                field = new StringBuilder();
                term = new StringBuilder(32);
                state = 5;
            } else {
                term.appendCodePoint(codePoint);
            }
            break;
        case 5:
            // between two queries: an optional '-' or '|' replaces the AND
            // that was preset when the previous query was added
            switch (codePoint) {
            case ' ':
                continue;
            case '-':
                operator = NOT;
                break;
            case '|':
                operator = OR;
                break;
            default:
                field.appendCodePoint(codePoint);
            }
            state = 1;
            break;
        default:
            throw new UnreachableCodeException();
        }
    }
    // An unquoted term that runs to the end of the input still has to be added.
    if (state == 3) {
        addQuery(operator, term.toString(), field.toString());
    }
    if (state != 3 && state != 5) {
        throw new IllegalArgumentException(INCOMPLETE);
    }
}

From source file:org.kitodo.dataaccess.storage.memory.GraphPath.java

/**
 * Parses an object from a graph path string.
 *
 * @param string/*from   w w w .j  a  v a2  s  .c  o m*/
 *            string to parse
 * @return the number of code points consumed and the object parsed
 */
private final Pair<Integer, Node> parseObject(String string, Map<String, String> prefixes) {
    int length = string.length();
    Node result = new MemoryNode();
    int index = 0;
    NodeReference currentPredicate = null;
    do {
        while ((index < length) && (string.codePointAt(index) <= ' ')) {
            index++;
        }
        if ((index >= length) || (string.codePointAt(index) == ']')) {
            return Pair.of(index, result);
        } else if (string.codePointAt(index) == ',') {
            index++;
            currentPredicate = null;
        } else if (string.codePointAt(index) == '[') {
            index++;
            Pair<Integer, Node> recursion = parseObject(string.substring(index), prefixes);
            index += recursion.getKey();
            index++;
            result.put(currentPredicate != null ? currentPredicate : ANY_PREDICATE, recursion.getValue());
        } else {
            if (currentPredicate == null) {
                int predicatesStart = index;
                int codePoint;
                while ((index < length) && ((codePoint = string.codePointAt(index)) > ' ')) {
                    index += Character.charCount(codePoint);
                }
                String predicate = string.substring(predicatesStart, index);
                currentPredicate = predicate.equals(ANY_PREDICATE_CHAR) ? ANY_PREDICATE
                        : MemoryStorage.INSTANCE.createNodeReference(applyPrefixes(prefixes, predicate));
            } else {
                int literalStart = index;
                int cp;
                while ((index < length) && ((cp = string.codePointAt(index)) > ' ') && (cp != ',')
                        && (cp != ']')) {
                    index += Character.charCount(cp);
                }
                String value = applyPrefixes(prefixes, string.substring(literalStart, index));
                result.put(currentPredicate, MemoryLiteral.createLeaf(value, null));
            }
        }
    } while (index < length);
    return Pair.of(length, result);
}

From source file:com.norconex.collector.http.redirect.impl.GenericRedirectURLProvider.java

/**
 * Returns the redirect URL as is when it consists of ASCII characters only.
 * Otherwise a warning is logged and the URL is re-encoded: first with the
 * given charset (when it is not blank and, once cleaned, not UTF-8), and if
 * that is not possible, with UTF-8.
 *
 * @param redirectURL the redirect URL to check
 * @param nonAsciiCharset charset name to try for URLs that are not pure
 *        ASCII; may be blank
 * @return the original or the re-encoded URL
 */
private String resolveRedirectURL(final String redirectURL, final String nonAsciiCharset) {

    String url = redirectURL;

    // Is string containing only ASCII as it should?
    boolean isAscii = true;
    final int length = url.length();
    for (int offset = 0; offset < length;) {
        final int codepoint = url.codePointAt(offset);
        if (codepoint > ASCII_MAX_CODEPOINT) {
            isAscii = false;
            break;
        }
        offset += Character.charCount(codepoint);
    }
    if (isAscii) {
        return url;
    }

    // This point is only reached for URLs with characters beyond ASCII, so
    // the message has to say "not made of".
    LOG.warn("Redirect URI not made of 7-bit clean ASCII. " + "It probably is not encoded properly. "
            + "Will try to fix. Redirect URL: " + redirectURL);

    // NOTE(review): new String(byte[]) decodes with the platform default
    // charset, so the result of both conversions below depends on the JVM
    // configuration. Confirm which source/target charsets are intended.

    // try to fix if non ascii charset is non UTF8.
    if (StringUtils.isNotBlank(nonAsciiCharset)) {
        String charset = CharsetUtils.clean(nonAsciiCharset);
        if (!CharEncoding.UTF_8.equals(charset)) {
            try {
                url = new String(url.getBytes(charset));
                return url;
            } catch (UnsupportedEncodingException e) {
                LOG.warn("Could not fix badly encoded URL with charset \"" + charset + "\". Redirect URL: "
                        + redirectURL, e);
            }
        }
    }

    // If all fails, fall back to UTF8
    try {
        url = new String(url.getBytes(CharEncoding.UTF_8));
        return url;
    } catch (UnsupportedEncodingException e) {
        LOG.warn("Could not fix badly encoded URL with charset " + "\"UTF-8\". Redirect URL: " + redirectURL,
                e);
    }
    return url;
}

From source file:org.kitodo.dataaccess.storage.memory.GraphPath.java

/**
 * Creates a node representing the graph path string.
 *
 * <p>
 * The string is consumed from left to right. A part in square brackets is
 * parsed by {@code parseObject} and stored under {@code RDF.OBJECT} on the
 * current position. Anything else opens a new location step, which becomes
 * the current position, and the token that follows is split at {@code |}
 * into its predicates.
 *
 * @param string
 *            string to parse
 * @param prefixes
 *            a mapping of prefixes to namespaces which was used to shorten
 *            the string
 * @throws IllegalArgumentException
 *             if the string contains one of the directives that are
 *             rejected below
 */
public GraphPath(String string, Map<String, String> prefixes) {
    super(GRAPH_PATH);
    int index = 0;
    Node graphPosition = this;
    int length = string.length();
    while (index < length) {
        // Skip everything up to and including the space character.
        while ((index < length) && (string.codePointAt(index) <= ' ')) {
            index++;
        }
        if ((index < length) && (string.codePointAt(index) == '[')) {
            // Bracketed object: parse the text behind '[', then step over
            // the consumed part and one more char (presumably the closing
            // ']' - verify against parseObject).
            index++;
            Pair<Integer, Node> parseObjectRecursive = parseObject(string.substring(index), prefixes);
            index += parseObjectRecursive.getKey();
            index++;
            graphPosition.put(RDF.OBJECT, parseObjectRecursive.getValue());
        } else {
            Node nextLocationStep = new MemoryNode(LOCATION_STEP);
            NodeReference direction = RDF.NIL;
            // -1 stands in for "end of input" and falls into the default branch.
            switch (index < length ? string.codePointAt(index) : -1) {
            case '<':
                throw new IllegalArgumentException("Directive '<' not supported.");
            case '>':
                index++;
                switch (index < length ? string.codePointAt(index) : -1) {
                case '>':
                    index++;
                    if ((index < length) && (string.codePointAt(index) == '>')) {
                        // NOTE(review): this branch is reached for ">>>" but
                        // the message names '>|' - confirm the intended text.
                        index++;
                        throw new IllegalArgumentException("Directive '>|' not supported.");
                    } else {
                        throw new IllegalArgumentException("Directive '>>' not supported.");
                    }
                case '|':
                    throw new IllegalArgumentException("Directive '>|' not supported.");
                default:
                    // A single '>' has been consumed.
                    direction = TO;
                    break;
                }
                break;
            case '|':
                // The '|' is not consumed here and direction stays RDF.NIL,
                // so the '|' becomes part of the predicates token read below.
                if (((index + 1) < length) && (string.codePointAt(index + 1) == '<')) {
                    throw new IllegalArgumentException("Directive '|<' not supported.");
                }
                break;
            default:
                // No directive character: nothing is consumed.
                direction = TO;
                break;
            }
            // Skip everything up to and including the space character again.
            while ((index < length) && (string.codePointAt(index) <= ' ')) {
                index++;
            }
            // Attach the new step and continue from it.
            graphPosition.put(direction, nextLocationStep);
            graphPosition = nextLocationStep;
            // The predicates token runs up to the next character <= ' ';
            // the index advances by whole code points.
            int predicatesStart = index;
            int codePoint;
            while ((index < length) && ((codePoint = string.codePointAt(index)) > ' ')) {
                index += Character.charCount(codePoint);
            }
            String predicates = string.substring(predicatesStart, index);
            // The ANY_PREDICATE_CHAR token adds no predicate to the step.
            if (!predicates.equals(ANY_PREDICATE_CHAR)) {
                for (String predicate : predicates.split("\\|")) {
                    graphPosition.put(RDF.PREDICATE, applyPrefixes(prefixes, predicate));
                }
            }
        }
    }
}