Example usage for java.lang.String.codePointAt(int)

Introduction

On this page you can find example usages of java.lang.String.codePointAt(int).

Prototype

public int codePointAt(int index)

Source Link

Document

Returns the character (Unicode code point) at the specified index.

Usage

From source file:net.sourceforge.jaulp.string.StringUtils.java

/**
 * Converts all characters from the given String to unicode escapes encoded like &#92;uxxxx.
 * <p>
 * One escape is produced per UTF-16 code unit, so a character outside the Basic Multilingual
 * Plane is written as the two escapes of its surrogate pair.
 *
 * @param toUnicode
 *            The String to convert. Must not be null.
 * @param toLowerCase
 *            If true the hex letters of the escapes are lower case, otherwise upper case.
 * @return The converted String.
 */
public static String toUnicode(final String toUnicode, final boolean toLowerCase) {
    final StringBuilder sb = new StringBuilder();
    for (int i = 0; i < toUnicode.length(); i++) {
        // charAt, not codePointAt: a code unit always fits in four hex digits, whereas a
        // supplementary code point needs five and would be truncated below.
        final String rawHex = Integer.toHexString(toUnicode.charAt(i));
        // Strings are immutable, so the case-converted value has to be captured.
        final String hex = toLowerCase ? rawHex.toLowerCase() : rawHex.toUpperCase();
        final String hexWithZeros = "0000" + hex;
        final String hexCodeWithLeadingZeros = hexWithZeros.substring(hexWithZeros.length() - 4);
        sb.append("\\u").append(hexCodeWithLeadingZeros);
    }
    return sb.toString();
}

From source file:de.alpharogroup.string.StringExtensions.java

/**
 * Converts all characters from the given String to unicode escapes encoded like &#92;uxxxx.
 * <p>
 * Each UTF-16 code unit yields exactly one four-digit escape; supplementary characters are
 * therefore emitted as their high and low surrogate escapes.
 *
 * @param toUnicode
 *            The String to convert. Must not be null.
 * @param toLowerCase
 *            If true the hex letters of the escapes are lower case, otherwise upper case.
 * @return The converted String.
 */
public static String toUnicode(final String toUnicode, final boolean toLowerCase) {
    final int length = toUnicode.length();
    final StringBuilder sb = new StringBuilder();
    for (int i = 0; i < length; i++) {
        // Work on code units: codePointAt would return a five-digit value for a surrogate
        // pair, which the four-digit padding below would silently cut off.
        String hex = Integer.toHexString(toUnicode.charAt(i));
        if (toLowerCase) {
            hex = hex.toLowerCase();
        } else {
            hex = hex.toUpperCase();
        }
        final String hexWithZeros = "0000" + hex;
        sb.append("\\u").append(hexWithZeros.substring(hexWithZeros.length() - 4));
    }
    return sb.toString();
}

From source file:org.exoplatform.cms.common.CommonUtils.java

/**
 * Encode special character to html number. Ex: '/' --> &#47; 
 * @param String s, the string input/*  w  ww  . ja v  a2  s .  c o m*/
 * @param String charIgnore, the string content ignore some special character can not encode.
 * @param boolean isTitle, the boolean for check convert is title or not.
 * @return String 
 */
public static String encodeSpecialCharToHTMLnumber(String s, String charIgnore, boolean isTitle) {
    if (isEmpty(s)) {
        return EMPTY_STR;
    }
    int i = 0;
    StringBuilder builder = new StringBuilder();
    while (i < s.length()) {
        char c = s.charAt(i);
        if (charIgnore.indexOf(String.valueOf(c)) >= 0) {
            builder.append(c);
        } else {
            int t = s.codePointAt(i);
            if (t < CHAR_CODES[0] && t > CHAR_CODES[1] || t < CHAR_CODES[2] && t > CHAR_CODES[3]
                    || t < CHAR_CODES[4] && t > CHAR_CODES[5] || t < CHAR_CODES[6] && t > CHAR_CODES[7]) {
                if (isTitle && (t == 60 || t == 62)) {
                    if (t == 60) {
                        builder.append(LESS_THAN);
                    } else if (t == 62) {
                        builder.append(GREATER_THAN);
                    }
                } else {
                    builder.append(AMP_NUMBER).append(t).append(SEMICOLON);
                }
            } else {
                builder.append(c);
            }
        }
        ++i;
    }
    return builder.toString();
}

From source file:org.archive.modules.fetcher.FetchHTTPRequest.java

/**
 * Produces a pure-ASCII copy of the input in which every code point above U+007F is replaced
 * by its decimal HTML numeric character reference (e.g. &amp;#12345;).
 *
 * <p>
 * The goal is a multipart/formdata submission that any server should be able to handle. It is
 * modelled on experiments with a modern browser (chromium 47.0.2526.106 for mac). What
 * chromium posts depends on the character encoding it assumes for the page containing the
 * form, and possibly other factors; simulating that in heritrix would be too complicated.
 *
 * <p>
 * So this method does approximately what the browser does when the form page is plain ascii:
 * characters outside the latin1/cp1252 range are html-escaped. The one difference is the
 * U+0080-U+00FF range, which chromium sends encoded in latin1/cp1252 and which is html-escaped
 * here as well. The resulting http post is plain ascii and should work regardless of which
 * encoding the server expects.
 *
 * <p>
 * N.b. chromium does not indicate the request encoding in any way (no charset in the
 * content-type or similar). When it considers the form page to be utf-8 it submits utf-8,
 * which is part of the complicated behavior deliberately not simulated.
 *
 * @param str the text to escape; must not be null
 * @return the escaped, ascii-only text
 */
public static String escapeForMultipart(String str) {
    final StringBuilder escaped = new StringBuilder();
    int pos = 0;
    while (pos < str.length()) {
        final int cp = str.codePointAt(pos);
        // Step by whole code points so surrogate pairs become a single reference.
        pos += Character.charCount(cp);
        if (cp > 0x7f) {
            escaped.append("&#").append(cp).append(";");
        } else {
            escaped.appendCodePoint(cp);
        }
    }
    return escaped.toString();
}

From source file:org.apache.stanbol.enhancer.engines.opennlp.impl.NEREngineCore.java

/**
 * Replaces characters that are not serializable as XML (typically control characters) with a
 * space, so as to avoid polluting the annotation graph with unserializable snippets.
 * <p>
 * Tabs, line feeds and carriage returns are kept. The text is scanned by code point, so valid
 * supplementary characters (surrogate pairs) are left intact while unpaired surrogates are
 * replaced. The returned string always has the same length as the input.
 *
 * @param text the text to clean, may be null
 * @return null for null input, the same instance when nothing had to be replaced, otherwise a
 *         cleaned copy
 */
protected static String removeNonUtf8CompliantCharacters(final String text) {
    if (null == text) {
        return null;
    }
    StringBuilder sb = null; //initialised on the first replacement
    for (int i = 0; i < text.length();) {
        final int ch = text.codePointAt(i);
        // remove any characters outside the valid range as well as all control characters
        // except tabs and new lines
        //NOTE: rewesten (2012-11-21) replaced the original check with the one
        // found at http://blog.mark-mclaren.info/2007/02/invalid-xml-characters-when-valid-utf8_5873.html
        final boolean valid = (ch == 0x9) || (ch == 0xA) || (ch == 0xD) || ((ch >= 0x20) && (ch <= 0xD7FF))
                || ((ch >= 0xE000) && (ch <= 0xFFFD)) || ((ch >= 0x10000) && (ch <= 0x10FFFF));
        if (!valid) {
            if (sb == null) {
                sb = new StringBuilder(text);
            }
            // every rejected code point is below 0x10000 and thus exactly one char wide
            sb.setCharAt(i, ' ');
        }
        // Skip the whole code point; stepping by one would revisit the low surrogate of a
        // valid pair and wrongly blank it.
        i += Character.charCount(ch);
    }
    return sb == null ? text : sb.toString();
}

From source file:org.apache.poi.util.StringUtil.java

/**
 * Maps code points that have an entry in the MS code point table to their replacement.
 * <p>
 * Some strings may contain encoded characters of the unicode private use area. Currently the
 * characters of the symbol fonts are mapped to the corresponding characters in the normal
 * unicode range. Code points without a table entry are copied unchanged.
 *
 * @param string the original string
 * @return the string with mapped characters; null or empty input is returned as is
 *
 * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a>
 * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a>
 */
public static String mapMsCodepointString(String string) {
    if (string == null || string.isEmpty()) {
        return string;
    }
    // presumably populates msCodepointToUnicode on first use - confirm against its definition
    initMsCodepointMap();

    final StringBuilder mapped = new StringBuilder();
    final int end = string.length();
    int pos = 0;
    while (pos < end) {
        final int source = string.codePointAt(pos);
        final Integer replacement = msCodepointToUnicode.get(source);
        if (replacement == null) {
            mapped.appendCodePoint(source);
        } else {
            mapped.appendCodePoint(replacement);
        }
        pos += Character.charCount(source);
    }
    return mapped.toString();
}

From source file:org.omegat.util.FileUtil.java

/**
 * Translates a glob-like file mask into a regular expression.
 * <ul>
 * <li>A mask not starting with '/' is "relative" and may match at any directory level.</li>
 * <li>A mask ending with '/' matches everything in that subtree.</li>
 * <li>'?' matches one character other than '/', '*' matches any run of such characters.</li>
 * <li>'**' matches anything; next to a '/' it matches zero or more directory levels.</li>
 * <li>ASCII letters and digits are copied literally; every other character is
 * backslash-escaped.</li>
 * </ul>
 *
 * @param mask the file mask, must not be null
 * @return the compiled pattern
 */
static Pattern compileFileMask(String mask) {
    String glob = mask;
    if (!glob.startsWith("/")) {
        glob = "**/" + glob;
    }
    if (glob.endsWith("/")) {
        glob = glob + "**";
    }

    final StringBuilder regex = new StringBuilder();
    int pos = 0;
    while (pos < glob.length()) {
        final int cp = glob.codePointAt(pos);
        // number of chars of the mask consumed by this step
        int consumed = Character.charCount(cp);
        final boolean asciiAlnum = (cp >= 'A' && cp <= 'Z') || (cp >= 'a' && cp <= 'z')
                || (cp >= '0' && cp <= '9');
        if (asciiAlnum) {
            regex.appendCodePoint(cp);
        } else {
            switch (cp) {
            case '/':
                if (glob.startsWith("/**/", pos)) {
                    // "/**/" spans zero or more directory levels
                    regex.append("(?:/|/.*/)");
                    consumed = 4;
                } else if (glob.startsWith("/**", pos)) {
                    // "/**" spans zero or more directory levels
                    regex.append("(?:|/.*)");
                    consumed = 3;
                } else {
                    regex.append('/');
                }
                break;
            case '?':
                // any single character except the directory separator
                regex.append("[^/]");
                break;
            case '*':
                if (glob.startsWith("**/", pos)) {
                    // "**/" spans zero or more directory levels
                    regex.append("(?:|.*/)");
                    consumed = 3;
                } else if (glob.startsWith("**", pos)) {
                    regex.append(".*");
                    consumed = 2;
                } else {
                    regex.append("[^/]*");
                }
                break;
            default:
                regex.append('\\').appendCodePoint(cp);
                break;
            }
        }
        pos += consumed;
    }
    return Pattern.compile(regex.toString());
}

From source file:immf.Util.java

/**
 * Encodes a subject as a sequence of UTF-8 "B" encoded-words, folded with CRLF + space.
 * <p>
 * The subject is cut at code point boundaries whenever the estimated base64 size of the
 * current chunk would reach the space left in a 75 character encoded-word. The first chunk
 * additionally reserves room for the "X-Goomoji-Subject: " header name.
 *
 * @param subject the raw subject text, must not be null
 * @return the encoded (and possibly folded) header value
 * @throws UnsupportedEncodingException propagated from MimeUtility.encodeWord
 */
public static String encodeGoomojiSubject(String subject) throws UnsupportedEncodingException {
    final int maxlen = 75 - ("=?UTF-8?B?".length() + "?=".length());
    StringBuilder sb = new StringBuilder();

    int mark = 0; // start of the chunk that has not been emitted yet
    int utf8len = "X-Goomoji-Subject: ".length();
    for (int i = 0; i < subject.length();) {
        int cp = subject.codePointAt(i);
        // UTF-8 byte length of this code point; U+007F is still a single byte
        int len;
        if (cp <= 0x7f) {
            len = 1;
        } else if (cp <= 0x7ff) {
            len = 2;
        } else if (cp <= 0xffff) {
            len = 3;
        } else {
            len = 4;
        }

        // base64 turns every 3 bytes into 4 characters; flush before the chunk gets too long
        if (4 * ((utf8len + len - 1) / 3 + 1) >= maxlen) {
            if (mark > 0) {
                sb.append("\r\n ");
            }
            sb.append(MimeUtility.encodeWord(subject.substring(mark, i), "UTF-8", "B"));
            mark = i;
            utf8len = 0;
        }

        utf8len += len;
        i += Character.charCount(cp);
    }
    // emit whatever is left after the last cut
    if (mark > 0) {
        sb.append("\r\n ");
    }
    sb.append(MimeUtility.encodeWord(subject.substring(mark), "UTF-8", "B"));

    return sb.toString();
}

From source file:fr.ens.biologie.genomique.eoulsan.it.ITFactory.java

/**
 * Evaluate expression in a string./*from w  w w.jav  a  2 s  . co  m*/
 * @param s string in witch expression must be replaced
 * @param allowExec allow execution of code
 * @return a string with expression evaluated
 * @throws EoulsanException if an error occurs while parsing the string or
 *           executing an expression
 */
static String evaluateExpressions(final String s, final boolean allowExec) throws EoulsanException {

    if (s == null) {
        return null;
    }

    final StringBuilder result = new StringBuilder();

    final int len = s.length();

    for (int i = 0; i < len; i++) {

        final int c0 = s.codePointAt(i);

        // Variable substitution
        if (c0 == '$' && i + 1 < len) {

            final int c1 = s.codePointAt(i + 1);
            if (c1 == '{') {

                final String expr = subStr(s, i + 2, '}');

                final String trimmedExpr = expr.trim();
                if (CONSTANTS.containsKey(trimmedExpr)) {
                    result.append(CONSTANTS.get(trimmedExpr));
                }

                i += expr.length() + 2;
                continue;
            }
        }

        // Command substitution
        if (c0 == '`' && allowExec) {
            final String expr = subStr(s, i + 1, '`');
            try {
                final String r = ProcessUtils.execToString(evaluateExpressions(expr, false));

                // remove last '\n' in the result
                if (!r.isEmpty() && r.charAt(r.length() - 1) == '\n') {
                    result.append(r.substring(0, r.length() - 1));
                } else {
                    result.append(r);
                }

            } catch (final IOException e) {
                throw new EoulsanException("Error while evaluating expression \"" + expr + "\"", e);
            }
            i += expr.length() + 1;
            continue;
        }

        result.appendCodePoint(c0);
    }

    return result.toString();
}

From source file:txyd.util.StringUtils.java

/**
 * Prints every character of a sentence on its own line, treating a surrogate pair as one
 * character.
 * <p>
 * A Java char is a 16-bit UTF-16 code unit. Code points up to U+FFFF fit in one char, while
 * supplementary code points (U+10000 to U+10FFFF) need two. Nothing is printed when the
 * sentence contains no supplementary code point, i.e. when its char count equals its code
 * point count.
 *
 * @param sentence the text to print, must not be null
 */
public static void printlnChar(String sentence) {
    final int unitCount = sentence.length();
    final int codePointCount = sentence.codePointCount(0, unitCount);
    if (unitCount == codePointCount) {
        // only sentences with at least one surrogate pair are printed
        return;
    }
    int pos = 0;
    while (pos < unitCount) {
        final int codePoint = sentence.codePointAt(pos);
        if (Character.isSupplementaryCodePoint(codePoint)) {
            System.out.println(String.valueOf(Character.toChars(codePoint)));
            pos += 2;
        } else {
            System.out.println(sentence.charAt(pos));
            pos += 1;
        }
    }
}