Example usage for java.lang Character charCount

List of usage examples for java.lang Character charCount

Introduction

On this page you can find example usages of java.lang.Character.charCount.

Prototype

public static int charCount(int codePoint) 

Source Link

Document

Determines the number of char values needed to represent the specified character (Unicode code point).

Usage

From source file:net.sf.ufsc.ServiceLoader.java

/**
 * Reads and validates a single line of a provider-configuration file.
 *
 * <p>Everything from the first {@code '#'} onwards is treated as a comment and dropped,
 * then the remainder is trimmed. A non-empty remainder must contain no space or tab, must
 * start with a Java identifier start character and may otherwise consist only of Java
 * identifier part characters and {@code '.'}; violations are reported through
 * {@code fail(...)}. A name that is neither a key of {@code providers} nor already in
 * {@code names} is appended to {@code names}.
 *
 * @param service the service type, passed through to {@code fail(...)}
 * @param u the URL of the configuration file, passed through to {@code fail(...)}
 * @param r the reader the line is taken from
 * @param lc the number of the line being read
 * @param names the list collecting accepted provider-class names
 * @return {@code lc + 1}, or {@code -1} when the reader has no more lines
 * @throws IOException if reading the line fails
 * @throws ServiceConfigurationError declared for syntax errors (presumably raised by
 *         {@code fail(...)}; confirm against its definition)
 */
private int parseLine(Class<?> service, URL u, BufferedReader r, int lc, List<String> names)
        throws IOException, ServiceConfigurationError {
    final String rawLine = r.readLine();
    if (rawLine == null) {
        return -1;
    }

    // Drop the trailing comment (if any) and the surrounding whitespace.
    final int commentStart = rawLine.indexOf('#');
    final String name = (commentStart >= 0 ? rawLine.substring(0, commentStart) : rawLine).trim();
    if (name.isEmpty()) {
        return lc + 1;
    }

    if (name.indexOf(' ') >= 0 || name.indexOf('\t') >= 0) {
        fail(service, u, lc, "Illegal configuration-file syntax");
    }

    int codePoint = name.codePointAt(0);
    if (!Character.isJavaIdentifierStart(codePoint)) {
        fail(service, u, lc, "Illegal provider-class name: " + name);
    }

    // Walk the rest of the name one code point (not one char) at a time.
    int index = Character.charCount(codePoint);
    while (index < name.length()) {
        codePoint = name.codePointAt(index);
        if (codePoint != '.' && !Character.isJavaIdentifierPart(codePoint)) {
            fail(service, u, lc, "Illegal provider-class name: " + name);
        }
        index += Character.charCount(codePoint);
    }

    if (!providers.containsKey(name) && !names.contains(name)) {
        names.add(name);
    }
    return lc + 1;
}

From source file:org.omegat.tokenizer.BaseTokenizer.java

/**
 * Decides whether a token passes the requested filters.
 *
 * @param token the token text to examine
 * @param filterDigits when {@code true}, a token containing any digit code point is rejected
 * @param filterWhitespace when {@code true}, a token for which every code point satisfies
 *        {@code StringUtil.isWhiteSpace} is rejected
 * @return {@code false} for a token that {@code StringUtil.isEmpty} reports as empty or
 *         that is rejected by an active filter; {@code true} otherwise
 */
private boolean acceptToken(String token, boolean filterDigits, boolean filterWhitespace) {
    if (StringUtil.isEmpty(token)) {
        return false;
    }
    if (!(filterDigits || filterWhitespace)) {
        // No filter is active, so there is nothing to inspect.
        return true;
    }

    boolean sawNonWhitespace = false;
    int pos = 0;
    while (pos < token.length()) {
        final int codePoint = token.codePointAt(pos);
        pos += Character.charCount(codePoint);

        if (filterDigits && Character.isDigit(codePoint)) {
            return false;
        }
        if (filterWhitespace && !StringUtil.isWhiteSpace(codePoint)) {
            sawNonWhitespace = true;
        }
    }
    return !filterWhitespace || sawNonWhitespace;
}

From source file:com.marklogic.mapreduce.utilities.InternalUtilities.java

/**
 * Escapes a string so that it contains only code points in the range U+0020..U+007F.
 *
 * <p>Within that range, {@code "} becomes {@code &quot;} and {@code &} becomes
 * {@code &amp;}; every other character is copied unchanged. Any code point outside the
 * range is written as a hexadecimal numeric character reference ({@code &#x...;},
 * lower-case digits, no padding). Surrogate pairs are emitted as a single reference.
 *
 * @param s the string to escape; must not be {@code null}
 * @return the escaped string
 */
public static String unparse(String s) {
    final int length = s.length();
    final StringBuilder escaped = new StringBuilder(length * 2);

    int index = 0;
    while (index < length) {
        final int codePoint = s.codePointAt(index);
        final boolean plainAscii = codePoint >= 0x20 && codePoint < 0x80;

        if (!plainAscii) {
            escaped.append("&#x").append(Long.toString(codePoint, 16)).append(';');
        } else if (codePoint == '"') {
            escaped.append("&quot;");
        } else if (codePoint == '&') {
            escaped.append("&amp;");
        } else {
            // In this range the code point is a single char.
            escaped.append((char) codePoint);
        }
        index += Character.charCount(codePoint);
    }
    return escaped.toString();
}

From source file:org.apache.poi.util.StringUtil.java

/**
 * Replaces code points that have an entry in the Microsoft code point table with their
 * mapped Unicode counterparts.
 *
 * <p>Some strings may contain encoded characters of the Unicode private use area.
 * Currently the characters of the symbol fonts are mapped to the corresponding
 * characters in the normal Unicode range. Code points without a mapping are copied
 * unchanged.
 *
 * @param string the original string
 * @return the string with mapped characters; the input itself when it is {@code null}
 *         or empty
 *
 * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a>
 * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a>
 */
public static String mapMsCodepointString(String string) {
    if (string == null || string.isEmpty()) {
        return string;
    }
    initMsCodepointMap();

    final StringBuilder mapped = new StringBuilder();
    int offset = 0;
    while (offset < string.length()) {
        final int sourceCodePoint = string.codePointAt(offset);
        final Integer replacement = msCodepointToUnicode.get(sourceCodePoint);
        mapped.appendCodePoint(replacement != null ? replacement : sourceCodePoint);
        // Advance by one or two chars, depending on the code point just read.
        offset += Character.charCount(sourceCodePoint);
    }
    return mapped.toString();
}

From source file:com.weibo.api.motan.core.extension.ExtensionLoader.java

/**
 * Validates one line of an SPI configuration file and records the provider name it holds.
 *
 * <p>Everything from the first {@code '#'} onwards is treated as a comment and dropped,
 * then the remainder is trimmed; an empty remainder is ignored. Otherwise the name must
 * contain no space or tab, must start with a Java identifier start character and may
 * otherwise consist only of Java identifier part characters and {@code '.'}; violations
 * are reported through {@code failThrows(...)}. A name not yet in {@code names} is
 * appended to it.
 *
 * @param type the extension type, passed through to {@code failThrows(...)}
 * @param url the URL of the configuration file, passed through to {@code failThrows(...)}
 * @param line the raw line to parse
 * @param lineNumber the number of the line, passed through to {@code failThrows(...)}
 * @param names the list collecting accepted provider-class names
 * @throws IOException declared by the signature; nothing in this method reads from a stream
 * @throws ServiceConfigurationError declared for syntax errors (presumably raised by
 *         {@code failThrows(...)}; confirm against its definition)
 */
private void parseLine(Class<T> type, URL url, String line, int lineNumber, List<String> names)
        throws IOException, ServiceConfigurationError {
    final int commentStart = line.indexOf('#');
    final String name = (commentStart >= 0 ? line.substring(0, commentStart) : line).trim();
    if (name.isEmpty()) {
        return;
    }

    if (name.indexOf(' ') >= 0 || name.indexOf('\t') >= 0) {
        failThrows(type, url, lineNumber, "Illegal spi configuration-file syntax");
    }

    int codePoint = name.codePointAt(0);
    if (!Character.isJavaIdentifierStart(codePoint)) {
        failThrows(type, url, lineNumber, "Illegal spi provider-class name: " + name);
    }

    // Walk the rest of the name one code point (not one char) at a time.
    int index = Character.charCount(codePoint);
    while (index < name.length()) {
        codePoint = name.codePointAt(index);
        if (codePoint != '.' && !Character.isJavaIdentifierPart(codePoint)) {
            failThrows(type, url, lineNumber, "Illegal spi provider-class name: " + name);
        }
        index += Character.charCount(codePoint);
    }

    if (!names.contains(name)) {
        names.add(name);
    }
}

From source file:org.apache.pdfbox.pdmodel.PDPageContentStream.java

/**
 * Shows the given text at the location specified by the current text matrix.
 *
 * <p>The font on top of the font stack is used to encode the text. When that font reports
 * that it will be subset, every code point of the text is registered with it first.
 *
 * @param text The Unicode text to show.
 * @throws IOException If an io exception occurs.
 * @throws IllegalStateException If the stream is not in text mode or no font has been set.
 */
public void showText(String text) throws IOException {
    if (!inTextMode) {
        throw new IllegalStateException("Must call beginText() before showText()");
    }
    if (fontStack.isEmpty()) {
        throw new IllegalStateException("Must call setFont() before showText()");
    }

    final PDFont currentFont = fontStack.peek();

    if (currentFont.willBeSubset()) {
        // Register each Unicode code point so it is kept when the font is subset.
        int index = 0;
        while (index < text.length()) {
            final int codePoint = text.codePointAt(index);
            currentFont.addToSubset(codePoint);
            index += Character.charCount(codePoint);
        }
    }

    // Emit the encoded string followed by the "Tj" operator.
    COSWriter.writeString(currentFont.encode(text), output);
    write(" ");
    writeOperator("Tj");
}

From source file:org.languagetool.rules.spelling.hunspell.HunspellRule.java

/**
 * Rebuilds the sentence text with tokens that must not be spell-checked blanked out.
 *
 * <p>Tokens are taken from {@code getSentenceWithImmunization(sentence)}, starting at
 * index 1. For each token:
 * <ul>
 *   <li>a quoted compound contributes a space followed by the token without its first
 *       character;</li>
 *   <li>an immunized token, a token ignored by the speller, a URL or an e-mail address is
 *       replaced by whitespace (one space per char for tokens of 20 chars or more, the
 *       matching {@code WHITESPACE_ARRAY} entry otherwise);</li>
 *   <li>a token containing code points that need two chars has each such code point
 *       replaced by two spaces, so the char length is unchanged;</li>
 *   <li>any other token is copied as is.</li>
 * </ul>
 *
 * @param sentence the analyzed sentence
 * @return the text assembled from the sentence tokens as described above
 */
protected String getSentenceTextWithoutUrlsAndImmunizedTokens(AnalyzedSentence sentence) {
    final StringBuilder text = new StringBuilder();
    final AnalyzedTokenReadings[] readings = getSentenceWithImmunization(sentence).getTokens();

    for (int i = 1; i < readings.length; i++) {
        final AnalyzedTokenReadings reading = readings[i];
        final String token = reading.getToken();
        final int tokenLength = token.length();

        final boolean excluded = reading.isImmunized() || reading.isIgnoredBySpeller() || isUrl(token)
                || isEMail(token) || isQuotedCompound(sentence, i, token);
        if (excluded) {
            if (isQuotedCompound(sentence, i, token)) {
                text.append(" ").append(token.substring(1));
            } else if (tokenLength < 20) {
                // replace URLs and immunized tokens with whitespace to ignore them for spell checking
                text.append(WHITESPACE_ARRAY[tokenLength]);
            } else {
                for (int j = 0; j < tokenLength; j++) {
                    text.append(' ');
                }
            }
            continue;
        }

        if (tokenLength > 1 && token.codePointCount(0, tokenLength) != tokenLength) {
            // Some symbols, such as emojis, occupy two chars: blank them out with two spaces.
            int charIndex = 0;
            while (charIndex < tokenLength) {
                final int width = Character.charCount(token.codePointAt(charIndex));
                if (width == 1) {
                    text.append(token.charAt(charIndex));
                } else {
                    text.append("  ");
                }
                charIndex += width;
            }
        } else {
            text.append(token);
        }
    }
    return text.toString();
}

From source file:ac.elements.parser.ExtendedFunctions.java

/**
 * Removes every code point that is not a valid XML 1.0 character from a string.
 *
 * <p>A code point is kept when it is U+0009, U+000A, U+000D, or lies in one of the ranges
 * U+0020..U+D7FF, U+E000..U+FFFD or U+10000..U+10FFFF. Everything else (other control
 * characters, unpaired surrogates, U+FFFE and U+FFFF) is dropped. The string is processed
 * by code point, so a valid surrogate pair is kept or dropped as a unit.
 *
 * <p>Original implementation credited to Donoiu Cristian (GPL); see
 * http://cse-mjmcl.cse.bris.ac.uk/blog/2007/02/14/1171465494443.html
 *
 * @param s the string whose non-valid characters should be removed; may be {@code null}
 * @return the input stripped of non-valid characters, or an empty string when the input
 *         is {@code null} or empty
 */
public static String stripNonValidXML(String s) {
    // Honour the documented contract: null or empty input yields an empty string
    // instead of a NullPointerException.
    if (s == null || s.isEmpty()) {
        return "";
    }

    final int length = s.length();
    final StringBuilder out = new StringBuilder(length);

    int i = 0;
    while (i < length) {
        // The Unicode code point starting at this char index; it may span two chars.
        final int codePoint = s.codePointAt(i);

        if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
                || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
                || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
                || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {
            out.appendCodePoint(codePoint);
        }

        // Advance by the number of chars (code units) this code point occupies.
        i += Character.charCount(codePoint);
    }

    return out.toString();
}

From source file:org.omegat.util.FileUtil.java

/**
 * Translates a file mask with {@code ?}, {@code *} and {@code **} wildcards into a
 * regular expression.
 *
 * <p>A mask that does not start with {@code /} is prefixed with {@code **}{@code /} so it
 * can match at any directory level; a mask ending with {@code /} gets {@code **} appended
 * so it matches everything in the subtree. ASCII letters and digits and plain {@code /}
 * are copied literally, {@code ?} and a single {@code *} match anything except {@code /},
 * the {@code **} forms match zero or more directory levels, and every other code point is
 * emitted preceded by a backslash.
 *
 * @param mask the file mask to translate; must not be {@code null}
 * @return the compiled pattern
 */
static Pattern compileFileMask(String mask) {
    String glob = mask;
    // "Relative" masks can match at any directory level
    if (!glob.startsWith("/")) {
        glob = "**/" + glob;
    }
    // Masks ending with a slash match everything in subtree
    if (glob.endsWith("/")) {
        glob = glob + "**";
    }

    final StringBuilder regex = new StringBuilder();
    int pos = 0;
    while (pos < glob.length()) {
        final int ch = glob.codePointAt(pos);
        // Number of chars this step consumes; multi-char wildcards override it below.
        int consumed = Character.charCount(ch);

        final boolean asciiAlphanumeric = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
                || (ch >= '0' && ch <= '9');
        if (asciiAlphanumeric) {
            regex.appendCodePoint(ch);
        } else {
            switch (ch) {
            case '/':
                if (glob.startsWith("/**/", pos)) {
                    // The sequence /**/ matches *zero* or more levels
                    regex.append("(?:/|/.*/)");
                    consumed = 4;
                } else if (glob.startsWith("/**", pos)) {
                    // The sequence /** matches *zero* or more levels
                    regex.append("(?:|/.*)");
                    consumed = 3;
                } else {
                    regex.append('/');
                }
                break;
            case '?':
                // ? matches anything but a directory separator
                regex.append("[^/]");
                break;
            case '*':
                if (glob.startsWith("**/", pos)) {
                    // The sequence **/ matches *zero* or more levels
                    regex.append("(?:|.*/)");
                    consumed = 3;
                } else if (glob.startsWith("**", pos)) {
                    regex.append(".*");
                    consumed = 2;
                } else {
                    regex.append("[^/]*");
                }
                break;
            default:
                // Anything else is escaped so it is matched literally.
                regex.append('\\').appendCodePoint(ch);
                break;
            }
        }
        pos += consumed;
    }
    return Pattern.compile(regex.toString());
}

From source file:immf.Util.java

/**
 * Encodes a subject as a sequence of UTF-8, Base64 ("B") MIME encoded-words, folding
 * onto a continuation line ({@code "\r\n "}) whenever the current chunk would get too long.
 *
 * <p>The subject is walked by code point so that a surrogate pair is never split across
 * two encoded-words. For each code point its UTF-8 byte length is added to a running
 * total; once the estimated Base64 size of that total reaches the per-word budget
 * (75 characters minus the {@code "=?UTF-8?B?"} / {@code "?="} wrapper), the text
 * collected so far is emitted as one encoded-word. The total for the first chunk starts
 * at the length of {@code "X-Goomoji-Subject: "}.
 *
 * @param subject the subject text to encode; must not be {@code null}
 * @return the encoded-words, joined by {@code "\r\n "}
 * @throws UnsupportedEncodingException if {@code MimeUtility.encodeWord} rejects the charset
 */
public static String encodeGoomojiSubject(String subject) throws UnsupportedEncodingException {
    final int maxlen = 75 - ("=?UTF-8?B?".length() + "?=".length());
    StringBuilder sb = new StringBuilder();

    int mark = 0; // start index of the chunk that has not been emitted yet
    int utf8len = "X-Goomoji-Subject: ".length();
    for (int i = 0; i < subject.length();) {
        int cp = subject.codePointAt(i);

        // UTF-8 byte length of this code point. One-byte sequences cover
        // U+0000..U+007F inclusive, so U+007F must count as a single byte.
        int len;
        if (cp <= 0x7f) {
            len = 1;
        } else if (cp <= 0x7ff) {
            len = 2;
        } else if (cp <= 0xffff) {
            len = 3;
        } else {
            len = 4;
        }

        // Flush the current chunk before this code point would push its
        // estimated Base64 length up to the budget.
        if (4 * ((utf8len + len - 1) / 3 + 1) >= maxlen) {
            if (mark > 0) {
                sb.append("\r\n ");
            }
            sb.append(MimeUtility.encodeWord(subject.substring(mark, i), "UTF-8", "B"));
            mark = i;
            utf8len = 0;
        }

        utf8len += len;
        i += Character.charCount(cp);
    }

    // Emit whatever remains after the last flush.
    if (mark > 0) {
        sb.append("\r\n ");
    }
    sb.append(MimeUtility.encodeWord(subject.substring(mark), "UTF-8", "B"));

    return sb.toString();
}