List of usage examples for java.lang.Character.charCount
public static int charCount(int codePoint)
From source file:net.sf.ufsc.ServiceLoader.java
/**
 * Reads one line from a provider-configuration file and, when it holds a
 * syntactically valid provider class name that is not yet known, records it.
 *
 * @param service the service type, passed through to {@code fail} for reporting
 * @param u       the configuration file location, passed through to {@code fail}
 * @param r       the reader positioned at the line to parse
 * @param lc      the number of the line about to be read
 * @param names   accumulator for newly discovered provider class names
 * @return {@code lc + 1}, or {@code -1} once the reader is exhausted
 * @throws IOException if reading the line fails
 * @throws ServiceConfigurationError declared for malformed lines (reported through {@code fail})
 */
private int parseLine(Class<?> service, URL u, BufferedReader r, int lc, List<String> names)
        throws IOException, ServiceConfigurationError {
    String line = r.readLine();
    if (line == null) {
        return -1;
    }

    // Drop a trailing comment, then surrounding whitespace.
    int commentStart = line.indexOf('#');
    if (commentStart >= 0) {
        line = line.substring(0, commentStart);
    }
    line = line.trim();

    int length = line.length();
    if (length == 0) {
        // Blank or comment-only line: nothing to record.
        return lc + 1;
    }

    if ((line.indexOf(' ') >= 0) || (line.indexOf('\t') >= 0)) {
        fail(service, u, lc, "Illegal configuration-file syntax");
    }

    int codePoint = line.codePointAt(0);
    if (!Character.isJavaIdentifierStart(codePoint)) {
        fail(service, u, lc, "Illegal provider-class name: " + line);
    }

    // Walk the remaining code points; advance by 1 or 2 chars depending on the code point.
    int pos = Character.charCount(codePoint);
    while (pos < length) {
        codePoint = line.codePointAt(pos);
        if ((codePoint != '.') && !Character.isJavaIdentifierPart(codePoint)) {
            fail(service, u, lc, "Illegal provider-class name: " + line);
        }
        pos += Character.charCount(codePoint);
    }

    if (!providers.containsKey(line) && !names.contains(line)) {
        names.add(line);
    }
    return lc + 1;
}
From source file:org.omegat.tokenizer.BaseTokenizer.java
private boolean acceptToken(String token, boolean filterDigits, boolean filterWhitespace) { if (StringUtil.isEmpty(token)) { return false; }//from ww w . ja va 2s .co m if (!filterDigits && !filterWhitespace) { return true; } boolean isWhitespaceOnly = true; for (int i = 0, cp; i < token.length(); i += Character.charCount(cp)) { cp = token.codePointAt(i); if (filterDigits && Character.isDigit(cp)) { return false; } if (filterWhitespace && !StringUtil.isWhiteSpace(cp)) { isWhitespaceOnly = false; } } return !(filterWhitespace && isWhitespaceOnly); }
From source file:com.marklogic.mapreduce.utilities.InternalUtilities.java
public static String unparse(String s) { int len = s.length(); StringBuilder buf = new StringBuilder(len * 2); for (int cp, i = 0; i < s.length(); i += Character.charCount(cp)) { cp = s.codePointAt(i);//from ww w . ja v a2 s . c o m // iterate through the codepoints in the string if ((cp >= 0x20) && (cp < 0x80)) { switch (cp) { case '"': buf.append("""); break; case '&': buf.append("&"); break; default: buf.append(s.charAt(i)); } } else { buf.append("&#x"); buf.append(Long.toString(cp, 16)); buf.append(';'); } } return buf.toString(); }
From source file:org.apache.poi.util.StringUtil.java
/**
 * Replaces code points that have an entry in the MS code point table with
 * their mapped Unicode counterparts; all other code points are copied as-is.
 * Strings may contain characters from the Unicode private use area (for
 * example, those used by symbol fonts), which is what the table targets.
 *
 * @param string the original string
 * @return the string with mapped characters; {@code null} or empty input is
 *         returned unchanged
 *
 * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a>
 * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a>
 */
public static String mapMsCodepointString(String string) {
    if (string == null || string.isEmpty()) {
        return string;
    }
    initMsCodepointMap();

    StringBuilder mapped = new StringBuilder();
    final int end = string.length();
    int pos = 0;
    while (pos < end) {
        int source = string.codePointAt(pos);
        Integer replacement = msCodepointToUnicode.get(source);
        if (replacement == null) {
            mapped.appendCodePoint(source);
        } else {
            mapped.appendCodePoint(replacement);
        }
        // Advance by the width of the code point just read from the input.
        pos += Character.charCount(source);
    }
    return mapped.toString();
}
From source file:com.weibo.api.motan.core.extension.ExtensionLoader.java
/**
 * Validates one line of an SPI configuration file and, when it holds a
 * well-formed provider class name not already collected, appends it to
 * {@code names}.
 *
 * @param type       the extension type, passed through to {@code failThrows}
 * @param url        the configuration file location, passed through to {@code failThrows}
 * @param line       the raw line text
 * @param lineNumber the line's number, used for error reporting
 * @param names      accumulator for provider class names
 * @throws IOException declared by the signature
 * @throws ServiceConfigurationError declared for malformed lines (reported through {@code failThrows})
 */
private void parseLine(Class<T> type, URL url, String line, int lineNumber, List<String> names)
        throws IOException, ServiceConfigurationError {
    // Strip a trailing comment and surrounding whitespace.
    int commentStart = line.indexOf('#');
    String entry = (commentStart >= 0) ? line.substring(0, commentStart) : line;
    entry = entry.trim();
    if (entry.isEmpty()) {
        return;
    }

    boolean hasInnerBlank = (entry.indexOf(' ') >= 0) || (entry.indexOf('\t') >= 0);
    if (hasInnerBlank) {
        failThrows(type, url, lineNumber, "Illegal spi configuration-file syntax");
    }

    int codePoint = entry.codePointAt(0);
    if (!Character.isJavaIdentifierStart(codePoint)) {
        failThrows(type, url, lineNumber, "Illegal spi provider-class name: " + entry);
    }

    // Check the remaining code points, stepping over surrogate pairs as one unit.
    int pos = Character.charCount(codePoint);
    while (pos < entry.length()) {
        codePoint = entry.codePointAt(pos);
        if ((codePoint != '.') && !Character.isJavaIdentifierPart(codePoint)) {
            failThrows(type, url, lineNumber, "Illegal spi provider-class name: " + entry);
        }
        pos += Character.charCount(codePoint);
    }

    if (!names.contains(entry)) {
        names.add(entry);
    }
}
From source file:org.apache.pdfbox.pdmodel.PDPageContentStream.java
/** * Shows the given text at the location specified by the current text matrix. * * @param text The Unicode text to show. * @throws IOException If an io exception occurs. *//* w w w .j a v a 2 s. c o m*/ public void showText(String text) throws IOException { if (!inTextMode) { throw new IllegalStateException("Must call beginText() before showText()"); } if (fontStack.isEmpty()) { throw new IllegalStateException("Must call setFont() before showText()"); } PDFont font = fontStack.peek(); // Unicode code points to keep when subsetting if (font.willBeSubset()) { for (int offset = 0; offset < text.length();) { int codePoint = text.codePointAt(offset); font.addToSubset(codePoint); offset += Character.charCount(codePoint); } } COSWriter.writeString(font.encode(text), output); write(" "); writeOperator("Tj"); }
From source file:org.languagetool.rules.spelling.hunspell.HunspellRule.java
protected String getSentenceTextWithoutUrlsAndImmunizedTokens(AnalyzedSentence sentence) { StringBuilder sb = new StringBuilder(); AnalyzedTokenReadings[] sentenceTokens = getSentenceWithImmunization(sentence).getTokens(); for (int i = 1; i < sentenceTokens.length; i++) { String token = sentenceTokens[i].getToken(); if (sentenceTokens[i].isImmunized() || sentenceTokens[i].isIgnoredBySpeller() || isUrl(token) || isEMail(token) || isQuotedCompound(sentence, i, token)) { if (isQuotedCompound(sentence, i, token)) { sb.append(" ").append(token.substring(1)); }/*from w w w.j av a 2s.c o m*/ // replace URLs and immunized tokens with whitespace to ignore them for spell checking: else if (token.length() < 20) { sb.append(WHITESPACE_ARRAY[token.length()]); } else { for (int j = 0; j < token.length(); j++) { sb.append(' '); } } } else if (token.length() > 1 && token.codePointCount(0, token.length()) != token.length()) { // some symbols such as emojis () have a string length that equals 2 for (int charIndex = 0; charIndex < token.length();) { int unicodeCodePoint = token.codePointAt(charIndex); int increment = Character.charCount(unicodeCodePoint); if (increment == 1) { sb.append(token.charAt(charIndex)); } else { sb.append(" "); } charIndex += increment; } } else { sb.append(token); } } return sb.toString(); }
From source file:ac.elements.parser.ExtendedFunctions.java
/** * //from w w w . j a v a2s . c om * This method ensures that the output String has only valid XML unicode * characters as specified by the * * XML 1.0 standard. For reference, please see the * * standard. This method will return an empty String if the input is null or * empty. * * * @author Donoiu Cristian, GPL * * @param The * String whose non-valid characters we want to remove. * * @return The in String, stripped of non-valid characters. * @author * http://cse-mjmcl.cse.bris.ac.uk/blog/2007/02/14/1171465494443.html */ public static String stripNonValidXML(String s) { // Used to hold the output. StringBuilder out = new StringBuilder(); // Used to reference the current character. int codePoint; // This is actualy one unicode character, // represented by two code units!!!. // String ss = "\ud801\udc00"; // System.out.println(ss.codePointCount(0, ss.length()));// See: 1 int i = 0; while (i < s.length()) { // System.out.println("i=" + i); // This is the unicode code of the character. codePoint = s.codePointAt(i); // Consider testing larger ranges first to improve speed. if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } // Increment with the number of code units(java chars) needed to // represent a Unicode char. i += Character.charCount(codePoint); } return out.toString(); }
From source file:org.omegat.util.FileUtil.java
static Pattern compileFileMask(String mask) { StringBuilder m = new StringBuilder(); // "Relative" masks can match at any directory level if (!mask.startsWith("/")) { mask = "**/" + mask; }//from ww w . ja va 2 s. com // Masks ending with a slash match everything in subtree if (mask.endsWith("/")) { mask += "**"; } for (int cp, i = 0; i < mask.length(); i += Character.charCount(cp)) { cp = mask.codePointAt(i); if (cp >= 'A' && cp <= 'Z') { m.appendCodePoint(cp); } else if (cp >= 'a' && cp <= 'z') { m.appendCodePoint(cp); } else if (cp >= '0' && cp <= '9') { m.appendCodePoint(cp); } else if (cp == '/') { if (mask.regionMatches(i, "/**/", 0, 4)) { // The sequence /**/ matches *zero* or more levels m.append("(?:/|/.*/)"); i += 3; } else if (mask.regionMatches(i, "/**", 0, 3)) { // The sequence /** matches *zero* or more levels m.append("(?:|/.*)"); i += 2; } else { m.appendCodePoint(cp); } } else if (cp == '?') { // ? matches anything but a directory separator m.append("[^/]"); } else if (cp == '*') { if (mask.regionMatches(i, "**/", 0, 3)) { // The sequence **/ matches *zero* or more levels m.append("(?:|.*/)"); i += 2; } else if (mask.regionMatches(i, "**", 0, 2)) { // ** m.append(".*"); i++; } else { // * m.append("[^/]*"); } } else { m.append('\\').appendCodePoint(cp); } } return Pattern.compile(m.toString()); }
From source file:immf.Util.java
/**
 * Encodes a subject as one or more UTF-8 Base64 MIME encoded-words, inserting
 * a folded line break ({@code "\r\n "}) between consecutive words.
 *
 * <p>The subject is split on code point boundaries so that each encoded-word,
 * including its {@code =?UTF-8?B?} and {@code ?=} delimiters, stays under 75
 * characters. The first word's budget is reduced by the length of the
 * {@code "X-Goomoji-Subject: "} header name.
 *
 * @param subject the subject text; must not be {@code null}
 * @return the encoded, folded header value
 * @throws UnsupportedEncodingException if the encoding is rejected by {@code MimeUtility}
 */
public static String encodeGoomojiSubject(String subject) throws UnsupportedEncodingException {
    // Room left for the Base64 payload once the encoded-word delimiters are subtracted.
    final int maxlen = 75 - ("=?UTF-8?B?".length() + "?=".length());
    StringBuilder sb = new StringBuilder();
    int mark = 0; // start index of the chunk currently being accumulated
    int utf8len = "X-Goomoji-Subject: ".length(); // bytes already counted against the current line
    for (int i = 0; i < subject.length();) {
        int cp = subject.codePointAt(i);

        // Number of bytes this code point occupies in UTF-8 (U+0000..U+007F is one byte).
        int len;
        if (cp <= 0x7f) {
            len = 1;
        } else if (cp <= 0x7ff) {
            len = 2;
        } else if (cp <= 0xffff) {
            len = 3;
        } else {
            len = 4;
        }

        // Base64 emits 4 chars per 3 input bytes; flush before the payload would reach the limit.
        if (4 * ((utf8len + len - 1) / 3 + 1) >= maxlen) {
            if (mark > 0) {
                sb.append("\r\n ");
            }
            sb.append(MimeUtility.encodeWord(subject.substring(mark, i), "UTF-8", "B"));
            mark = i;
            utf8len = 0;
        }
        utf8len += len;
        i += Character.charCount(cp);
    }
    // Emit whatever remains after the last flush.
    if (mark > 0) {
        sb.append("\r\n ");
    }
    sb.append(MimeUtility.encodeWord(subject.substring(mark), "UTF-8", "B"));
    return sb.toString();
}