List of usage examples for java.lang.String.codePointAt
public int codePointAt(int index)
From source file:net.sourceforge.jaulp.string.StringUtils.java
/** * Converts all characters from the given String to unicodes characters encoded like \uxxxx. * * @param toUnicode//from w w w .j a v a2 s . c o m * The String to convert. * @param toLowerCase * If true the letters from the unicode characters are lower case. * @return The converted String. */ public static String toUnicode(final String toUnicode, final boolean toLowerCase) { final StringBuilder sb = new StringBuilder(); for (int i = 0; i < toUnicode.length(); i++) { final String hex = Integer.toHexString(toUnicode.codePointAt(i)); if (toLowerCase) { hex.toLowerCase(); } else { hex.toUpperCase(); } final String hexWithZeros = "0000" + hex; final String hexCodeWithLeadingZeros = hexWithZeros.substring(hexWithZeros.length() - 4); sb.append("\\u" + hexCodeWithLeadingZeros); } return sb.toString(); }
From source file:de.alpharogroup.string.StringExtensions.java
/** * Converts all characters from the given String to unicodes characters encoded like \uxxxx. * * @param toUnicode// w ww . ja v a2s .co m * The String to convert. * @param toLowerCase * If true the letters from the unicode characters are lower case. * @return The converted String. */ public static String toUnicode(final String toUnicode, final boolean toLowerCase) { final StringBuilder sb = new StringBuilder(); for (int i = 0; i < toUnicode.length(); i++) { String hex = Integer.toHexString(toUnicode.codePointAt(i)); if (toLowerCase) { hex = hex.toLowerCase(); } else { hex = hex.toUpperCase(); } final String hexWithZeros = "0000" + hex; final String hexCodeWithLeadingZeros = hexWithZeros.substring(hexWithZeros.length() - 4); sb.append("\\u" + hexCodeWithLeadingZeros); } return sb.toString(); }
From source file:org.exoplatform.cms.common.CommonUtils.java
/** * Encode special character to html number. Ex: '/' --> / * @param String s, the string input/* w ww . ja v a2 s . c o m*/ * @param String charIgnore, the string content ignore some special character can not encode. * @param boolean isTitle, the boolean for check convert is title or not. * @return String */ public static String encodeSpecialCharToHTMLnumber(String s, String charIgnore, boolean isTitle) { if (isEmpty(s)) { return EMPTY_STR; } int i = 0; StringBuilder builder = new StringBuilder(); while (i < s.length()) { char c = s.charAt(i); if (charIgnore.indexOf(String.valueOf(c)) >= 0) { builder.append(c); } else { int t = s.codePointAt(i); if (t < CHAR_CODES[0] && t > CHAR_CODES[1] || t < CHAR_CODES[2] && t > CHAR_CODES[3] || t < CHAR_CODES[4] && t > CHAR_CODES[5] || t < CHAR_CODES[6] && t > CHAR_CODES[7]) { if (isTitle && (t == 60 || t == 62)) { if (t == 60) { builder.append(LESS_THAN); } else if (t == 62) { builder.append(GREATER_THAN); } } else { builder.append(AMP_NUMBER).append(t).append(SEMICOLON); } } else { builder.append(c); } } ++i; } return builder.toString(); }
From source file:org.archive.modules.fetcher.FetchHTTPRequest.java
/**
 * Produces an ASCII-only copy of a string in which every non-ASCII character is replaced by
 * its decimal HTML numeric character reference (e.g. &#12345;).
 *
 * <p>
 * The goal is a multipart/form-data submission that any server should be able to handle.
 * This is based on experiments with a modern browser (chromium 47.0.2526.106 for mac), whose
 * behavior depends on what it considers the encoding of the page containing the form, and
 * maybe other factors. That behavior is too complicated to simulate in heritrix.
 *
 * <p>
 * What is done here approximates the browser when the form page is plain ascii: characters
 * outside the latin1/cp1252 range are html-escaped. Chromium sends U+0080-U+00FF encoded in
 * latin1/cp1252; this method differs by html-escaping that range as well, so the post is
 * plain ascii and should work regardless of which encoding the server expects.
 *
 * <p>
 * N.b. chromium does not indicate the request encoding in any way (no charset in the
 * content-type or similar), and when it considers the form page to be utf-8 it submits in
 * utf-8. That is part of the behavior deliberately not simulated.
 *
 * @param str the text to escape; must not be {@code null}
 * @return the escaped, ASCII-only text
 */
public static String escapeForMultipart(String str) {
    final StringBuilder escaped = new StringBuilder();
    int pos = 0;
    while (pos < str.length()) {
        final int cp = str.codePointAt(pos);
        // Step over the whole code point so surrogate pairs yield a single reference.
        pos += Character.charCount(cp);
        if (cp > 0x7f) {
            escaped.append("&#").append(cp).append(";");
        } else {
            escaped.appendCodePoint(cp);
        }
    }
    return escaped.toString();
}
From source file:org.apache.stanbol.enhancer.engines.opennlp.impl.NEREngineCore.java
/** * Remove non UTF-8 compliant characters (typically control characters) so has to avoid polluting the * annotation graph with snippets that are not serializable as XML. *//*from w w w . j a va 2 s. c o m*/ protected static String removeNonUtf8CompliantCharacters(final String text) { if (null == text) { return null; } StringBuilder sb = null; //initialised on the first replacement for (int i = 0; i < text.length(); i++) { int ch = text.codePointAt(i); // remove any characters outside the valid UTF-8 range as well as all control characters // except tabs and new lines //NOTE: rewesten (2012-11-21) replaced the original check with the one // found at http://blog.mark-mclaren.info/2007/02/invalid-xml-characters-when-valid-utf8_5873.html if (!((ch == 0x9) || (ch == 0xA) || (ch == 0xD) || ((ch >= 0x20) && (ch <= 0xD7FF)) || ((ch >= 0xE000) && (ch <= 0xFFFD)) || ((ch >= 0x10000) && (ch <= 0x10FFFF)))) { if (sb == null) { sb = new StringBuilder(text); } sb.setCharAt(i, ' '); } } return sb == null ? text : sb.toString(); }
From source file:org.apache.poi.util.StringUtil.java
/**
 * Maps code points through the {@code msCodepointToUnicode} table.
 *
 * <p>Some strings may contain encoded characters of the unicode private use area. Currently
 * the characters of the symbol fonts are mapped to the corresponding characters in the
 * normal unicode range. Code points without an entry in the table are copied unchanged.
 *
 * @param string the original string
 * @return the string with mapped characters; {@code null} or empty input is returned as is
 *
 * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a>
 * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a>
 */
public static String mapMsCodepointString(String string) {
    if (string == null || string.isEmpty()) {
        return string;
    }
    initMsCodepointMap();

    final StringBuilder mapped = new StringBuilder();
    final int end = string.length();
    int pos = 0;
    while (pos < end) {
        final int source = string.codePointAt(pos);
        final Integer replacement = msCodepointToUnicode.get(source);
        mapped.appendCodePoint(replacement != null ? replacement : source);
        // Step over both chars of a surrogate pair.
        pos += Character.charCount(source);
    }
    return mapped.toString();
}
From source file:org.omegat.util.FileUtil.java
static Pattern compileFileMask(String mask) { StringBuilder m = new StringBuilder(); // "Relative" masks can match at any directory level if (!mask.startsWith("/")) { mask = "**/" + mask; }/*from ww w . ja va 2 s . c om*/ // Masks ending with a slash match everything in subtree if (mask.endsWith("/")) { mask += "**"; } for (int cp, i = 0; i < mask.length(); i += Character.charCount(cp)) { cp = mask.codePointAt(i); if (cp >= 'A' && cp <= 'Z') { m.appendCodePoint(cp); } else if (cp >= 'a' && cp <= 'z') { m.appendCodePoint(cp); } else if (cp >= '0' && cp <= '9') { m.appendCodePoint(cp); } else if (cp == '/') { if (mask.regionMatches(i, "/**/", 0, 4)) { // The sequence /**/ matches *zero* or more levels m.append("(?:/|/.*/)"); i += 3; } else if (mask.regionMatches(i, "/**", 0, 3)) { // The sequence /** matches *zero* or more levels m.append("(?:|/.*)"); i += 2; } else { m.appendCodePoint(cp); } } else if (cp == '?') { // ? matches anything but a directory separator m.append("[^/]"); } else if (cp == '*') { if (mask.regionMatches(i, "**/", 0, 3)) { // The sequence **/ matches *zero* or more levels m.append("(?:|.*/)"); i += 2; } else if (mask.regionMatches(i, "**", 0, 2)) { // ** m.append(".*"); i++; } else { // * m.append("[^/]*"); } } else { m.append('\\').appendCodePoint(cp); } } return Pattern.compile(m.toString()); }
From source file:immf.Util.java
public static String encodeGoomojiSubject(String subject) throws UnsupportedEncodingException { final int maxlen = 75 - ("=?UTF-8?B?".length() + "?=".length()); StringBuilder sb = new StringBuilder(); int mark = 0; int utf8len = "X-Goomoji-Subject: ".length(); for (int i = 0; i < subject.length();) { int cp = subject.codePointAt(i); int len;//from www . j av a2 s .c om if (cp < 0x7f) len = 1; else if (cp <= 0x7ff) len = 2; else if (cp <= 0xffff) len = 3; else len = 4; if (4 * ((utf8len + len - 1) / 3 + 1) >= maxlen) { if (mark > 0) sb.append("\r\n "); sb.append(MimeUtility.encodeWord(subject.substring(mark, i), "UTF-8", "B")); mark = i; utf8len = 0; } utf8len += len; i += Character.charCount(cp); } if (mark > 0) sb.append("\r\n "); sb.append(MimeUtility.encodeWord(subject.substring(mark), "UTF-8", "B")); return sb.toString(); }
From source file:fr.ens.biologie.genomique.eoulsan.it.ITFactory.java
/** * Evaluate expression in a string./*from w w w.jav a 2 s . co m*/ * @param s string in witch expression must be replaced * @param allowExec allow execution of code * @return a string with expression evaluated * @throws EoulsanException if an error occurs while parsing the string or * executing an expression */ static String evaluateExpressions(final String s, final boolean allowExec) throws EoulsanException { if (s == null) { return null; } final StringBuilder result = new StringBuilder(); final int len = s.length(); for (int i = 0; i < len; i++) { final int c0 = s.codePointAt(i); // Variable substitution if (c0 == '$' && i + 1 < len) { final int c1 = s.codePointAt(i + 1); if (c1 == '{') { final String expr = subStr(s, i + 2, '}'); final String trimmedExpr = expr.trim(); if (CONSTANTS.containsKey(trimmedExpr)) { result.append(CONSTANTS.get(trimmedExpr)); } i += expr.length() + 2; continue; } } // Command substitution if (c0 == '`' && allowExec) { final String expr = subStr(s, i + 1, '`'); try { final String r = ProcessUtils.execToString(evaluateExpressions(expr, false)); // remove last '\n' in the result if (!r.isEmpty() && r.charAt(r.length() - 1) == '\n') { result.append(r.substring(0, r.length() - 1)); } else { result.append(r); } } catch (final IOException e) { throw new EoulsanException("Error while evaluating expression \"" + expr + "\"", e); } i += expr.length() + 1; continue; } result.appendCodePoint(c0); } return result.toString(); }
From source file:txyd.util.StringUtils.java
/** * ?????//from w w w . ja v a 2s .c om * charjavachar16????16??????? * <p> * ??U+0000U+10FFFFU+0000U+FFFFU+10000U+10FFFF * <p> * ????1216??UTF-16 * ??????????U+D800U+DFFF * ??????? * * @param sentence */ public static void printlnChar(String sentence) { // String sentence = "\u03C0\uD835\uDD6B";//? // String sentence = ""; int lengthU = sentence.length(); int lengthP = sentence.codePointCount(0, lengthU); // System.out.println(lengthU); // ?? // System.out.println(lengthP); // ??? if (lengthU != lengthP) {// for (int i = 0; i < lengthU; i++) { int codePoint = sentence.codePointAt(i); if (Character.isSupplementaryCodePoint(codePoint)) { System.out.println(String.valueOf(Character.toChars(codePoint))); i++; } else { System.out.println(sentence.charAt(i)); } } } }