List of usage examples for java.lang.StringBuilder.appendCodePoint
@Override public StringBuilder appendCodePoint(int codePoint)
From source file:com.pfarrell.utils.misc.TextTools.java
/** * make a string with no whitespace at all * @param arg string/*from www .ja v a 2s. c o m*/ * @return string with no whitespace */ public static String justLettersOrDigits(String arg) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < arg.length(); i++) { int c = arg.codePointAt(i); if (Character.isLetterOrDigit(c)) { sb.appendCodePoint(c); } } return sb.toString(); }
From source file:Main.java
public static String stripNonValidXMLCharacters(String text) { if (text == null || ("".equals(text))) { return ""; }//from w ww . j a v a 2 s . c o m StringBuilder sb = new StringBuilder(); for (int i = 0; i < text.length(); i++) { int codePoint = text.codePointAt(i); if (codePoint > 0xFFFF) { i++; } if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { sb.appendCodePoint(codePoint); } } return sb.toString(); }
From source file:org.marketcetera.util.misc.RandomStrings.java
/**
 * Generates a random string of {@code len} code points. Each code point is
 * picked, using the shared generator, from the array that
 * {@code UCPFilterInfo} reports for the given filter.
 *
 * @param filter The filter.
 * @param len The length (in code points).
 *
 * @return The string.
 */
public static String genStr(UCPFilter filter, int len) {
    final int[] candidates = UCPFilterInfo.getInfo(filter).getUCPs();
    final StringBuilder result = new StringBuilder();
    for (int remaining = len; remaining > 0; remaining--) {
        final int pick = sGenerator.nextInt(candidates.length);
        result.appendCodePoint(candidates[pick]);
    }
    return result.toString();
}
From source file:jp.furplag.util.commons.StringUtils.java
/**
 * Returns the part of the string between two positions, where positions index
 * into the array produced by {@code getCodePoints} rather than into the
 * {@code char}s of the string. The result starts at {@code beginIndex} and
 * stops just before {@code endIndex}. Out-of-range positions are clamped to
 * the bounds of the string.
 *
 * <pre>
 * StringUtils.substringUCL(null, *, *) = ""
 * StringUtils.substringUCL("", * , *) = ""
 * StringUtils.substringUCL("abc", 0, 2) = "ab"
 * StringUtils.substringUCL("abc", 2, 0) = ""
 * StringUtils.substringUCL("abc", 2, 4) = "c"
 * StringUtils.substringUCL("abc", 4, 6) = ""
 * StringUtils.substringUCL("abc", 2, 2) = ""
 * StringUtils.substringUCL("abc", -2, -1) = "b"
 * StringUtils.substringUCL("abc", -4, 2) = "ab"
 * </pre>
 *
 * @param str the string to get the substring from, may be null.
 * @param beginIndex the position to start from, negative means count back from the end of the String by this many characters.
 * @param endIndex the position to end at (exclusive), negative means count back from the end of the String by this many characters.
 * @return substring from start position to end position, return empty if null.
 */
public static String substringUCL(final String str, final int beginIndex, final int endIndex) {
    final int[] codePoints = getCodePoints(defaultString(str));
    final int total = codePoints.length;

    // Negative positions are offsets from the end; then clamp into [0, total].
    final int from = Math.max(0, beginIndex < 0 ? total + beginIndex : beginIndex);
    final int rawTo = endIndex < 0 ? total + endIndex : endIndex;
    final int to = Math.max(0, Math.min(total, rawTo));
    if (from > to) {
        return EMPTY;
    }

    final StringBuilder sb = new StringBuilder();
    for (int i = from; i < to; i++) {
        sb.appendCodePoint(codePoints[i]);
    }
    return sb.toString();
}
From source file:Main.java
/** * Escapes a character sequence so that it is valid XML. * //from ww w.j a v a 2s . c o m * @param s * The character sequence. * @return The escaped version of the character sequence. */ public static String escapeXML(CharSequence s) { // double quote -- quot // ampersand -- amp // less than -- lt // greater than -- gt // apostrophe -- apos StringBuilder sb = new StringBuilder(s.length() * 2); for (int i = 0; i < s.length();) { int codePoint = Character.codePointAt(s, i); if (codePoint == '<') { sb.append(LT); } else if (codePoint == '>') { sb.append(GT); } else if (codePoint == '\"') { sb.append(QUOT); } else if (codePoint == '&') { sb.append(AMP); } else if (codePoint == '\'') { sb.append(APOS); } else { sb.appendCodePoint(codePoint); } i += Character.charCount(codePoint); } return sb.toString(); }
From source file:Main.java
/** * Generates a random word./*from w ww.j a va 2s . c o m*/ */ public static String generateWord(final Random random, final int[] codePointSet) { StringBuilder builder = new StringBuilder(); // 8 * 4 = 32 chars max, but we do it the following way so as to bias the random toward // longer words. This should be closer to natural language, and more importantly, it will // exercise the algorithms in dicttool much more. final int count = 1 + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5); while (builder.length() < count) { builder.appendCodePoint(codePointSet[Math.abs(random.nextInt()) % codePointSet.length]); } return builder.toString(); }
From source file:Main.java
/** * Generates a random word.//from w ww .java 2 s . c o m */ @Nonnull public static String generateWord(@Nonnull final Random random, @Nonnull final int[] codePointSet) { final StringBuilder builder = new StringBuilder(); // 8 * 4 = 32 chars max, but we do it the following way so as to bias the random toward // longer words. This should be closer to natural language, and more importantly, it will // exercise the algorithms in dicttool much more. final int count = 1 + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5); while (builder.length() < count) { builder.appendCodePoint(codePointSet[Math.abs(random.nextInt()) % codePointSet.length]); } return builder.toString(); }
From source file:net.antidot.sql.model.core.SQLConnector.java
/** * Update a database, connected with c, with requests in SQL file. * /*from www . ja v a 2 s . c om*/ * @param c * @param pathToSQLFile * @throws SQLException */ public static void updateDatabase(Connection c, String pathToSQLFile) throws SQLException { log.debug("[SQLConnector:updateDatabase] pathToSQLFile = " + pathToSQLFile); StringBuilder sb = new StringBuilder(); try { FileReader fr = new FileReader(new File(pathToSQLFile)); // be sure to not have line starting with "--" or "/*" or any other // non aplhabetical character BufferedReader br = new BufferedReader(fr); int s = -1; while ((s = br.read()) != -1) { sb.appendCodePoint(s); } br.close(); // here is our splitter ! We use ";" as a delimiter for each request // then we are sure to have well formed statements String[] inst = sb.toString().split(";"); Statement st = c.createStatement(); for (int i = 0; i < inst.length; i++) { // we ensure that there is no spaces before or after the request // string // in order to not execute empty statements if (!inst[i].trim().equals("")) { log.debug("[SQLConnector:updateDatabase] >> " + inst[i]); st.executeUpdate(inst[i]); } } } catch (Exception e) { e.printStackTrace(); } }
From source file:org.apache.poi.util.StringUtil.java
/**
 * Some strings may contain encoded characters of the unicode private use area.
 * Currently the characters of the symbol fonts are mapped to the corresponding
 * characters in the normal unicode range.
 * <p>
 * Each code point of the input is looked up in the codepoint map; code points
 * without an entry are copied through unchanged.
 *
 * @param string the original string
 * @return the string with mapped characters; the input itself when it is {@code null} or empty
 *
 * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a>
 * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a>
 */
public static String mapMsCodepointString(String string) {
    if (string == null || "".equals(string)) {
        return string;
    }
    initMsCodepointMap();

    final StringBuilder mapped = new StringBuilder();
    final int length = string.length();
    int offset = 0;
    while (offset < length) {
        final Integer source = string.codePointAt(offset);
        final Integer replacement = msCodepointToUnicode.get(source);
        if (replacement == null) {
            mapped.appendCodePoint(source);
        } else {
            mapped.appendCodePoint(replacement);
        }
        offset += Character.charCount(source);
    }
    return mapped.toString();
}
From source file:org.archive.modules.fetcher.FetchHTTPRequest.java
/**
 * Returns a copy of the string with non-ascii characters replaced by their
 * html numeric character reference in decimal (e.g. &#12345;).
 *
 * <p>
 * The purpose of this is to produce a multipart/formdata submission that
 * any server should be able to handle, based on experiments using a modern
 * browser (chromium 47.0.2526.106 for mac). What chromium posts depends on
 * what it considers the character encoding of the page containing the form,
 * and maybe other factors. It would be too complicated to try to simulate
 * that behavior in heritrix.
 *
 * <p>
 * Instead what we do is approximately what the browser does when the form
 * page is plain ascii. It html-escapes characters outside of the
 * latin1/cp1252 range. Characters in the U+0080-U+00FF range are encoded in
 * latin1/cp1252. That is the one way that we differ from chromium. We
 * html-escape those characters (U+0080-U+00FF) as well. That way the http
 * post is plain ascii, and should work regardless of which encoding the
 * server expects.
 *
 * <p>
 * N.b. chromium doesn't indicate the encoding of the request in any way (no
 * charset in the content-type or anything like that). Also of note is that
 * when it considers the form page to be utf-8, it submits in utf-8. That's
 * part of the complicated behavior we don't want to try to simulate.
 *
 * @param str the text to escape
 * @return {@code str} with every code point above U+007F written as {@code &#N;}
 */
public static String escapeForMultipart(String str) {
    final StringBuilder out = new StringBuilder();
    final int length = str.length();
    int pos = 0;
    while (pos < length) {
        final int cp = str.codePointAt(pos);
        pos += Character.charCount(cp);
        if (cp > 0x7f) {
            // One reference per code point, so a surrogate pair yields a single number.
            out.append("&#").append(cp).append(";");
            continue;
        }
        out.appendCodePoint(cp);
    }
    return out.toString();
}