Example usage for java.lang StringBuilder appendCodePoint

List of usage examples for java.lang StringBuilder appendCodePoint

Introduction

On this page you can find example usages of java.lang.StringBuilder.appendCodePoint.

Prototype

@Override
public StringBuilder appendCodePoint(int codePoint) 

Source Link

Usage

From source file:com.pfarrell.utils.misc.TextTools.java

/**
 * make a string with no whitespace at all
 * @param arg  string/*from www .ja  v  a 2s. c  o m*/
 * @return string with no whitespace
 */
public static String justLettersOrDigits(String arg) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < arg.length(); i++) {
        int c = arg.codePointAt(i);
        if (Character.isLetterOrDigit(c)) {
            sb.appendCodePoint(c);
        }
    }
    return sb.toString();
}

From source file:Main.java

/**
 * Returns the given text with every code point removed that falls outside
 * the ranges accepted below (these match the {@code Char} production of
 * XML 1.0: tab, line feed, carriage return, U+0020-U+D7FF, U+E000-U+FFFD
 * and U+10000-U+10FFFF).
 *
 * @param text the text to clean, may be null
 * @return the cleaned text; the empty string when {@code text} is null or empty
 */
public static String stripNonValidXMLCharacters(String text) {
    if (text == null || text.isEmpty()) {
        return "";
    }

    final StringBuilder cleaned = new StringBuilder();
    final int length = text.length();
    int index = 0;
    while (index < length) {
        final int cp = text.codePointAt(index);
        // A supplementary code point occupies two chars, everything else one.
        index += Character.charCount(cp);

        final boolean allowedControl = cp == 0x9 || cp == 0xA || cp == 0xD;
        final boolean belowSurrogates = cp >= 0x20 && cp <= 0xD7FF;
        final boolean aboveSurrogates = cp >= 0xE000 && cp <= 0xFFFD;
        final boolean supplementary = cp >= 0x10000 && cp <= 0x10FFFF;
        if (allowedControl || belowSurrogates || aboveSurrogates || supplementary) {
            cleaned.appendCodePoint(cp);
        }
    }
    return cleaned.toString();
}

From source file:org.marketcetera.util.misc.RandomStrings.java

/**
 * Builds a random string made of {@code len} code points, each one picked
 * from the code points that {@code UCPFilterInfo} reports for the given
 * filter.
 *
 * @param filter The filter whose accepted code points are used.
 * @param len The number of code points to generate; a value of zero or
 * less yields the empty string.
 *
 * @return The generated string.
 */

public static String genStr(UCPFilter filter, int len) {
    final int[] candidates = UCPFilterInfo.getInfo(filter).getUCPs();
    final StringBuilder result = new StringBuilder();
    int remaining = len;
    while (remaining-- > 0) {
        // sGenerator is declared elsewhere in the class; presumably a shared Random.
        final int pick = sGenerator.nextInt(candidates.length);
        result.appendCodePoint(candidates[pick]);
    }
    return result.toString();
}

From source file:jp.furplag.util.commons.StringUtils.java

/**
 * Returns the part of a string between two positions, where positions index
 * into the array produced by {@code getCodePoints} (presumably one entry per
 * Unicode code point, so a surrogate pair counts as a single position).
 * The result starts at {@code beginIndex} and stops just before
 * {@code endIndex}.
 *
 * <pre>
 * StringUtils.substringUCL(null, *, *)    = ""
 * StringUtils.substringUCL("", * ,  *)    = ""
 * StringUtils.substringUCL("abc", 0, 2)   = "ab"
 * StringUtils.substringUCL("abc", 2, 0)   = ""
 * StringUtils.substringUCL("abc", 2, 4)   = "c"
 * StringUtils.substringUCL("abc", 4, 6)   = ""
 * StringUtils.substringUCL("abc", 2, 2)   = ""
 * StringUtils.substringUCL("abc", -2, -1) = "b"
 * StringUtils.substringUCL("abc", -4, 2)  = "ab"
 * </pre>
 *
 * @param str the string to cut, may be null.
 * @param beginIndex the first position to include; a negative value counts back from the end.
 * @param endIndex the position to stop at (exclusive); a negative value counts back from the end.
 * @return the requested part, or the empty string when the range is empty or reversed.
 */
public static String substringUCL(final String str, final int beginIndex, final int endIndex) {
    final int[] codePoints = getCodePoints(defaultString(str));
    final int total = codePoints.length;

    // Resolve negative (from-the-end) indexes, then clamp into [0, total].
    final int from = Math.max(0, beginIndex < 0 ? total + beginIndex : beginIndex);
    final int to = Math.max(0, Math.min(total, endIndex < 0 ? total + endIndex : endIndex));
    if (from > to) {
        return EMPTY;
    }

    final StringBuilder sb = new StringBuilder();
    for (int position = from; position < to; position++) {
        sb.appendCodePoint(codePoints[position]);
    }
    return sb.toString();
}

From source file:Main.java

/**
 * Produces a copy of a character sequence in which the five characters
 * {@code < > " & '} are replaced by the constants {@code LT}, {@code GT},
 * {@code QUOT}, {@code AMP} and {@code APOS} (presumably the matching XML
 * entity references). All other code points are copied unchanged.
 *
 * @param s
 *        The character sequence to escape; must not be null.
 * @return The escaped version of the character sequence.
 */
public static String escapeXML(CharSequence s) {
    final StringBuilder escaped = new StringBuilder(s.length() * 2);
    int pos = 0;
    while (pos < s.length()) {
        final int cp = Character.codePointAt(s, pos);
        switch (cp) {
        case '<': // less than -- lt
            escaped.append(LT);
            break;
        case '>': // greater than -- gt
            escaped.append(GT);
            break;
        case '\"': // double quote -- quot
            escaped.append(QUOT);
            break;
        case '&': // ampersand -- amp
            escaped.append(AMP);
            break;
        case '\'': // apostrophe -- apos
            escaped.append(APOS);
            break;
        default:
            escaped.appendCodePoint(cp);
            break;
        }
        // Move past one or two chars depending on the code point just read.
        pos += Character.charCount(cp);
    }
    return escaped.toString();
}

From source file:Main.java

/**
 * Generates a random word./*from w ww.j  a va  2s  . c  o m*/
 */
public static String generateWord(final Random random, final int[] codePointSet) {
    StringBuilder builder = new StringBuilder();
    // 8 * 4 = 32 chars max, but we do it the following way so as to bias the random toward
    // longer words. This should be closer to natural language, and more importantly, it will
    // exercise the algorithms in dicttool much more.
    final int count = 1 + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5)
            + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5)
            + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5)
            + (Math.abs(random.nextInt()) % 5) + (Math.abs(random.nextInt()) % 5);
    while (builder.length() < count) {
        builder.appendCodePoint(codePointSet[Math.abs(random.nextInt()) % codePointSet.length]);
    }
    return builder.toString();
}

From source file:Main.java

/**
 * Generates a random word made of code points drawn from {@code codePointSet}.
 *
 * <p>The target length is 1 plus the sum of eight draws in [0, 4], i.e. a
 * value between 1 and 33. Length is measured in chars, so a word ending in a
 * supplementary code point may be one char longer than the target.
 *
 * @param random the source of randomness
 * @param codePointSet the code points to pick from; must not be empty
 * @return the generated word
 */

@Nonnull
public static String generateWord(@Nonnull final Random random, @Nonnull final int[] codePointSet) {
    final StringBuilder builder = new StringBuilder();
    // 1 + 8 * 4 = 33 chars max, but we do it the following way so as to bias the random toward
    // longer words. This should be closer to natural language, and more importantly, it will
    // exercise the algorithms in dicttool much more.
    //
    // Math.abs is applied AFTER the modulo: Math.abs(Integer.MIN_VALUE) is still negative, so
    // abs-then-modulo could produce a negative length term or a negative array index. For every
    // other draw both orders give the same value.
    int count = 1;
    for (int draw = 0; draw < 8; draw++) {
        count += Math.abs(random.nextInt() % 5);
    }
    while (builder.length() < count) {
        builder.appendCodePoint(codePointSet[Math.abs(random.nextInt() % codePointSet.length)]);
    }
    return builder.toString();
}

From source file:net.antidot.sql.model.core.SQLConnector.java

/**
 * Update a database, connected with c, with requests in SQL file.
 * /*from   www . ja v  a  2 s  .  c om*/
 * @param c
 * @param pathToSQLFile
 * @throws SQLException
 */
public static void updateDatabase(Connection c, String pathToSQLFile) throws SQLException {
    log.debug("[SQLConnector:updateDatabase] pathToSQLFile = " + pathToSQLFile);
    StringBuilder sb = new StringBuilder();
    try {
        FileReader fr = new FileReader(new File(pathToSQLFile));
        // be sure to not have line starting with "--" or "/*" or any other
        // non aplhabetical character
        BufferedReader br = new BufferedReader(fr);

        int s = -1;
        while ((s = br.read()) != -1) {
            sb.appendCodePoint(s);
        }
        br.close();
        // here is our splitter ! We use ";" as a delimiter for each request
        // then we are sure to have well formed statements
        String[] inst = sb.toString().split(";");
        Statement st = c.createStatement();

        for (int i = 0; i < inst.length; i++) {
            // we ensure that there is no spaces before or after the request
            // string
            // in order to not execute empty statements
            if (!inst[i].trim().equals("")) {
                log.debug("[SQLConnector:updateDatabase] >> " + inst[i]);
                st.executeUpdate(inst[i]);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.apache.poi.util.StringUtil.java

/**
 * Replaces code points that have an entry in {@code msCodepointToUnicode}
 * by the code point they are mapped to; all other code points are copied
 * unchanged. According to the original documentation the table maps
 * private-use-area characters of the symbol fonts to their counterparts in
 * the normal unicode range.
 *
 * @param string the original string, may be null
 * @return the string with mapped characters; {@code string} itself when it
 * is null or empty
 *
 * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a>
 * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a>
 */
public static String mapMsCodepointString(String string) {
    if (string == null || string.isEmpty()) {
        return string;
    }
    // Called before every lookup pass; presumably fills the table on first use.
    initMsCodepointMap();

    final StringBuilder mapped = new StringBuilder();
    int pos = 0;
    while (pos < string.length()) {
        final Integer source = string.codePointAt(pos);
        final Integer replacement = msCodepointToUnicode.get(source);
        if (replacement == null) {
            mapped.appendCodePoint(source);
        } else {
            mapped.appendCodePoint(replacement);
        }
        // Advance by the width of the code point that was read, not the one written.
        pos += Character.charCount(source);
    }

    return mapped.toString();
}

From source file:org.archive.modules.fetcher.FetchHTTPRequest.java

/**
 * Produces a copy of the string in which every non-ascii character is
 * replaced by its decimal html numeric character reference
 * (e.g. &amp;#12345;).
 *
 * <p>
 * The goal is a multipart/formdata submission that any server should be
 * able to handle, based on experiments using a modern browser (chromium
 * 47.0.2526.106 for mac). What chromium posts depends on what it considers
 * the character encoding of the page containing the form, and maybe other
 * factors. Simulating that behavior in heritrix would be too complicated.
 *
 * <p>
 * So this method does approximately what the browser does when the form
 * page is plain ascii: the browser html-escapes characters outside of the
 * latin1/cp1252 range and encodes characters in the U+0080-U+00FF range in
 * latin1/cp1252. The one difference from chromium is that this method
 * html-escapes the U+0080-U+00FF characters as well. That way the http post
 * is plain ascii, and should work regardless of which encoding the server
 * expects.
 *
 * <p>
 * N.b. chromium doesn't indicate the encoding of the request in any way (no
 * charset in the content-type or anything like that). Also of note is that
 * when it considers the form page to be utf-8, it submits in utf-8. That's
 * part of the complicated behavior that is deliberately not simulated.
 *
 * @param str the string to escape; must not be null
 * @return the escaped, ascii-only string
 */
public static String escapeForMultipart(String str) {
    final StringBuilder out = new StringBuilder();
    final int length = str.length();
    int pos = 0;
    while (pos < length) {
        final int cp = str.codePointAt(pos);
        pos += Character.charCount(cp);
        if (cp > 0x7f) {
            // Anything beyond 7-bit ascii becomes a decimal character reference.
            out.append("&#").append(cp).append(";");
        } else {
            out.append((char) cp);
        }
    }
    return out.toString();
}