Example usage for java.lang.String codePointAt

Introduction

On this page you can find example usages of java.lang.String.codePointAt.

Prototype

public int codePointAt(int index)

Source Link

Document

Returns the character (Unicode code point) at the specified index.

Usage

From source file:org.marketcetera.util.test.UnicodeDataTest.java

private static void singleValid(String str, char[] chars, int[] ucps, byte[] nat, byte[] utf8, byte[] utf16be,
        byte[] utf16le, byte[] utf32be, byte[] utf32le) {
    assertArrayEquals(str.toCharArray(), chars);
    int i = 0;//w w w . jav a  2s.com
    int j = 0;
    while (i < str.length()) {
        int ucp = str.codePointAt(i);
        assertEquals("At code point position " + j, ucp, ucps[j++]);
        i += Character.charCount(ucp);
    }
    assertArrayEquals(str.getBytes(), nat);
    assertArrayEquals(str.getBytes(UTF8), utf8);
    assertArrayEquals(str.getBytes(UTF16BE), utf16be);
    assertArrayEquals(str.getBytes(UTF16LE), utf16le);
    assertArrayEquals(str.getBytes(UTF32BE), utf32be);
    assertArrayEquals(str.getBytes(UTF32LE), utf32le);
}

From source file:Main.java

/*** This method ensures that the output String has only     
 * * valid XML unicode characters as specified by the     
 * * XML 1.0 standard. For reference, please see     
 * * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the     
 * * standard</a>. This method will return an empty     
 * * String if the input is null or empty.     
 * * @param in The String whose non-valid characters we want to remove.     
 * * @return The in String, stripped of non-valid characters.    
 *  *///from w w w  .  ja v a 2  s  .c om
public static String stripNonValidXMLCharacters(String s) {

    StringBuilder out = new StringBuilder(); // Used to hold the output.

    int codePoint; // Used to reference the current character.

    //String ss = "\ud801\udc00"; // This is actualy one unicode character, represented by two code units!!!.
    int i = 0;

    while (i < s.length()) {
        codePoint = s.codePointAt(i); // This is the unicode code of the character.
        if ((codePoint == 0x9) || // Consider testing larger ranges first to improve speed. 

                (codePoint == 0xA) ||

                (codePoint == 0xD) ||

                ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) ||

                ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) ||

                ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {

            out.append(Character.toChars(codePoint));

        }

        i += Character.charCount(codePoint); // Increment with the number of code units(java chars) needed to represent a Unicode char. 

    }

    return out.toString();

}

From source file:org.intermine.webservice.server.query.QueryRequestParser.java

/**
 * Take in a LZW encoded string and return a decoded plain-text string.
 *//*from   w ww. j a v  a2s  .c  om*/
public static String decodeLZWString(String encoded) {
    List<Integer> codes = new ArrayList<Integer>();
    encoded = fixEncoding(encoded);
    int length = encoded.length();
    for (int i = 0; i < length; i++) {
        Integer cp = Integer.valueOf(encoded.codePointAt(i));
        codes.add(cp);
    }
    return decompressLZW(codes);
}

From source file:org.exoplatform.forum.ForumTransformHTML.java

public static String removeCharterStrange(String s) {
    if (s == null || s.length() <= 0)
        return ForumUtils.EMPTY_STR;
    int i = 0;//w  w w.j  a  v a 2s  .  c om
    StringBuilder builder = new StringBuilder();
    while (i < s.length()) {
        if (s.codePointAt(i) > 31) {
            builder.append(s.charAt(i));
        }
        ++i;
    }
    return builder.toString();
}

From source file:Main.java

/**
 * Find the start of the next token.  A token is composed of letters and numbers. Any other
 * character are considered delimiters.//from   w w  w . j  a  va2 s.  co m
 *
 * @param line The string to search for the next token.
 * @param startIndex The index to start searching.  0 based indexing.
 * @return The index for the start of the next token.  line.length() if next token not found.
 */
@VisibleForTesting
static int findNextTokenStart(String line, int startIndex) {
    int index = startIndex;

    // If already in token, eat remainder of token.
    while (index <= line.length()) {
        if (index == line.length()) {
            // No more tokens.
            return index;
        }
        final int codePoint = line.codePointAt(index);
        if (!Character.isLetterOrDigit(codePoint)) {
            break;
        }
        index += Character.charCount(codePoint);
    }

    // Out of token, eat all consecutive delimiters.
    while (index <= line.length()) {
        if (index == line.length()) {
            return index;
        }
        final int codePoint = line.codePointAt(index);
        if (Character.isLetterOrDigit(codePoint)) {
            break;
        }
        index += Character.charCount(codePoint);
    }

    return index;
}

From source file:uk.ac.bbsrc.tgac.miso.integration.util.IntegrationUtils.java

/**
 * Sends a String message to a given host socket
 * /*  ww  w .  j a va2  s .  c  om*/
 * @param socket
 *          of type Socket
 * @param query
 *          of type String
 * @return String
 * @throws IntegrationException
 *           when the socket couldn't be created
 */
public static String sendMessage(Socket socket, String query) throws IntegrationException {
    BufferedWriter wr = null;
    BufferedReader rd = null;
    try {
        wr = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream(), "UTF8"));

        // Send data
        wr.write(query + "\r\n");
        wr.flush();

        // Get response
        rd = new BufferedReader(new InputStreamReader(socket.getInputStream()));
        String line;
        StringBuilder sb = new StringBuilder();
        while ((line = rd.readLine()) != null) {
            sb.append(line);
        }
        wr.close();
        rd.close();

        String dirty = sb.toString();
        StringBuilder response = new StringBuilder();
        int codePoint;
        int i = 0;
        while (i < dirty.length()) {
            codePoint = dirty.codePointAt(i);
            if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
                    || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
                    || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
                    || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {
                response.append(Character.toChars(codePoint));
            }
            i += Character.charCount(codePoint);
        }

        return response.toString().replace("\\\n", "").replace("\\\t", "");
    } catch (UnknownHostException e) {
        log.error("Cannot resolve host: " + socket.getInetAddress(), e);
        throw new IntegrationException(e.getMessage());
    } catch (IOException e) {
        log.error("Couldn't get I/O for the connection to: " + socket.getInetAddress(), e);
        throw new IntegrationException(e.getMessage());
    } finally {
        try {
            if (wr != null) {
                wr.close();
            }
            if (rd != null) {
                rd.close();
            }
        } catch (Throwable t) {
            log.error("close socket", t);
        }
    }
}

From source file:Main.java

/**
 * Find the first character matching the input character in the given
 * string where the character has no letter preceding it.
 * //from  w w  w . j av  a 2s.  c  o  m
 * @param text the string to test for the presence of the input character
 * @param inputChar the test character
 * @param fromIndex the index position of the string to start from
 * @return the position of the first character matching the input character
 *          in the given string where the character has no letter preceding it.
 */
public static int firstCharAt(String text, int inputChar, int fromIndex) {
    int result = 0;

    while (result >= 0) {
        result = text.indexOf(inputChar, fromIndex);

        if (result == 0) {
            return result;
        } else if (result > 0) {
            // Check there is a whitespace or symbol before the hit character
            if (Character.isLetter(text.codePointAt(result - 1))) {
                // The pre-increment is used in if and else branches.
                if (++fromIndex >= text.length()) {
                    return -1;
                } else {
                    // Test again from next candidate character
                    // This isn't the first letter of this word
                    result = text.indexOf(inputChar, fromIndex);
                }
            } else {
                return result;
            }
        }

    }

    return result;
}

From source file:org.exoplatform.cms.common.TransformHTML.java

public static String removeCharterStrange(String s) {
    if (s == null || s.length() <= 0)
        return EMPTY_STR;
    int i = 0;/*from   w ww . j  a  v a2 s  .  c o  m*/
    StringBuilder builder = new StringBuilder();
    while (i < s.length()) {
        if (s.codePointAt(i) > 31) {
            builder.append(s.charAt(i));
        }
        ++i;
    }
    return builder.toString();
}

From source file:org.exoplatform.forum.ForumTransformHTML.java

public static String enCodeViewSignature(String s) {
    if (s != null && s.trim().length() > 0) {
        // replace enter key to <br/> tag html
        StringBuffer buffer = new StringBuffer();
        for (int j = 0; j < s.trim().length(); j++) {
            if (s.codePointAt(j) == 10) {
                buffer.append("<br/>");
            } else {
                buffer.append(s.charAt(j));
            }//from   w w  w.j a  v  a 2 s  .co  m
        }
        s = buffer.toString();
    } else
        s = ForumUtils.EMPTY_STR;
    return s;
}

From source file:Main.java

/**
 * Escape some special character as HTML escape sequence.
 * /*from   w  w  w .java 2  s  .  c  o  m*/
 * @param text Text to be displayed using WebView.
 * @return Text correctly escaped.
 */
public static String escapeCharacterToDisplay(String text) {
    Pattern pattern = PLAIN_TEXT_TO_ESCAPE;
    Matcher match = pattern.matcher(text);

    if (match.find()) {
        StringBuilder out = new StringBuilder();
        int end = 0;
        do {
            int start = match.start();
            out.append(text.substring(end, start));
            end = match.end();
            int c = text.codePointAt(start);
            if (c == ' ') {
                // Escape successive spaces into series of "&nbsp;".
                for (int i = 1, n = end - start; i < n; ++i) {
                    out.append("&nbsp;");
                }
                out.append(' ');
            } else if (c == '\r' || c == '\n') {
                out.append("<br>");
            } else if (c == '<') {
                out.append("&lt;");
            } else if (c == '>') {
                out.append("&gt;");
            } else if (c == '&') {
                out.append("&amp;");
            }
        } while (match.find());
        out.append(text.substring(end));
        text = out.toString();
    }
    return text;
}