List of usage examples for java.lang.String.codePointAt
public int codePointAt(int index)
From source file:org.marketcetera.util.test.UnicodeDataTest.java
private static void singleValid(String str, char[] chars, int[] ucps, byte[] nat, byte[] utf8, byte[] utf16be, byte[] utf16le, byte[] utf32be, byte[] utf32le) { assertArrayEquals(str.toCharArray(), chars); int i = 0;//w w w . jav a 2s.com int j = 0; while (i < str.length()) { int ucp = str.codePointAt(i); assertEquals("At code point position " + j, ucp, ucps[j++]); i += Character.charCount(ucp); } assertArrayEquals(str.getBytes(), nat); assertArrayEquals(str.getBytes(UTF8), utf8); assertArrayEquals(str.getBytes(UTF16BE), utf16be); assertArrayEquals(str.getBytes(UTF16LE), utf16le); assertArrayEquals(str.getBytes(UTF32BE), utf32be); assertArrayEquals(str.getBytes(UTF32LE), utf32le); }
From source file:Main.java
/*** This method ensures that the output String has only * * valid XML unicode characters as specified by the * * XML 1.0 standard. For reference, please see * * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the * * standard</a>. This method will return an empty * * String if the input is null or empty. * * @param in The String whose non-valid characters we want to remove. * * @return The in String, stripped of non-valid characters. * *///from w w w . ja v a 2 s .c om public static String stripNonValidXMLCharacters(String s) { StringBuilder out = new StringBuilder(); // Used to hold the output. int codePoint; // Used to reference the current character. //String ss = "\ud801\udc00"; // This is actualy one unicode character, represented by two code units!!!. int i = 0; while (i < s.length()) { codePoint = s.codePointAt(i); // This is the unicode code of the character. if ((codePoint == 0x9) || // Consider testing larger ranges first to improve speed. (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); // Increment with the number of code units(java chars) needed to represent a Unicode char. } return out.toString(); }
From source file:org.intermine.webservice.server.query.QueryRequestParser.java
/**
 * Decodes an LZW-encoded string into plain text.
 *
 * <p>The input is first passed through {@code fixEncoding}; one integer code is then
 * collected per {@code char} index of the result and the list is handed to
 * {@code decompressLZW}.
 *
 * @param encoded the LZW-encoded text
 * @return the value returned by {@code decompressLZW} for the collected codes
 */
public static String decodeLZWString(String encoded) {
    final List<Integer> codes = new ArrayList<Integer>();
    final String fixed = fixEncoding(encoded);
    final int total = fixed.length();

    // NOTE(review): the index moves one char at a time while codePointAt reads a whole
    // code point, so a surrogate pair contributes two codes (the combined code point and
    // then the low surrogate). Confirm against the encoder whether that is intended.
    int pos = 0;
    while (pos < total) {
        codes.add(fixed.codePointAt(pos));
        pos++;
    }
    return decompressLZW(codes);
}
From source file:org.exoplatform.forum.ForumTransformHTML.java
public static String removeCharterStrange(String s) { if (s == null || s.length() <= 0) return ForumUtils.EMPTY_STR; int i = 0;//w w w.j a v a 2s . c om StringBuilder builder = new StringBuilder(); while (i < s.length()) { if (s.codePointAt(i) > 31) { builder.append(s.charAt(i)); } ++i; } return builder.toString(); }
From source file:Main.java
/** * Find the start of the next token. A token is composed of letters and numbers. Any other * character are considered delimiters.//from w w w . j a va2 s. co m * * @param line The string to search for the next token. * @param startIndex The index to start searching. 0 based indexing. * @return The index for the start of the next token. line.length() if next token not found. */ @VisibleForTesting static int findNextTokenStart(String line, int startIndex) { int index = startIndex; // If already in token, eat remainder of token. while (index <= line.length()) { if (index == line.length()) { // No more tokens. return index; } final int codePoint = line.codePointAt(index); if (!Character.isLetterOrDigit(codePoint)) { break; } index += Character.charCount(codePoint); } // Out of token, eat all consecutive delimiters. while (index <= line.length()) { if (index == line.length()) { return index; } final int codePoint = line.codePointAt(index); if (Character.isLetterOrDigit(codePoint)) { break; } index += Character.charCount(codePoint); } return index; }
From source file:uk.ac.bbsrc.tgac.miso.integration.util.IntegrationUtils.java
/** * Sends a String message to a given host socket * /* ww w . j a va2 s . c om*/ * @param socket * of type Socket * @param query * of type String * @return String * @throws IntegrationException * when the socket couldn't be created */ public static String sendMessage(Socket socket, String query) throws IntegrationException { BufferedWriter wr = null; BufferedReader rd = null; try { wr = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream(), "UTF8")); // Send data wr.write(query + "\r\n"); wr.flush(); // Get response rd = new BufferedReader(new InputStreamReader(socket.getInputStream())); String line; StringBuilder sb = new StringBuilder(); while ((line = rd.readLine()) != null) { sb.append(line); } wr.close(); rd.close(); String dirty = sb.toString(); StringBuilder response = new StringBuilder(); int codePoint; int i = 0; while (i < dirty.length()) { codePoint = dirty.codePointAt(i); if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { response.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); } return response.toString().replace("\\\n", "").replace("\\\t", ""); } catch (UnknownHostException e) { log.error("Cannot resolve host: " + socket.getInetAddress(), e); throw new IntegrationException(e.getMessage()); } catch (IOException e) { log.error("Couldn't get I/O for the connection to: " + socket.getInetAddress(), e); throw new IntegrationException(e.getMessage()); } finally { try { if (wr != null) { wr.close(); } if (rd != null) { rd.close(); } } catch (Throwable t) { log.error("close socket", t); } } }
From source file:Main.java
/** * Find the first character matching the input character in the given * string where the character has no letter preceding it. * //from w w w . j av a 2s. c o m * @param text the string to test for the presence of the input character * @param inputChar the test character * @param fromIndex the index position of the string to start from * @return the position of the first character matching the input character * in the given string where the character has no letter preceding it. */ public static int firstCharAt(String text, int inputChar, int fromIndex) { int result = 0; while (result >= 0) { result = text.indexOf(inputChar, fromIndex); if (result == 0) { return result; } else if (result > 0) { // Check there is a whitespace or symbol before the hit character if (Character.isLetter(text.codePointAt(result - 1))) { // The pre-increment is used in if and else branches. if (++fromIndex >= text.length()) { return -1; } else { // Test again from next candidate character // This isn't the first letter of this word result = text.indexOf(inputChar, fromIndex); } } else { return result; } } } return result; }
From source file:org.exoplatform.cms.common.TransformHTML.java
/**
 * Filters control characters out of a string.
 *
 * <p>The string is walked index by index. Whenever {@code codePointAt} yields a value
 * above 31 for an index, the char at that index is appended to the result.
 *
 * @param s the input text; may be {@code null}
 * @return the filtered text, or {@code EMPTY_STR} if {@code s} is {@code null} or empty
 */
public static String removeCharterStrange(String s) {
    final boolean nothingToDo = (s == null || s.length() <= 0);
    if (nothingToDo) {
        return EMPTY_STR;
    }

    StringBuilder result = new StringBuilder();
    for (int idx = 0; idx < s.length(); idx++) {
        final boolean printable = s.codePointAt(idx) > 31;
        if (printable) {
            result.append(s.charAt(idx));
        }
    }
    return result.toString();
}
From source file:org.exoplatform.forum.ForumTransformHTML.java
public static String enCodeViewSignature(String s) { if (s != null && s.trim().length() > 0) { // replace enter key to <br/> tag html StringBuffer buffer = new StringBuffer(); for (int j = 0; j < s.trim().length(); j++) { if (s.codePointAt(j) == 10) { buffer.append("<br/>"); } else { buffer.append(s.charAt(j)); }//from w w w.j a v a 2 s .co m } s = buffer.toString(); } else s = ForumUtils.EMPTY_STR; return s; }
From source file:Main.java
/** * Escape some special character as HTML escape sequence. * /*from w w w .java 2 s . c o m*/ * @param text Text to be displayed using WebView. * @return Text correctly escaped. */ public static String escapeCharacterToDisplay(String text) { Pattern pattern = PLAIN_TEXT_TO_ESCAPE; Matcher match = pattern.matcher(text); if (match.find()) { StringBuilder out = new StringBuilder(); int end = 0; do { int start = match.start(); out.append(text.substring(end, start)); end = match.end(); int c = text.codePointAt(start); if (c == ' ') { // Escape successive spaces into series of " ". for (int i = 1, n = end - start; i < n; ++i) { out.append(" "); } out.append(' '); } else if (c == '\r' || c == '\n') { out.append("<br>"); } else if (c == '<') { out.append("<"); } else if (c == '>') { out.append(">"); } else if (c == '&') { out.append("&"); } } while (match.find()); out.append(text.substring(end)); text = out.toString(); } return text; }