List of usage examples for java.lang.String codePointAt
public int codePointAt(int index)
From source file:de.unwesen.packrat.api.FeedReader.java
private void handleWebSearchResults(String data, final Handler handler) { // Log.d(LTAG, "Result: " + data); try {// www. ja v a 2 s. c o m // First check response status. If that is != 200, we may have an error // message to log, and definitely can bail out early. JSONObject result = new JSONObject(data); int status = result.getInt("responseStatus"); if (200 != status) { Log.e(LTAG, "Server error: " + result.getString("responseDetails")); handler.obtainMessage(ERR_SERVER).sendToTarget(); return; } JSONObject d = result.getJSONObject("responseData"); JSONArray res = d.getJSONArray("results"); // Count the occurrences of various words across all returned titles. // If a word is known to designate media type, we'll ignore it. We'll // also ignore words shorter than MIN_WORD_LENGTH. HashMap<String, Integer> wordCount = new HashMap<String, Integer>(); for (int i = 0; i < res.length(); ++i) { JSONObject entry = res.getJSONObject(i); String title = entry.getString("titleNoFormatting"); String[] words = title.split(" "); for (String word : words) { if (MIN_WORD_LENGTH > word.length()) { // Too short continue; } Integer type = sMediaTypes.get(word); if (null != type) { // This word is a media type keyword, so we'll ignore it. continue; } word = word.toLowerCase(); Integer count = wordCount.get(word); if (null == count) { wordCount.put(word, 1); } else { wordCount.put(word, count + 1); } } } // Now that we've counted words, first filter out all words that contain // non-letters. Those are likely not good candidates for further searching. // We ignore them by putting their count to zero. // The tricky part here is that trailing non-letters are likely fine, we // just can't use them for searches. 
HashMap<String, Integer> filteredWordCount = new HashMap<String, Integer>(); for (String word : wordCount.keySet()) { // Log.d(LTAG, "Word: " + word + " -> " + wordCount.get(word)); int lastLetter = -1; int lastNonLetter = -1; for (int i = 0; i < word.length(); ++i) { int codePoint = word.codePointAt(i); if (Character.isLetter(codePoint) || Character.isDigit(codePoint)) { lastLetter = i; if (lastNonLetter > 0) { // Due to the sequential nature of our iteration, we know that // at(i) is now a letter following a non-letter, so we can // safely ignore this word. break; } } else { lastNonLetter = i; if (-1 == lastLetter) { // We have non-letters preceeding letters, that word should // likely be discarded. break; } } } if (-1 == lastNonLetter) { // Word is pure letters, keep it. filteredWordCount.put(word, wordCount.get(word)); } else if (-1 == lastLetter) { // Word is pure non-letters, discard it. } else if (lastNonLetter > lastLetter) { // Word has trailing non-letters, cut it. Integer count = wordCount.get(word); word = word.substring(0, lastLetter + 1); filteredWordCount.put(word, count); } else { // Word has non-letters in the middle. } } // Next filter step is optional: if we had more than one title to go // through, then chances are that words with only one count should be // ignored. If we had only one title, that's not an optimization we can // safely make. if (1 < res.length()) { wordCount = filteredWordCount; filteredWordCount = new HashMap<String, Integer>(); for (String word : wordCount.keySet()) { int count = wordCount.get(word); if (count > 1) { filteredWordCount.put(word, count); } } } // If we're left with no results, give up right here. if (0 == filteredWordCount.size()) { handler.obtainMessage(ERR_EMPTY_RESPONSE).sendToTarget(); return; } // If we've got results, sort them. 
List<HashMap.Entry> wordList = new LinkedList<HashMap.Entry>(filteredWordCount.entrySet()); Collections.sort(wordList, new Comparator() { public int compare(Object o1, Object o2) { return -1 * ((Comparable) ((HashMap.Entry) (o1)).getValue()) .compareTo(((HashMap.Entry) (o2)).getValue()); } }); // With the resulting wordList, we'll generate search terms, preferring // more words over fewer words, and words with a higher count over words // with a lower count. WebSearchMachine machine = new WebSearchMachine(wordList, handler); machine.nextTerm(); } catch (JSONException ex) { handler.obtainMessage(ERR_SERIALIZATION).sendToTarget(); } }
From source file:org.apache.pdfbox.pdmodel.PDPageContentStream.java
/** * Shows the given text at the location specified by the current text matrix. * * @param text The Unicode text to show. * @throws IOException If an io exception occurs. *//*from w ww. j a va2 s . c o m*/ public void showText(String text) throws IOException { if (!inTextMode) { throw new IllegalStateException("Must call beginText() before showText()"); } if (fontStack.isEmpty()) { throw new IllegalStateException("Must call setFont() before showText()"); } PDFont font = fontStack.peek(); // Unicode code points to keep when subsetting if (font.willBeSubset()) { for (int offset = 0; offset < text.length();) { int codePoint = text.codePointAt(offset); font.addToSubset(codePoint); offset += Character.charCount(codePoint); } } COSWriter.writeString(font.encode(text), output); write(" "); writeOperator("Tj"); }
From source file:org.rzo.yajsw.os.ms.win.w32.WindowsXPProcess.java
public String getCommandLineInternalWMI() { String result = "?"; // if the server is overloaded we may not get an answer -> try 3 times for (int k = 0; k < 3 && "?".equals(result); k++) try {/*from w w w . java2 s .c o m*/ WindowsXPProcess p = new WindowsXPProcess(); new File("wmic.tmp").delete(); p.setCommand("cmd /C wmic process where processid=" + getPid() + " get commandline > wmic.tmp"); p.setVisible(false); p.start(); p.waitFor(30000); BufferedReader br = new BufferedReader(new FileReader("wmic.tmp")); br.readLine(); br.readLine(); String l = br.readLine(); if (l.codePointAt(0) == 0) { StringBuffer s = new StringBuffer(); for (int i = 0; i < l.length(); i++) if (l.codePointAt(i) != 0) s.append(l.charAt(i)); l = s.toString(); } br.close(); result = l; } catch (Exception e) { e.printStackTrace(); try { Thread.sleep(10000); } catch (InterruptedException e1) { e1.printStackTrace(); return result; } } return result; }
From source file:org.apache.pdfbox.text.PDFTextStripper.java
/** * Handles the LTR and RTL direction of the given words. The whole implementation stands and falls with the given * word. If the word is a full line, the results will be the best. If the word contains of single words or * characters, the order of the characters in a word or words in a line may wrong, due to RTL and LTR marks and * characters!/* w w w.j a va2 s. c om*/ * * Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx * * @param word The word that shall be processed * @return new word with the correct direction of the containing characters */ private String handleDirection(String word) { Bidi bidi = new Bidi(word, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT); // if there is pure LTR text no need to process further if (!bidi.isMixed() && bidi.getBaseLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT) { return word; } // collect individual bidi information int runCount = bidi.getRunCount(); byte[] levels = new byte[runCount]; Integer[] runs = new Integer[runCount]; for (int i = 0; i < runCount; i++) { levels[i] = (byte) bidi.getRunLevel(i); runs[i] = i; } // reorder individual parts based on their levels Bidi.reorderVisually(levels, 0, runs, 0, runCount); // collect the parts based on the direction within the run StringBuilder result = new StringBuilder(); for (int i = 0; i < runCount; i++) { int index = runs[i]; int start = bidi.getRunStart(index); int end = bidi.getRunLimit(index); int level = levels[index]; if ((level & 1) != 0) { while (--end >= start) { char character = word.charAt(end); if (Character.isMirrored(word.codePointAt(end))) { if (MIRRORING_CHAR_MAP.containsKey(character)) { result.append(MIRRORING_CHAR_MAP.get(character)); } else { result.append(character); } } else { result.append(character); } } } else { result.append(word, start, end); } } return result.toString(); }
From source file:com.repeatability.pdf.PDFTextStripper.java
/** * Handles the LTR and RTL direction of the given words. The whole implementation stands and falls with the given * word. If the word is a full line, the results will be the best. If the word contains of single words or * characters, the order of the characters in a word or words in a line may wrong, due to RTL and LTR marks and * characters!//w w w. j a v a 2s.co m * * Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx * * @param word The word that shall be processed * @return new word with the correct direction of the containing characters */ // kwa //private String handleDirection(String word) protected String handleDirection(String word) { Bidi bidi = new Bidi(word, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT); // if there is pure LTR text no need to process further if (!bidi.isMixed() && bidi.getBaseLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT) { return word; } // collect individual bidi information int runCount = bidi.getRunCount(); byte[] levels = new byte[runCount]; Integer[] runs = new Integer[runCount]; for (int i = 0; i < runCount; i++) { levels[i] = (byte) bidi.getRunLevel(i); runs[i] = i; } // reorder individual parts based on their levels Bidi.reorderVisually(levels, 0, runs, 0, runCount); // collect the parts based on the direction within the run StringBuilder result = new StringBuilder(); for (int i = 0; i < runCount; i++) { int index = runs[i]; int start = bidi.getRunStart(index); int end = bidi.getRunLimit(index); int level = levels[index]; if ((level & 1) != 0) { for (; --end >= start;) { char character = word.charAt(end); if (Character.isMirrored(word.codePointAt(end))) { if (MIRRORING_CHAR_MAP.containsKey(character)) { result.append(MIRRORING_CHAR_MAP.get(character)); } else { result.append(character); } } else { result.append(character); } } } else { result.append(word, start, end); } } return result.toString(); }
From source file:com.flexoodb.common.FlexUtils.java
public static String removeNonASCII(String s) { StringBuilder out = new StringBuilder(); int codePoint; int i = 0;/*from w w w. ja v a 2 s. co m*/ while (i < s.length()) { // This is the unicode code of the character. codePoint = s.codePointAt(i); if (codePoint < 128) { out.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); } return out.toString(); }
From source file:com.flexoodb.common.FlexUtils.java
public static String removeInvalidXMLCharacters(String s) { StringBuilder out = new StringBuilder(); int codePoint; int i = 0;/*from www. j a v a 2 s. com*/ while (i < s.length()) { // This is the unicode code of the character. codePoint = s.codePointAt(i); if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } i += Character.charCount(codePoint); } return out.toString(); }
From source file:com.crushpaper.Servlet.java
/** Appends the string the a RTF value escaping for unicode. This is a slow function. */ private void appendRtfString(StringBuilder result, String value) { // Inspired by http://blog.stuartlewis.com/2010/09/18/java-rtf-and-unicode-characters/ for (int i = 0; i < value.length(); i++) { int codePoint = value.codePointAt(i); // If the character value is above the // 7-bit range of RTF ASCII if (codePoint == 10) { result.append("\\par\n"); } else if (codePoint > 127) { result.append("\\u" + codePoint + "?"); } else {//from ww w . ja v a 2 s . com result.append(value.substring(i, i + 1)); } } }
From source file:bfile.util.StringUtils.java
/** * <p>Capitalizes a String changing the first character to title case as * per {@link Character#toTitleCase(int)}. No other characters are changed.</p> * * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#capitalize(String)}. * A {@code null} input String returns {@code null}.</p> * * <pre>/*from ww w. j a v a 2 s. co m*/ * StringUtils.capitalize(null) = null * StringUtils.capitalize("") = "" * StringUtils.capitalize("cat") = "Cat" * StringUtils.capitalize("cAt") = "CAt" * StringUtils.capitalize("'cat'") = "'cat'" * </pre> * * @param str the String to capitalize, may be null * @return the capitalized String, {@code null} if null String input * @see org.apache.commons.lang3.text.WordUtils#capitalize(String) * @see #uncapitalize(String) * @since 2.0 */ public static String capitalize(final String str) { int strLen; if (str == null || (strLen = str.length()) == 0) { return str; } final int firstCodepoint = str.codePointAt(0); final int newCodePoint = Character.toTitleCase(firstCodepoint); if (firstCodepoint == newCodePoint) { // already capitalized return str; } int newCodePoints[] = new int[strLen]; // cannot be longer than the char array int outOffset = 0; newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen;) { final int codepoint = str.codePointAt(inOffset); newCodePoints[outOffset++] = codepoint; // copy the remaining ones inOffset += Character.charCount(codepoint); } return new String(newCodePoints, 0, outOffset); }
From source file:bfile.util.StringUtils.java
/** * <p>Uncapitalizes a String, changing the first character to lower case as * per {@link Character#toLowerCase(int)}. No other characters are changed.</p> * * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#uncapitalize(String)}. * A {@code null} input String returns {@code null}.</p> * * <pre>/* www . ja va2 s . c om*/ * StringUtils.uncapitalize(null) = null * StringUtils.uncapitalize("") = "" * StringUtils.uncapitalize("cat") = "cat" * StringUtils.uncapitalize("Cat") = "cat" * StringUtils.uncapitalize("CAT") = "cAT" * </pre> * * @param str the String to uncapitalize, may be null * @return the uncapitalized String, {@code null} if null String input * @see org.apache.commons.lang3.text.WordUtils#uncapitalize(String) * @see #capitalize(String) * @since 2.0 */ public static String uncapitalize(final String str) { int strLen; if (str == null || (strLen = str.length()) == 0) { return str; } final int firstCodepoint = str.codePointAt(0); final int newCodePoint = Character.toLowerCase(firstCodepoint); if (firstCodepoint == newCodePoint) { // already capitalized return str; } int newCodePoints[] = new int[strLen]; // cannot be longer than the char array int outOffset = 0; newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen;) { final int codepoint = str.codePointAt(inOffset); newCodePoints[outOffset++] = codepoint; // copy the remaining ones inOffset += Character.charCount(codepoint); } return new String(newCodePoints, 0, outOffset); }