List of usage examples for java.lang.Character.UnicodeBlock.toString()
public String toString()
From source file: org.apache.tika.eval.AbstractProfiler.java
void unicodeBlocks(Metadata metadata, Map<Cols, String> data) { String content = getContent(metadata); if (content.length() < 200) { return;/* w w w .j a va 2 s . co m*/ } String s = content; if (content.length() > maxContentLengthForLangId) { s = content.substring(0, maxContentLengthForLangId); } Map<String, Integer> m = new HashMap<>(); Reader r = new StringReader(s); try { int c = r.read(); while (c != -1) { Character.UnicodeBlock block = Character.UnicodeBlock.of(c); String blockString = (block == null) ? "NULL" : block.toString(); Integer i = m.get(blockString); if (i == null) { i = 0; } i++; if (block == null) { blockString = "NULL"; } m.put(blockString, i); c = r.read(); } } catch (IOException e) { e.printStackTrace(); //swallow } List<Pair<String, Integer>> pairs = new ArrayList<>(); for (Map.Entry<String, Integer> e : m.entrySet()) { pairs.add(Pair.of(e.getKey(), e.getValue())); } Collections.sort(pairs, new Comparator<Pair<String, Integer>>() { @Override public int compare(Pair<String, Integer> o1, Pair<String, Integer> o2) { return o2.getValue().compareTo(o1.getValue()); } }); StringBuilder sb = new StringBuilder(); for (int i = 0; i < 20 && i < pairs.size(); i++) { if (i > 0) { sb.append(" | "); } sb.append(pairs.get(i).getKey() + ": " + pairs.get(i).getValue()); } data.put(Cols.UNICODE_CHAR_BLOCKS, sb.toString()); }