Example usage for java.lang Character.UnicodeBlock toString

List of usage examples for java.lang Character.UnicodeBlock toString

Introduction

On this page you can find example usage of java.lang.Character.UnicodeBlock.toString().

Prototype

public String toString() 

Source Link

Document

Returns the name of this Unicode block. (Character.UnicodeBlock inherits toString() from Character.Subset, which returns the subset's name.)

Usage

From source file: org.apache.tika.eval.AbstractProfiler.java

/**
 * Counts how many code points of the extracted content fall into each
 * {@link Character.UnicodeBlock} and stores a summary of the most frequent
 * blocks in {@code data} under {@code Cols.UNICODE_CHAR_BLOCKS}.
 *
 * <p>The summary has the form {@code "BLOCK_A: 123 | BLOCK_B: 45 | ..."},
 * ordered by descending count and limited to the 20 most frequent blocks.
 * Blocks with equal counts appear in whatever order the intermediate
 * {@link HashMap} yields them. Code points that belong to no defined block
 * are tallied under the key {@code "NULL"}.
 *
 * <p>Content shorter than 200 chars is skipped and {@code data} is left
 * untouched. Content longer than {@code maxContentLengthForLangId} chars is
 * truncated to that length before counting.
 *
 * <p>The text is walked by Unicode code point rather than by UTF-16 code
 * unit, so a supplementary character (encoded as a surrogate pair) is counted
 * once under its real block instead of twice under the surrogate blocks.
 *
 * @param metadata metadata from which the content string is obtained via
 *                 {@code getContent(metadata)}
 * @param data     output map that receives the formatted block summary
 */
void unicodeBlocks(Metadata metadata, Map<Cols, String> data) {
    String content = getContent(metadata);
    if (content.length() < 200) {
        return;
    }
    String s = content;
    if (content.length() > maxContentLengthForLangId) {
        // The cut is made on a char index; if it lands inside a surrogate
        // pair, the trailing lone surrogate is counted in its surrogate block.
        s = content.substring(0, maxContentLengthForLangId);
    }

    Map<String, Integer> m = new HashMap<>();
    int offset = 0;
    while (offset < s.length()) {
        // codePointAt combines a valid surrogate pair into one code point.
        int codePoint = s.codePointAt(offset);
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        String blockString = (block == null) ? "NULL" : block.toString();
        Integer count = m.get(blockString);
        m.put(blockString, (count == null) ? 1 : count + 1);
        // Advance by 1 or 2 chars depending on the code point's width.
        offset += Character.charCount(codePoint);
    }

    List<Pair<String, Integer>> pairs = new ArrayList<>();
    for (Map.Entry<String, Integer> e : m.entrySet()) {
        pairs.add(Pair.of(e.getKey(), e.getValue()));
    }
    // Most frequent block first.
    Collections.sort(pairs, new Comparator<Pair<String, Integer>>() {
        @Override
        public int compare(Pair<String, Integer> o1, Pair<String, Integer> o2) {
            return Integer.compare(o2.getValue(), o1.getValue());
        }
    });

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < 20 && i < pairs.size(); i++) {
        if (i > 0) {
            sb.append(" | ");
        }
        Pair<String, Integer> pair = pairs.get(i);
        sb.append(pair.getKey()).append(": ").append(pair.getValue());
    }
    data.put(Cols.UNICODE_CHAR_BLOCKS, sb.toString());
}