Example usage for java.lang String codePointAt

List of usage examples for java.lang String codePointAt

Introduction

On this page you can find example usage for java.lang String codePointAt.

Prototype

public int codePointAt(int index) 

Source Link

Document

Returns the character (Unicode code point) at the specified index.

Usage

From source file:org.apache.pdfbox.pdmodel.font.PDTrueTypeFont.java

/**
 * Fills the given font descriptor dictionary with metrics read from the embedded
 * TrueType font program ('name', OS/2, 'head', 'hhea', 'post', 'glyf', 'cmap'
 * and 'hmtx' tables), and sets the per-code widths on this font.
 *
 * @param fd the font descriptor dictionary to populate
 * @param ttfData stream containing the raw TrueType font data
 * @throws IOException if the font program cannot be parsed
 */
private void loadDescriptorDictionary(PDFontDescriptorDictionary fd, InputStream ttfData) throws IOException {
    TrueTypeFont ttf = null;
    try {
        TTFParser parser = new TTFParser();
        ttf = parser.parseTTF(ttfData);

        // Base font and family names come from the 'name' table.
        NamingTable naming = ttf.getNaming();
        List<NameRecord> records = naming.getNameRecords();
        for (NameRecord nr : records) {
            if (nr.getNameId() == NameRecord.NAME_POSTSCRIPT_NAME) {
                setBaseFont(nr.getString());
                fd.setFontName(nr.getString());
            } else if (nr.getNameId() == NameRecord.NAME_FONT_FAMILY_NAME) {
                fd.setFontFamily(nr.getString());
            }
        }

        // Descriptor flags derived from the OS/2 family class.
        OS2WindowsMetricsTable os2 = ttf.getOS2Windows();
        boolean isSymbolic = false;
        switch (os2.getFamilyClass()) {
        case OS2WindowsMetricsTable.FAMILY_CLASS_SYMBOLIC:
            isSymbolic = true;
            break;
        case OS2WindowsMetricsTable.FAMILY_CLASS_SCRIPTS:
            fd.setScript(true);
            break;
        case OS2WindowsMetricsTable.FAMILY_CLASS_CLAREDON_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_FREEFORM_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_MODERN_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_OLDSTYLE_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_SLAB_SERIFS:
            fd.setSerif(true);
            break;
        default:
            // other family classes map to no descriptor flag
        }

        // Map the OS/2 width class onto the PDF /FontStretch name.
        switch (os2.getWidthClass()) {
        case OS2WindowsMetricsTable.WIDTH_CLASS_ULTRA_CONDENSED:
            fd.setFontStretch("UltraCondensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_EXTRA_CONDENSED:
            fd.setFontStretch("ExtraCondensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_CONDENSED:
            fd.setFontStretch("Condensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_SEMI_CONDENSED:
            fd.setFontStretch("SemiCondensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_MEDIUM:
            fd.setFontStretch("Normal");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_SEMI_EXPANDED:
            fd.setFontStretch("SemiExpanded");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_EXPANDED:
            fd.setFontStretch("Expanded");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_EXTRA_EXPANDED:
            fd.setFontStretch("ExtraExpanded");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_ULTRA_EXPANDED:
            fd.setFontStretch("UltraExpanded");
            break;
        default:
            // unknown width class: leave /FontStretch unset
        }
        fd.setFontWeight(os2.getWeightClass());
        fd.setSymbolic(isSymbolic);
        fd.setNonSymbolic(!isSymbolic);

        //todo retval.setFixedPitch
        //todo retval.setItalic
        //todo retval.setAllCap
        //todo retval.setSmallCap
        //todo retval.setForceBold

        // Bounding box and vertical metrics, scaled into the 1000-unit glyph space.
        HeaderTable header = ttf.getHeader();
        PDRectangle rect = new PDRectangle();
        float scaling = 1000f / header.getUnitsPerEm();
        rect.setLowerLeftX(header.getXMin() * scaling);
        rect.setLowerLeftY(header.getYMin() * scaling);
        rect.setUpperRightX(header.getXMax() * scaling);
        rect.setUpperRightY(header.getYMax() * scaling);
        fd.setFontBoundingBox(rect);

        HorizontalHeaderTable hHeader = ttf.getHorizontalHeader();
        fd.setAscent(hHeader.getAscender() * scaling);
        fd.setDescent(hHeader.getDescender() * scaling);

        GlyphTable glyphTable = ttf.getGlyph();
        GlyphData[] glyphs = glyphTable.getGlyphs();

        PostScriptTable ps = ttf.getPostScript();
        fd.setFixedPitch(ps.getIsFixedPitch() > 0);
        fd.setItalicAngle(ps.getItalicAngle());

        String[] names = ps.getGlyphNames();

        if (names != null) {
            for (int i = 0; i < names.length; i++) {
                // if we have a capital H then use that, otherwise use the
                // tallest letter
                if (names[i].equals("H")) {
                    fd.setCapHeight(glyphs[i].getBoundingBox().getUpperRightY() / scaling);
                }
                if (names[i].equals("x")) {
                    fd.setXHeight(glyphs[i].getBoundingBox().getUpperRightY() / scaling);
                }
            }
        }

        // hmm there does not seem to be a clear definition for StemV,
        // this is close enough and I am told it doesn't usually get used.
        fd.setStemV((fd.getFontBoundingBox().getWidth() * .13f));

        // Find the Windows/Unicode cmap subtable used to resolve glyph IDs.
        CMAPTable cmapTable = ttf.getCMAP();
        CMAPEncodingEntry[] cmaps = cmapTable.getCmaps();
        CMAPEncodingEntry uniMap = null;

        for (int i = 0; i < cmaps.length; i++) {
            if (cmaps[i].getPlatformId() == CMAPTable.PLATFORM_WINDOWS) {
                int platformEncoding = cmaps[i].getPlatformEncodingId();
                if (CMAPTable.ENCODING_UNICODE == platformEncoding) {
                    uniMap = cmaps[i];
                    break;
                }
            }
        }

        Map<Integer, String> codeToName = this.getFontEncoding().getCodeToNameMap();

        int firstChar = Collections.min(codeToName.keySet());
        int lastChar = Collections.max(codeToName.keySet());

        HorizontalMetricsTable hMet = ttf.getHorizontalMetrics();
        int[] widthValues = hMet.getAdvanceWidth();
        int nWidths = lastChar - firstChar + 1;
        List<Float> widths = new ArrayList<Float>(nWidths);
        // Default every entry to the width of the .notdef glyph (GID 0).
        Float zero = Float.valueOf(widthValues[0] * scaling);
        for (int i = 0; i < nWidths; i++) {
            widths.add(zero);
        }
        // Encoding singleton giving access to the glyph name to Unicode
        // code point mapping of Adobe's glyphlist.txt.
        Encoding glyphlist = WinAnsiEncoding.INSTANCE;

        // A character code is mapped to a glyph name via the provided
        // font encoding. Afterwards, the glyph name is translated to a
        // glyph ID.
        // For details, see PDFReference16.pdf, Section 5.5.5, p.401
        //
        // Without a Unicode cmap subtable we cannot resolve glyph IDs, so the
        // .notdef defaults are kept (previously this dereferenced a null map).
        if (uniMap != null) {
            for (Entry<Integer, String> e : codeToName.entrySet()) {
                String name = e.getValue();
                // pdf code to unicode by glyph list.
                String c = glyphlist.getCharacter(name);
                if (c == null || c.length() == 0) {
                    // glyph name unknown to the glyph list: keep the default width
                    continue;
                }
                int charCode = c.codePointAt(0);
                int gid = uniMap.getGlyphId(charCode);
                // GID 0 is .notdef; also guard against hmtx tables with fewer
                // entries than glyphs (trailing glyphs reuse the last width).
                if (gid != 0 && gid < widthValues.length) {
                    widths.set(e.getKey().intValue() - firstChar, widthValues[gid] * scaling);
                }
            }
        }
        setWidths(widths);
        setFirstChar(firstChar);
        setLastChar(lastChar);
    } finally {
        // Always release the parsed font, even when parsing metrics fails.
        if (ttf != null) {
            ttf.close();
        }
    }
}

From source file:cn.lambdalib.cgui.gui.component.TextBox.java

/**
 * Advances the display offset so that the caret falls back inside the
 * visible width limit of the text box.
 */
private void checkCaretRegion() {
    final double limit = widthLimit();
    final String visible = processedContent().substring(displayOffset);
    final int caretInView = caretPos - displayOffset;
    final double caretDistance = sumLength(visible, 0, caretInView);
    if (caretDistance > limit) {
        // Drop leading characters until the caret fits within the limit.
        double trimmedWidth = 0.0;
        int skip = 0;
        while (skip < caretInView && caretDistance - trimmedWidth > limit) {
            trimmedWidth += font.getCharWidth(visible.codePointAt(skip), option);
            ++skip;
        }
        displayOffset += skip;
    }

    assert displayOffset < caretPos;
}

From source file:org.apache.pdfbox.text.TextPosition.java

/**
 * Combine the diacritic, for example, convert non-combining diacritic
 * characters to their combining counterparts.
 *
 * @param str String to normalize
 * @return Normalized string
 */
private String combineDiacritic(String str) {
    // Unicode contains special combining forms of the diacritic characters
    // which we want to use; look the leading code point up first.
    int codePoint = str.codePointAt(0);

    // Characters not defined in the Unicode spec fall through to NFKC.
    return DIACRITICS.containsKey(codePoint)
            ? DIACRITICS.get(codePoint)
            : Normalizer.normalize(str, Normalizer.Form.NFKC).trim();
}

From source file:org.sejda.sambox.text.TextPosition.java

/**
 * Combine the diacritic, for example, convert non-combining diacritic characters to their combining counterparts.
 *
 * @param str String to normalize
 * @return Normalized string
 */
private String combineDiacritic(String str) {
    // Unicode defines special combining forms for diacritics; prefer those.
    final int cp = str.codePointAt(0);

    if (!DIACRITICS.containsKey(cp)) {
        // Not a special case: let NFKC normalization handle the conversion.
        return Normalizer.normalize(str, Normalizer.Form.NFKC).trim();
    }
    return DIACRITICS.get(cp);
}

From source file:org.omegat.tokenizer.BaseTokenizer.java

/**
 * Splits the input into one {@link Token} per Unicode code point,
 * handling surrogate pairs as single tokens.
 */
protected Token[] tokenizeByCodePoint(String strOrig) {
    // See http://www.ibm.com/developerworks/library/j-unicode/#1-5
    // Example 1-5 appears to be faster than 1-6 for us (because our strings are short?)
    final Token[] result = new Token[strOrig.codePointCount(0, strOrig.length())];
    int out = 0;
    int pos = 0;
    while (pos < strOrig.length()) {
        final int cp = strOrig.codePointAt(pos);
        result[out++] = new Token(String.valueOf(Character.toChars(cp)), pos);
        pos += Character.charCount(cp);
    }
    return result;
}

From source file:org.omegat.tokenizer.BaseTokenizer.java

/**
 * Splits the input into one String per Unicode code point, handling
 * surrogate pairs as single entries.
 */
protected String[] tokenizeByCodePointToStrings(String strOrig) {
    // See http://www.ibm.com/developerworks/library/j-unicode/#1-5
    // Example 1-5 appears to be faster than 1-6 for us (because our strings are short?)
    final String[] result = new String[strOrig.codePointCount(0, strOrig.length())];
    int out = 0;
    int pos = 0;
    while (pos < strOrig.length()) {
        final int cp = strOrig.codePointAt(pos);
        result[out++] = String.valueOf(Character.toChars(cp));
        pos += Character.charCount(cp);
    }
    return result;
}

From source file:org.omegat.tokenizer.BaseTokenizer.java

/**
 * Decides whether a token survives the requested filters: digit-containing
 * tokens are rejected when {@code filterDigits} is set, and whitespace-only
 * tokens are rejected when {@code filterWhitespace} is set.
 */
private boolean acceptToken(String token, boolean filterDigits, boolean filterWhitespace) {
    if (StringUtil.isEmpty(token)) {
        return false;
    }
    if (!filterDigits && !filterWhitespace) {
        // Nothing to filter: accept any non-empty token.
        return true;
    }
    boolean whitespaceOnly = true;
    int i = 0;
    while (i < token.length()) {
        final int cp = token.codePointAt(i);
        if (filterDigits && Character.isDigit(cp)) {
            return false;
        }
        if (filterWhitespace && !StringUtil.isWhiteSpace(cp)) {
            whitespaceOnly = false;
        }
        i += Character.charCount(cp);
    }
    return !(filterWhitespace && whitespaceOnly);
}

From source file:org.spout.api.chat.ChatArguments.java

/**
 * Splits this ChatArguments instance into sections
 *
 * @param type How these arguments are to be split into sections
 * @return The split sections
 */
public List<ChatSection> toSections(SplitType type) {
    List<ChatSection> sections = new ArrayList<ChatSection>();
    StringBuilder currentWord = new StringBuilder();
    // Maps a character offset within the current section to the styles that
    // become active at that offset; key -1 holds styles carried over from
    // earlier sections.
    LinkedHashMap<Integer, List<ChatStyle>> map;
    switch (type) {
    case WORD:
        map = new LinkedHashMap<Integer, List<ChatStyle>>();
        int curIndex = 0;
        for (Object obj : getExpandedPlaceholders()) {
            if (obj instanceof ChatStyle) {
                ChatStyle style = (ChatStyle) obj;
                List<ChatStyle> list = map.get(curIndex);
                if (list == null) {
                    list = new ArrayList<ChatStyle>();
                    map.put(curIndex, list);
                }
                // A newly applied style displaces any conflicting active style.
                ChatSectionUtils.removeConflicting(list, style);
                list.add(style);
            } else {
                String val = String.valueOf(obj);
                // NOTE(review): codePointAt is called with a char index that is
                // advanced by 1 per iteration, so a supplementary (non-BMP)
                // character would be visited once per surrogate half — confirm
                // inputs are BMP-only.
                for (int i = 0; i < val.length(); ++i) {
                    int codePoint = val.codePointAt(i);
                    if (Character.isWhitespace(codePoint)) {
                        // Whitespace terminates the current word: emit it as a
                        // section together with a snapshot of the style map.
                        sections.add(new ChatSectionImpl(type, new LinkedHashMap<Integer, List<ChatStyle>>(map),
                                currentWord.toString()));
                        curIndex = 0;
                        currentWord = new StringBuilder();
                        if (map.size() > 0) {
                            // Collapse all styles applied so far into the -1 slot
                            // so they carry over into the following section.
                            final List<ChatStyle> previousStyles = map.containsKey(-1)
                                    ? new ArrayList<ChatStyle>(map.get(-1))
                                    : new ArrayList<ChatStyle>();

                            for (Map.Entry<Integer, List<ChatStyle>> entry : map.entrySet()) {
                                if (entry.getKey() != -1) {
                                    for (ChatStyle style : entry.getValue()) {
                                        ChatSectionUtils.removeConflicting(previousStyles, style);
                                        previousStyles.add(style);
                                    }
                                }
                            }
                            map.clear();
                            map.put(-1, previousStyles);
                        }
                    } else {
                        currentWord.append(val.substring(i, i + 1));
                        curIndex++;
                    }
                }
            }
        }

        // Emit the trailing word, if any text remains after the loop.
        if (currentWord.length() > 0) {
            sections.add(new ChatSectionImpl(type, map, currentWord.toString()));
        }
        break;

    case STYLE_CHANGE:
        // Each style change closes the current section; the section records
        // the full set of styles active at that point under key -1.
        StringBuilder curSection = new StringBuilder();
        List<ChatStyle> activeStyles = new ArrayList<ChatStyle>(3);
        for (Object obj : getExpandedPlaceholders()) {
            if (obj instanceof ChatStyle) {
                ChatStyle style = (ChatStyle) obj;
                ChatSectionUtils.removeConflicting(activeStyles, style);
                activeStyles.add(style);

                map = new LinkedHashMap<Integer, List<ChatStyle>>();
                map.put(-1, new ArrayList<ChatStyle>(activeStyles));
                sections.add(new ChatSectionImpl(type, map, curSection.toString()));
                curSection = new StringBuilder();
            } else {
                curSection.append(obj);
            }
        }
        break;

    case ALL:
        // No splitting: the entire argument list is one section.
        return Collections.<ChatSection>singletonList(
                new ChatSectionImpl(getSplitType(), getActiveStyles(), getPlainString()));

    default:
        throw new IllegalArgumentException("Unknown SplitOption " + type + "!");
    }
    return sections;
}

From source file:net.sf.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter.java

@Override
public String format(String text) {
    // Fail fast on null input; empty input has nothing to convert.
    String result = Objects.requireNonNull(text);

    if (result.isEmpty()) {
        return result;
    }

    // Standard symbols: direct Unicode -> LaTeX string replacements.
    for (Map.Entry<String, String> unicodeLatexPair : HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP
            .entrySet()) {
        result = result.replace(unicodeLatexPair.getKey(), unicodeLatexPair.getValue());
    }

    // Combining accents: a base character followed by a known accent code point
    // is rewritten as {\<accent>{<base>}}. 'consumed' marks that the character
    // at position i was already emitted as part of the previous pair.
    // NOTE(review): codePointAt is used with plain char indices and results are
    // cast to char, so supplementary (non-BMP) characters are not treated as
    // single units here — confirm inputs are BMP-only.
    StringBuilder sb = new StringBuilder();
    boolean consumed = false;
    for (int i = 0; i <= (result.length() - 2); i++) {
        if (!consumed && (i < (result.length() - 1))) {
            int cpCurrent = result.codePointAt(i);
            Integer cpNext = result.codePointAt(i + 1);
            String code = HTMLUnicodeConversionMaps.ESCAPED_ACCENTS.get(cpNext);
            if (code == null) {
                sb.append((char) cpCurrent);
            } else {
                sb.append("{\\").append(code).append('{').append((char) cpCurrent).append("}}");
                consumed = true;
            }
        } else {
            consumed = false;
        }
    }
    // The loop never visits the final character; append it unless it was
    // consumed as the accent of the last emitted pair.
    if (!consumed) {
        sb.append((char) result.codePointAt(result.length() - 1));
    }
    result = sb.toString();

    // Warn about remaining non-ASCII code points that were not converted.
    // NOTE(review): the threshold 129 skips code point 128 (0x80) — verify
    // whether 128 should also be reported.
    for (int i = 0; i <= (result.length() - 1); i++) {
        int cp = result.codePointAt(i);
        if (cp >= 129) {
            LOGGER.warn("Unicode character not converted: " + cp);
        }
    }
    return result;
}

From source file:org.opencms.search.solr.spellchecking.CmsSolrSpellchecker.java

/**
 * Converts the suggestions from the Solrj format to JSON format.
 *
 * @param response The SpellCheckResponse object containing the spellcheck results.
 * @return The spellcheck suggestions as JSON object or null if something goes wrong.
 */
private JSONObject getConvertedResponseAsJson(SpellCheckResponse response) {

    if (null == response) {
        return null;
    }

    final JSONObject suggestions = new JSONObject();
    final Map<String, Suggestion> solrSuggestions = response.getSuggestionMap();

    // Iterate over entries so each suggestion is fetched once, instead of a
    // keySet() walk followed by a get(key) lookup per word.
    for (final Map.Entry<String, Suggestion> entry : solrSuggestions.entrySet()) {
        final String key = entry.getKey();

        // Suggestions in the form "Xxxx" -> "xxxx" are erroneously marked as
        // misspelled: skip a capitalized word whose lowercase variant was not
        // itself flagged by the spellchecker.
        if (Character.isUpperCase(key.codePointAt(0)) && !solrSuggestions.containsKey(key.toLowerCase())) {
            continue;
        }

        try {
            // Get suggestions as List and add them to the response.
            final List<String> alternatives = entry.getValue().getAlternatives();
            suggestions.put(key, alternatives);
        } catch (JSONException e) {
            LOG.debug("Exception while converting Solr spellcheckresponse to JSON. ", e);
        }
    }

    return suggestions;
}