List of usage examples for java.lang.String.codePointAt

public int codePointAt(int index)

Returns the Unicode code point at the specified index. The index refers to char values (UTF-16 code units): if the char at the given index is in the high-surrogate range and is followed by a low surrogate, the supplementary code point corresponding to the pair is returned; otherwise the char value itself is returned.
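Before the project examples, here is a minimal self-contained sketch (not taken from any of the projects below; the string literal is an arbitrary illustration) of why codePointAt matters: for supplementary characters, charAt yields a lone surrogate while codePointAt yields the whole code point.

public class CodePointAtDemo {
    public static void main(String[] args) {
        // "a" followed by U+1F600 GRINNING FACE, which needs two chars (a surrogate pair)
        String s = "a\uD83D\uDE00";
        System.out.println(s.length());        // 3 chars (UTF-16 code units)
        System.out.println(s.codePointAt(0));  // 97 ('a')
        System.out.println(s.codePointAt(1));  // 128512 (0x1F600, the full code point)
        System.out.println((int) s.charAt(1)); // 55357 (0xD83D, just the high surrogate)
    }
}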
From source file: org.apache.pdfbox.pdmodel.font.PDTrueTypeFont.java

private void loadDescriptorDictionary(PDFontDescriptorDictionary fd, InputStream ttfData) throws IOException {
    TrueTypeFont ttf = null;
    try {
        TTFParser parser = new TTFParser();
        ttf = parser.parseTTF(ttfData);
        NamingTable naming = ttf.getNaming();
        List<NameRecord> records = naming.getNameRecords();
        for (int i = 0; i < records.size(); i++) {
            NameRecord nr = records.get(i);
            if (nr.getNameId() == NameRecord.NAME_POSTSCRIPT_NAME) {
                setBaseFont(nr.getString());
                fd.setFontName(nr.getString());
            } else if (nr.getNameId() == NameRecord.NAME_FONT_FAMILY_NAME) {
                fd.setFontFamily(nr.getString());
            }
        }
        OS2WindowsMetricsTable os2 = ttf.getOS2Windows();
        boolean isSymbolic = false;
        switch (os2.getFamilyClass()) {
        case OS2WindowsMetricsTable.FAMILY_CLASS_SYMBOLIC:
            isSymbolic = true;
            break;
        case OS2WindowsMetricsTable.FAMILY_CLASS_SCRIPTS:
            fd.setScript(true);
            break;
        case OS2WindowsMetricsTable.FAMILY_CLASS_CLAREDON_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_FREEFORM_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_MODERN_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_OLDSTYLE_SERIFS:
        case OS2WindowsMetricsTable.FAMILY_CLASS_SLAB_SERIFS:
            fd.setSerif(true);
            break;
        default:
            // do nothing
        }
        switch (os2.getWidthClass()) {
        case OS2WindowsMetricsTable.WIDTH_CLASS_ULTRA_CONDENSED:
            fd.setFontStretch("UltraCondensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_EXTRA_CONDENSED:
            fd.setFontStretch("ExtraCondensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_CONDENSED:
            fd.setFontStretch("Condensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_SEMI_CONDENSED:
            fd.setFontStretch("SemiCondensed");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_MEDIUM:
            fd.setFontStretch("Normal");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_SEMI_EXPANDED:
            fd.setFontStretch("SemiExpanded");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_EXPANDED:
            fd.setFontStretch("Expanded");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_EXTRA_EXPANDED:
            fd.setFontStretch("ExtraExpanded");
            break;
        case OS2WindowsMetricsTable.WIDTH_CLASS_ULTRA_EXPANDED:
            fd.setFontStretch("UltraExpanded");
            break;
        default:
            // do nothing
        }
        fd.setFontWeight(os2.getWeightClass());
        fd.setSymbolic(isSymbolic);
        fd.setNonSymbolic(!isSymbolic);
        // todo retval.setFixedPitch
        // todo retval.setItalic
        // todo retval.setAllCap
        // todo retval.setSmallCap
        // todo retval.setForceBold
        HeaderTable header = ttf.getHeader();
        PDRectangle rect = new PDRectangle();
        float scaling = 1000f / header.getUnitsPerEm();
        rect.setLowerLeftX(header.getXMin() * scaling);
        rect.setLowerLeftY(header.getYMin() * scaling);
        rect.setUpperRightX(header.getXMax() * scaling);
        rect.setUpperRightY(header.getYMax() * scaling);
        fd.setFontBoundingBox(rect);
        HorizontalHeaderTable hHeader = ttf.getHorizontalHeader();
        fd.setAscent(hHeader.getAscender() * scaling);
        fd.setDescent(hHeader.getDescender() * scaling);
        GlyphTable glyphTable = ttf.getGlyph();
        GlyphData[] glyphs = glyphTable.getGlyphs();
        PostScriptTable ps = ttf.getPostScript();
        fd.setFixedPitch(ps.getIsFixedPitch() > 0);
        fd.setItalicAngle(ps.getItalicAngle());
        String[] names = ps.getGlyphNames();
        if (names != null) {
            for (int i = 0; i < names.length; i++) {
                // if we have a capital H then use that, otherwise use the tallest letter
                if (names[i].equals("H")) {
                    fd.setCapHeight(glyphs[i].getBoundingBox().getUpperRightY() / scaling);
                }
                if (names[i].equals("x")) {
                    fd.setXHeight(glyphs[i].getBoundingBox().getUpperRightY() / scaling);
                }
            }
        }
        // hmm there does not seem to be a clear definition for StemV,
        // this is close enough and I am told it doesn't usually get used.
        fd.setStemV((fd.getFontBoundingBox().getWidth() * .13f));
        CMAPTable cmapTable = ttf.getCMAP();
        CMAPEncodingEntry[] cmaps = cmapTable.getCmaps();
        CMAPEncodingEntry uniMap = null;
        for (int i = 0; i < cmaps.length; i++) {
            if (cmaps[i].getPlatformId() == CMAPTable.PLATFORM_WINDOWS) {
                int platformEncoding = cmaps[i].getPlatformEncodingId();
                if (CMAPTable.ENCODING_UNICODE == platformEncoding) {
                    uniMap = cmaps[i];
                    break;
                }
            }
        }
        Map<Integer, String> codeToName = this.getFontEncoding().getCodeToNameMap();
        int firstChar = Collections.min(codeToName.keySet());
        int lastChar = Collections.max(codeToName.keySet());
        HorizontalMetricsTable hMet = ttf.getHorizontalMetrics();
        int[] widthValues = hMet.getAdvanceWidth();
        int nWidths = lastChar - firstChar + 1;
        List<Float> widths = new ArrayList<Float>(nWidths);
        // width of the .notdef character.
        Float zero = Float.valueOf(widthValues[0] * scaling);
        for (int i = 0; i < nWidths; i++) {
            widths.add(zero);
        }
        // Encoding singleton to have access to the glyph name to
        // Unicode code point mapping of Adobe's glyphlist.txt
        Encoding glyphlist = WinAnsiEncoding.INSTANCE;
        // A character code is mapped to a glyph name via the provided
        // font encoding. Afterwards, the glyph name is translated to a
        // glyph ID.
        // For details, see PDFReference16.pdf, Section 5.5.5, p.401
        for (Entry<Integer, String> e : codeToName.entrySet()) {
            String name = e.getValue();
            // pdf code to unicode by glyph list.
            String c = glyphlist.getCharacter(name);
            int charCode = c.codePointAt(0);
            int gid = uniMap.getGlyphId(charCode);
            if (gid != 0) {
                widths.set(e.getKey().intValue() - firstChar, widthValues[gid] * scaling);
            }
        }
        setWidths(widths);
        setFirstChar(firstChar);
        setLastChar(lastChar);
    } finally {
        if (ttf != null) {
            ttf.close();
        }
    }
}
From source file: cn.lambdalib.cgui.gui.component.TextBox.java

private void checkCaretRegion() {
    final double widthLimit = widthLimit();
    final String local = processedContent().substring(displayOffset);
    final int localCaret = caretPos - displayOffset;
    final double distance = sumLength(local, 0, localCaret);
    if (distance > widthLimit) {
        double acc = 0.0;
        int mini = 0;
        // Accumulate character widths until the caret fits inside the visible width.
        // Note: mini advances one char (not one code point) per step, so a surrogate
        // pair would be visited twice here.
        for (; mini < localCaret && distance - acc > widthLimit; ++mini) {
            acc += font.getCharWidth(local.codePointAt(mini), option);
        }
        displayOffset += mini;
    }
    assert displayOffset < caretPos;
}
From source file: org.apache.pdfbox.text.TextPosition.java

/**
 * Combine the diacritic, for example, convert non-combining diacritic characters to their
 * combining counterparts.
 *
 * @param str String to normalize
 * @return Normalized string
 */
private String combineDiacritic(String str) {
    // Unicode contains special combining forms of the diacritic characters which we want to use
    int codePoint = str.codePointAt(0);
    // convert the characters not defined in the Unicode spec
    if (DIACRITICS.containsKey(codePoint)) {
        return DIACRITICS.get(codePoint);
    } else {
        return Normalizer.normalize(str, Normalizer.Form.NFKC).trim();
    }
}
From source file: org.sejda.sambox.text.TextPosition.java

/**
 * Combine the diacritic, for example, convert non-combining diacritic characters to their
 * combining counterparts.
 *
 * @param str String to normalize
 * @return Normalized string
 */
private String combineDiacritic(String str) {
    // Unicode contains special combining forms of the diacritic characters which we want to use
    int codePoint = str.codePointAt(0);
    // convert the characters not defined in the Unicode spec
    if (DIACRITICS.containsKey(codePoint)) {
        return DIACRITICS.get(codePoint);
    }
    return Normalizer.normalize(str, Normalizer.Form.NFKC).trim();
}
From source file: org.omegat.tokenizer.BaseTokenizer.java

protected Token[] tokenizeByCodePoint(String strOrig) {
    // See http://www.ibm.com/developerworks/library/j-unicode/#1-5
    // Example 1-5 appears to be faster than 1-6 for us (because our strings are short?)
    Token[] tokens = new Token[strOrig.codePointCount(0, strOrig.length())];
    for (int cp, i = 0, j = 0; i < strOrig.length(); i += Character.charCount(cp)) {
        cp = strOrig.codePointAt(i);
        tokens[j++] = new Token(String.valueOf(Character.toChars(cp)), i);
    }
    return tokens;
}
From source file: org.omegat.tokenizer.BaseTokenizer.java

protected String[] tokenizeByCodePointToStrings(String strOrig) {
    // See http://www.ibm.com/developerworks/library/j-unicode/#1-5
    // Example 1-5 appears to be faster than 1-6 for us (because our strings are short?)
    String[] tokens = new String[strOrig.codePointCount(0, strOrig.length())];
    for (int cp, i = 0, j = 0; i < strOrig.length(); i += Character.charCount(cp)) {
        cp = strOrig.codePointAt(i);
        tokens[j++] = String.valueOf(Character.toChars(cp));
    }
    return tokens;
}
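The two tokenizer methods above (and acceptToken below) rest on one idiom: advance the index by Character.charCount(cp) so each iteration consumes exactly one code point, even across surrogate pairs. A standalone sketch of that loop, with an arbitrary example string:

public class CodePointIteration {
    public static void main(String[] args) {
        String s = "a\uD83D\uDE00b"; // 'a', U+1F600 (one code point, two chars), 'b'
        for (int i = 0, cp; i < s.length(); i += Character.charCount(cp)) {
            cp = s.codePointAt(i);
            // Each iteration handles exactly one code point, even across surrogate pairs
            System.out.printf("index %d -> U+%04X%n", i, cp);
        }
        // Prints three lines: index 0 -> U+0061, index 1 -> U+1F600, index 3 -> U+0062
    }
}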
From source file: org.omegat.tokenizer.BaseTokenizer.java

private boolean acceptToken(String token, boolean filterDigits, boolean filterWhitespace) {
    if (StringUtil.isEmpty(token)) {
        return false;
    }
    if (!filterDigits && !filterWhitespace) {
        return true;
    }
    boolean isWhitespaceOnly = true;
    for (int i = 0, cp; i < token.length(); i += Character.charCount(cp)) {
        cp = token.codePointAt(i);
        if (filterDigits && Character.isDigit(cp)) {
            return false;
        }
        if (filterWhitespace && !StringUtil.isWhiteSpace(cp)) {
            isWhitespaceOnly = false;
        }
    }
    return !(filterWhitespace && isWhitespaceOnly);
}
From source file: org.spout.api.chat.ChatArguments.java

/**
 * Splits this ChatArguments instance into sections
 *
 * @param type How these arguments are to be split into sections
 * @return The split sections
 */
public List<ChatSection> toSections(SplitType type) {
    List<ChatSection> sections = new ArrayList<ChatSection>();
    StringBuilder currentWord = new StringBuilder();
    LinkedHashMap<Integer, List<ChatStyle>> map;
    switch (type) {
    case WORD:
        map = new LinkedHashMap<Integer, List<ChatStyle>>();
        int curIndex = 0;
        for (Object obj : getExpandedPlaceholders()) {
            if (obj instanceof ChatStyle) {
                ChatStyle style = (ChatStyle) obj;
                List<ChatStyle> list = map.get(curIndex);
                if (list == null) {
                    list = new ArrayList<ChatStyle>();
                    map.put(curIndex, list);
                }
                ChatSectionUtils.removeConflicting(list, style);
                list.add(style);
            } else {
                String val = String.valueOf(obj);
                for (int i = 0; i < val.length(); ++i) {
                    int codePoint = val.codePointAt(i);
                    if (Character.isWhitespace(codePoint)) {
                        sections.add(new ChatSectionImpl(type,
                                new LinkedHashMap<Integer, List<ChatStyle>>(map),
                                currentWord.toString()));
                        curIndex = 0;
                        currentWord = new StringBuilder();
                        if (map.size() > 0) {
                            final List<ChatStyle> previousStyles = map.containsKey(-1)
                                    ? new ArrayList<ChatStyle>(map.get(-1))
                                    : new ArrayList<ChatStyle>();
                            for (Map.Entry<Integer, List<ChatStyle>> entry : map.entrySet()) {
                                if (entry.getKey() != -1) {
                                    for (ChatStyle style : entry.getValue()) {
                                        ChatSectionUtils.removeConflicting(previousStyles, style);
                                        previousStyles.add(style);
                                    }
                                }
                            }
                            map.clear();
                            map.put(-1, previousStyles);
                        }
                    } else {
                        currentWord.append(val.substring(i, i + 1));
                        curIndex++;
                    }
                }
            }
        }
        if (currentWord.length() > 0) {
            sections.add(new ChatSectionImpl(type, map, currentWord.toString()));
        }
        break;
    case STYLE_CHANGE:
        StringBuilder curSection = new StringBuilder();
        List<ChatStyle> activeStyles = new ArrayList<ChatStyle>(3);
        for (Object obj : getExpandedPlaceholders()) {
            if (obj instanceof ChatStyle) {
                ChatStyle style = (ChatStyle) obj;
                ChatSectionUtils.removeConflicting(activeStyles, style);
                activeStyles.add(style);
                map = new LinkedHashMap<Integer, List<ChatStyle>>();
                map.put(-1, new ArrayList<ChatStyle>(activeStyles));
                sections.add(new ChatSectionImpl(type, map, curSection.toString()));
                curSection = new StringBuilder();
            } else {
                curSection.append(obj);
            }
        }
        break;
    case ALL:
        return Collections.<ChatSection>singletonList(
                new ChatSectionImpl(getSplitType(), getActiveStyles(), getPlainString()));
    default:
        throw new IllegalArgumentException("Unknown SplitOption " + type + "!");
    }
    return sections;
}
From source file: net.sf.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter.java

@Override
public String format(String text) {
    String result = Objects.requireNonNull(text);
    if (result.isEmpty()) {
        return result;
    }
    // Standard symbols
    for (Map.Entry<String, String> unicodeLatexPair : HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP
            .entrySet()) {
        result = result.replace(unicodeLatexPair.getKey(), unicodeLatexPair.getValue());
    }
    // Combining accents
    StringBuilder sb = new StringBuilder();
    boolean consumed = false;
    for (int i = 0; i <= (result.length() - 2); i++) {
        if (!consumed && (i < (result.length() - 1))) {
            int cpCurrent = result.codePointAt(i);
            Integer cpNext = result.codePointAt(i + 1);
            String code = HTMLUnicodeConversionMaps.ESCAPED_ACCENTS.get(cpNext);
            if (code == null) {
                sb.append((char) cpCurrent);
            } else {
                sb.append("{\\").append(code).append('{').append((char) cpCurrent).append("}}");
                consumed = true;
            }
        } else {
            consumed = false;
        }
    }
    if (!consumed) {
        sb.append((char) result.codePointAt(result.length() - 1));
    }
    result = sb.toString();
    // Check if any symbols were not converted
    for (int i = 0; i <= (result.length() - 1); i++) {
        int cp = result.codePointAt(i);
        if (cp >= 129) {
            LOGGER.warn("Unicode character not converted: " + cp);
        }
    }
    return result;
}
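A reduced sketch of the windowed scan in format above. HTMLUnicodeConversionMaps.ESCAPED_ACCENTS is JabRef-specific, so this stand-in detects combining marks with Character.getType and hardcodes the acute-accent LaTeX escape purely for illustration:

public class AccentScan {
    public static void main(String[] args) {
        String s = "e\u0301tude"; // 'e' + U+0301 COMBINING ACUTE ACCENT + "tude"
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            int cp = s.codePointAt(i);
            boolean nextIsCombining = (i + 1 < s.length())
                    && Character.getType(s.codePointAt(i + 1)) == Character.NON_SPACING_MARK;
            if (nextIsCombining) {
                // Stand-in for the real ESCAPED_ACCENTS lookup: emit a LaTeX accent group
                sb.append("{\\'").append((char) cp).append('}');
                i++; // consume the combining mark as well
            } else {
                sb.append((char) cp);
            }
        }
        System.out.println(sb); // {\'e}tude
    }
}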
From source file: org.opencms.search.solr.spellchecking.CmsSolrSpellchecker.java

/**
 * Converts the suggestions from the Solrj format to JSON format.
 *
 * @param response The SpellCheckResponse object containing the spellcheck results.
 * @return The spellcheck suggestions as JSON object or null if something goes wrong.
 */
private JSONObject getConvertedResponseAsJson(SpellCheckResponse response) {
    if (null == response) {
        return null;
    }
    final JSONObject suggestions = new JSONObject();
    final Map<String, Suggestion> solrSuggestions = response.getSuggestionMap();
    // Add suggestions to the response
    for (final String key : solrSuggestions.keySet()) {
        // Indicator to ignore words that are erroneously marked as misspelled.
        boolean ignoreWord = false;
        // Suggestions that are in the form "Xxxx" -> "xxxx" should be ignored.
        if (Character.isUpperCase(key.codePointAt(0))) {
            final String lowercaseKey = key.toLowerCase();
            // If the suggestion map doesn't contain the lowercased word, ignore this entry.
            if (!solrSuggestions.containsKey(lowercaseKey)) {
                ignoreWord = true;
            }
        }
        if (!ignoreWord) {
            try {
                // Get suggestions as List
                final List<String> l = solrSuggestions.get(key).getAlternatives();
                suggestions.put(key, l);
            } catch (JSONException e) {
                LOG.debug("Exception while converting Solr spellcheckresponse to JSON. ", e);
            }
        }
    }
    return suggestions;
}