Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.font;

import java.awt.geom.GeneralPath;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.FontBoxFont;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import org.apache.pdfbox.pdmodel.font.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.font.encoding.MacRomanEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.StandardEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding;

/**
 * A simple font. Simple fonts use a PostScript encoding vector.
 *
 * @author John Hewson
 */
public abstract class PDSimpleFont extends PDFont
{
    private static final Log LOG = LogFactory.getLog(PDSimpleFont.class);

    /** Encoding vector of this font; assigned by {@link #readEncoding()}. */
    protected Encoding encoding;

    /** Glyph list used to map glyph names to Unicode. */
    protected GlyphList glyphList;

    /** Lazily computed result of {@link #isSymbolic()}; {@code null} until first requested. */
    private Boolean isSymbolic;

    /** Character codes for which a "No Unicode mapping" warning was already logged. */
    private final Set<Integer> noUnicode = new HashSet<>(); // for logging

    /**
     * Constructor for embedding.
     */
    PDSimpleFont()
    {
        super();
    }

    /**
     * Constructor for Standard 14.
     *
     * @param baseFont name of the base font; also selects the glyph list.
     */
    PDSimpleFont(String baseFont)
    {
        super(baseFont);

        // assign the glyph list based on the font
        glyphList = selectGlyphList(baseFont);
    }

    /**
     * Constructor.
     *
     * @param fontDictionary Font dictionary.
     * @throws IOException if thrown by the superclass constructor.
     */
    PDSimpleFont(COSDictionary fontDictionary) throws IOException
    {
        super(fontDictionary);
    }

    /**
     * Picks the glyph list for the given font name.
     *
     * @param fontName font name, may be null.
     * @return the Zapf Dingbats glyph list for "ZapfDingbats", otherwise the Adobe Glyph List.
     */
    private static GlyphList selectGlyphList(String fontName)
    {
        if ("ZapfDingbats".equals(fontName))
        {
            return GlyphList.getZapfDingbats();
        }
        // StandardEncoding and Symbol are in the AGL
        return GlyphList.getAdobeGlyphList();
    }

    /**
     * Reads the Encoding from the Font dictionary or the embedded or substituted font file.
     * Must be called at the end of any subclass constructors.
     *
     * <p>The Encoding entry is handled as follows:
     * <ul>
     * <li>a name: the matching predefined encoding, or the font's own encoding if the name
     * is unknown;</li>
     * <li>a dictionary: a {@link DictionaryEncoding} built from it;</li>
     * <li>absent or any other object type: the encoding is read from the font.</li>
     * </ul>
     *
     * @throws IOException if the font file could not be read
     */
    protected void readEncoding() throws IOException
    {
        COSBase encodingBase = dict.getDictionaryObject(COSName.ENCODING);
        if (encodingBase instanceof COSName)
        {
            COSName encodingName = (COSName) encodingBase;
            this.encoding = Encoding.getInstance(encodingName);
            if (this.encoding == null)
            {
                LOG.warn("Unknown encoding: " + encodingName.getName());
                this.encoding = readEncodingFromFont(); // fallback
            }
        }
        else if (encodingBase instanceof COSDictionary)
        {
            this.encoding = readDictionaryEncoding((COSDictionary) encodingBase);
        }
        else
        {
            // Missing entry, or an entry of an unexpected type: without this fallback the
            // encoding would stay null for the unexpected-type case.
            this.encoding = readEncodingFromFont();
        }

        // normalise the standard 14 name, e.g "Symbol,Italic" -> "Symbol"
        String standard14Name = Standard14Fonts.getMappedFontName(getName());

        // assign the glyph list based on the font
        glyphList = selectGlyphList(standard14Name);
    }

    /**
     * Builds the encoding described by an Encoding dictionary.
     *
     * @param encodingDict the Encoding dictionary of the font.
     * @return the dictionary based encoding.
     * @throws IOException if the font file could not be read.
     */
    private Encoding readDictionaryEncoding(COSDictionary encodingDict) throws IOException
    {
        Boolean symbolic = getSymbolicFlag();
        // a missing flag is treated as "not symbolic"
        boolean isFlaggedAsSymbolic = symbolic != null && symbolic;

        COSName baseEncoding = encodingDict.getCOSName(COSName.BASE_ENCODING);
        boolean hasValidBaseEncoding = baseEncoding != null &&
                Encoding.getInstance(baseEncoding) != null;

        // the font's own encoding is only read when the font is flagged as symbolic
        // and the dictionary names no usable base encoding
        Encoding builtIn = null;
        if (!hasValidBaseEncoding && isFlaggedAsSymbolic)
        {
            builtIn = readEncodingFromFont();
        }

        return new DictionaryEncoding(encodingDict, !isFlaggedAsSymbolic, builtIn);
    }

    /**
     * Called by readEncoding() if the encoding needs to be extracted from the font file.
     *
     * @return the encoding read from the font file.
     * @throws IOException if the font file could not be read.
     */
    protected abstract Encoding readEncodingFromFont() throws IOException;

    /**
     * Returns the Encoding vector.
     *
     * @return the encoding, or null if none has been assigned yet.
     */
    public Encoding getEncoding()
    {
        return encoding;
    }

    /**
     * Returns the glyph list used to map the glyph names of this font to Unicode.
     *
     * @return the glyph list, or null if none has been assigned yet.
     */
    public GlyphList getGlyphList()
    {
        return glyphList;
    }

    /**
     * Returns true if the font is symbolic (that is, it does not use the Adobe Standard Roman
     * character set). The result is computed on first use and then cached.
     *
     * @return true if the font is symbolic or if this cannot be determined.
     */
    public final boolean isSymbolic()
    {
        if (isSymbolic == null)
        {
            Boolean result = isFontSymbolic();
            // unless we can prove that the font is non-symbolic, we assume that it is symbolic
            isSymbolic = result != null ? result : Boolean.TRUE;
        }
        return isSymbolic;
    }

    /**
     * Internal implementation of isSymbolic, allowing for the fact that the result may be
     * indeterminate.
     *
     * @return true if symbolic, false if non-symbolic, null if this cannot be determined.
     * @throws IllegalStateException if no encoding is set and this is not a
     * {@link PDTrueTypeFont}.
     */
    protected Boolean isFontSymbolic()
    {
        Boolean flag = getSymbolicFlag();
        if (flag != null)
        {
            return flag;
        }

        if (isStandard14())
        {
            String mappedName = Standard14Fonts.getMappedFontName(getName());
            // constant-first comparison so that a null mapped name cannot throw
            return "Symbol".equals(mappedName) || "ZapfDingbats".equals(mappedName);
        }

        if (encoding == null)
        {
            // sanity check, should never happen
            if (!(this instanceof PDTrueTypeFont))
            {
                throw new IllegalStateException("PDFBox bug: encoding should not be null!");
            }

            // TTF without its non-symbolic flag set must be symbolic
            return true;
        }

        if (encoding instanceof WinAnsiEncoding ||
            encoding instanceof MacRomanEncoding ||
            encoding instanceof StandardEncoding)
        {
            return false;
        }

        if (encoding instanceof DictionaryEncoding)
        {
            // each name in Differences array must also be in the latin character set
            for (String name : ((DictionaryEncoding) encoding).getDifferences().values())
            {
                if (".notdef".equals(name))
                {
                    continue;
                }
                boolean inAllLatinEncodings = WinAnsiEncoding.INSTANCE.contains(name) &&
                        MacRomanEncoding.INSTANCE.contains(name) &&
                        StandardEncoding.INSTANCE.contains(name);
                if (!inAllLatinEncodings)
                {
                    return true;
                }
            }
            return false;
        }

        // we don't know
        return null;
    }

    /**
     * Returns the value of the symbolic flag, allowing for the fact that the result may be
     * indeterminate.
     *
     * @return the flag from the font descriptor, or null if there is no font descriptor.
     */
    protected final Boolean getSymbolicFlag()
    {
        if (getFontDescriptor() == null)
        {
            return null;
        }
        // fixme: isSymbolic() defaults to false if the flag is missing so we can't trust this
        return getFontDescriptor().isSymbolic();
    }

    /**
     * Returns the Unicode text for the given character code, using the Adobe Glyph List
     * as the glyph list for name lookups.
     *
     * @param code character code.
     * @return the Unicode text, or null if no mapping could be found.
     * @throws IOException if thrown while resolving the mapping.
     */
    @Override
    public String toUnicode(int code) throws IOException
    {
        return toUnicode(code, GlyphList.getAdobeGlyphList());
    }

    /**
     * Returns the Unicode text for the given character code. The lookup made by the
     * superclass is tried first; if it yields nothing, the code is mapped to a glyph name
     * through the encoding and that name is looked up in a glyph list.
     *
     * @param code character code.
     * @param customGlyphList glyph list used instead of the Adobe Glyph List; it is ignored
     * when this font uses a glyph list other than the Adobe Glyph List.
     * @return the Unicode text, or null if no mapping could be found.
     * @throws IOException if thrown while resolving the mapping.
     */
    @Override
    public String toUnicode(int code, GlyphList customGlyphList) throws IOException
    {
        // allow the glyph list to be overridden for the purpose of extracting Unicode
        // we only do this when the font's glyph list is the AGL, to avoid breaking Zapf Dingbats
        GlyphList unicodeGlyphList = this.glyphList == GlyphList.getAdobeGlyphList()
                ? customGlyphList
                : this.glyphList;

        // first try to use a ToUnicode CMap
        String unicode = super.toUnicode(code);
        if (unicode != null)
        {
            return unicode;
        }

        // if the font is a "simple font" and uses MacRoman/MacExpert/WinAnsi[Encoding]
        // or has Differences with names from only Adobe Standard and/or Symbol, then:
        //
        //    a) Map the character codes to names
        //    b) Look up the name in the Adobe Glyph List to obtain the Unicode value

        String name = null;
        if (encoding != null)
        {
            name = encoding.getName(code);
            unicode = unicodeGlyphList.toUnicode(name);
            if (unicode != null)
            {
                return unicode;
            }
        }

        // if no value has been produced, there is no way to obtain Unicode for the character.
        // Set.add() returns false for a code that was already recorded, so each code is
        // reported only once; nothing is recorded while warnings are disabled.
        if (LOG.isWarnEnabled() && noUnicode.add(code))
        {
            if (name != null)
            {
                LOG.warn("No Unicode mapping for " + name + " (" + code + ") in font " +
                        getName());
            }
            else
            {
                LOG.warn("No Unicode mapping for character code " + code + " in font " +
                        getName());
            }
        }

        return null;
    }

    /**
     * Simple fonts are never vertical.
     *
     * @return always false.
     */
    @Override
    public boolean isVertical()
    {
        return false;
    }

    /**
     * Returns the width of the given character code from the Standard 14 AFM.
     *
     * @param code character code.
     * @return the width from the AFM, or 250 for ".notdef".
     * @throws IllegalStateException if there is no AFM for this font.
     */
    @Override
    protected final float getStandard14Width(int code)
    {
        if (getStandard14AFM() == null)
        {
            throw new IllegalStateException("No AFM");
        }

        String nameInAFM = getEncoding().getName(code);

        // the Adobe AFMs don't include .notdef, but Acrobat uses 250, test with PDFBOX-2334
        if (".notdef".equals(nameInAFM))
        {
            return 250f;
        }

        return getStandard14AFM().getCharacterWidth(nameInAFM);
    }

    /**
     * Returns true if this font gets "standard 14" handling. A font whose Differences
     * actually change its base encoding never qualifies; otherwise the decision is left to
     * the superclass.
     *
     * @return true if the font is to be treated as a standard 14 font.
     */
    @Override
    public boolean isStandard14()
    {
        // this logic is based on Acrobat's behaviour, see PDFBOX-2372
        // the Encoding entry cannot have Differences if we want "standard 14" font handling
        if (getEncoding() instanceof DictionaryEncoding)
        {
            DictionaryEncoding dictionary = (DictionaryEncoding) getEncoding();
            Map<Integer, String> differences = dictionary.getDifferences();
            if (!differences.isEmpty())
            {
                // we also require that the differences are actually different, see PDFBOX-1900
                // with the file from PDFBOX-2192 on Windows
                Encoding baseEncoding = dictionary.getBaseEncoding();
                for (Map.Entry<Integer, String> entry : differences.entrySet())
                {
                    if (!entry.getValue().equals(baseEncoding.getName(entry.getKey())))
                    {
                        return false;
                    }
                }
            }
        }
        return super.isStandard14();
    }

    /**
     * Tells whether at least one coordinate of the bounding box is different from zero.
     *
     * @param bbox bounding box, may be null.
     * @return true if the box is not null and any of its four coordinates is non-zero
     * according to {@link Float#compare(float, float)}.
     */
    protected boolean isNonZeroBoundingBox(PDRectangle bbox)
    {
        if (bbox == null)
        {
            return false;
        }
        return Float.compare(bbox.getLowerLeftX(), 0) != 0 ||
               Float.compare(bbox.getLowerLeftY(), 0) != 0 ||
               Float.compare(bbox.getUpperRightX(), 0) != 0 ||
               Float.compare(bbox.getUpperRightY(), 0) != 0;
    }

    /**
     * Returns the path for the character with the given name. For some fonts, GIDs may be used
     * instead of names when calling this method.
     *
     * @param name glyph name.
     * @return glyph path
     * @throws IOException if the path could not be read
     */
    public abstract GeneralPath getPath(String name) throws IOException;

    /**
     * Returns true if the font contains the character with the given name.
     *
     * @param name glyph name.
     * @return true if the font has a glyph with that name.
     * @throws IOException if the path could not be read
     */
    public abstract boolean hasGlyph(String name) throws IOException;

    /**
     * Returns the embedded or system font used for rendering. This is never null.
     *
     * @return the font used for rendering.
     */
    public abstract FontBoxFont getFontBoxFont();

    /**
     * Not supported by simple fonts.
     *
     * @param codePoint ignored.
     * @throws UnsupportedOperationException always.
     */
    @Override
    public void addToSubset(int codePoint)
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported by simple fonts.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public void subset() throws IOException
    {
        // only TTF subsetting via PDType0Font is currently supported
        throw new UnsupportedOperationException();
    }

    /**
     * Simple fonts are never subset.
     *
     * @return always false.
     */
    @Override
    public boolean willBeSubset()
    {
        return false;
    }

    /**
     * Tells whether the font dictionary holds a Widths entry that covers the given code.
     *
     * @param code character code.
     * @return true if a Widths entry exists and the code lies between FirstChar and the end
     * of the widths list.
     * @throws IOException declared by the overridden method.
     */
    @Override
    public boolean hasExplicitWidth(int code) throws IOException
    {
        if (!dict.containsKey(COSName.WIDTHS))
        {
            return false;
        }
        int firstChar = dict.getInt(COSName.FIRST_CHAR, -1);
        return code >= firstChar && code - firstChar < getWidths().size();
    }
}