Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id: CharacterSetBuilder.java 1338605 2012-05-15 09:07:02Z mehdi $ */ package org.apache.fop.afp.fonts; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.WeakHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.xmlgraphics.image.loader.util.SoftMapCache; import org.apache.fop.afp.AFPConstants; import org.apache.fop.afp.AFPEventProducer; import org.apache.fop.afp.util.ResourceAccessor; import org.apache.fop.afp.util.StructuredFieldReader; import org.apache.fop.fonts.Typeface; /** * The CharacterSetBuilder is responsible building the a CharacterSet instance that holds * the font metric data. The data is either read from disk and passed to a CharacterSet (*) * or a FopCharacterSet is instantiated that is composed of a Typeface instance configured * with this data.<p/> * -*- For referenced fonts CharacterSetBuilder is responsible for reading the font attributes * from binary code page files and the character set metric files. In IBM font structure, a * code page maps each character of text to the characters in a character set. * Each character is translated into a code point. When the character is * printed, each code point is matched to a character ID on the code page * specified. The character ID is then matched to the image (raster pattern or * outline pattern) of the character in the character set specified. The image * in the character set is the image that is printed in the document. To be a * valid code page for a particular character set, all character IDs in the code * page must be included in that character set. <p/>This class will read the * font information from the binary code page files and character set metric * files in order to determine the correct metrics to use when rendering the * formatted object. <p/> * */ public abstract class CharacterSetBuilder { /** * Static logging instance */ protected static final Log LOG = LogFactory.getLog(CharacterSetBuilder.class); /** * Template used to convert lists to arrays. */ private static final CharacterSetOrientation[] EMPTY_CSO_ARRAY = new CharacterSetOrientation[0]; /** Codepage MO:DCA structured field. */ private static final byte[] CODEPAGE_SF = new byte[] { (byte) 0xD3, (byte) 0xA8, (byte) 0x87 }; /** Character table MO:DCA structured field. */ private static final byte[] CHARACTER_TABLE_SF = new byte[] { (byte) 0xD3, (byte) 0x8C, (byte) 0x87 }; /** Font descriptor MO:DCA structured field. */ private static final byte[] FONT_DESCRIPTOR_SF = new byte[] { (byte) 0xD3, (byte) 0xA6, (byte) 0x89 }; /** Font control MO:DCA structured field. */ private static final byte[] FONT_CONTROL_SF = new byte[] { (byte) 0xD3, (byte) 0xA7, (byte) 0x89 }; /** Font orientation MO:DCA structured field. */ private static final byte[] FONT_ORIENTATION_SF = new byte[] { (byte) 0xD3, (byte) 0xAE, (byte) 0x89 }; /** Font position MO:DCA structured field. */ private static final byte[] FONT_POSITION_SF = new byte[] { (byte) 0xD3, (byte) 0xAC, (byte) 0x89 }; /** Font index MO:DCA structured field. */ private static final byte[] FONT_INDEX_SF = new byte[] { (byte) 0xD3, (byte) 0x8C, (byte) 0x89 }; /** * The collection of code pages */ private final Map<String, Map<String, String>> codePagesCache = Collections .synchronizedMap(new WeakHashMap<String, Map<String, String>>()); /** * Cache of charactersets */ private final SoftMapCache characterSetsCache = new SoftMapCache(true); /** Default constructor. */ private CharacterSetBuilder() { } /** * Factory method for the single-byte implementation of AFPFontReader. * @return AFPFontReader */ public static CharacterSetBuilder getSingleByteInstance() { return SingleByteLoader.getInstance(); } /** * Factory method for the double-byte (CID Keyed font (Type 0)) implementation of AFPFontReader. * @return AFPFontReader */ public static CharacterSetBuilder getDoubleByteInstance() { return DoubleByteLoader.getInstance(); } /** * Returns an InputStream to a given file path and filename * * * @param accessor the resource accessor * @param filename the file name * @param eventProducer for handling AFP related events * @return an inputStream * * @throws IOException in the event that an I/O exception of some sort has occurred */ protected InputStream openInputStream(ResourceAccessor accessor, String filename, AFPEventProducer eventProducer) throws IOException { URI uri; try { uri = new URI(filename.trim()); } catch (URISyntaxException e) { throw new FileNotFoundException("Invalid filename: " + filename + " (" + e.getMessage() + ")"); } if (LOG.isDebugEnabled()) { LOG.debug("Opening " + uri); } InputStream inputStream = accessor.createInputStream(uri); return inputStream; } /** * Closes the inputstream * * @param inputStream the inputstream to close */ protected void closeInputStream(InputStream inputStream) { try { if (inputStream != null) { inputStream.close(); } } catch (Exception ex) { // Lets log at least! LOG.error(ex.getMessage()); } } /** * Load the font details and metrics into the CharacterSetMetric object, this will use the * actual afp code page and character set files to load the object with the necessary metrics. * * @param characterSetName name of the characterset * @param codePageName name of the code page file * @param encoding encoding name * @param accessor used to load codepage and characterset * @param eventProducer for handling AFP related events * @return CharacterSet object * @throws IOException if an I/O error occurs */ public CharacterSet buildSBCS(String characterSetName, String codePageName, String encoding, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { return processFont(characterSetName, codePageName, encoding, CharacterSetType.SINGLE_BYTE, accessor, eventProducer); } /** * Load the font details and metrics into the CharacterSetMetric object, this will use the * actual afp code page and character set files to load the object with the necessary metrics. * This method is to be used for double byte character sets (DBCS). * * @param characterSetName name of the characterset * @param codePageName name of the code page file * @param encoding encoding name * @param charsetType the characterset type * @param accessor used to load codepage and characterset * @param eventProducer for handling AFP related events * @return CharacterSet object * @throws IOException if an I/O error occurs */ public CharacterSet buildDBCS(String characterSetName, String codePageName, String encoding, CharacterSetType charsetType, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { return processFont(characterSetName, codePageName, encoding, charsetType, accessor, eventProducer); } /** * Load the font details and metrics into the CharacterSetMetric object, this will use the * actual afp code page and character set files to load the object with the necessary metrics. * * @param characterSetName the CharacterSetMetric object to populate * @param codePageName the name of the code page to use * @param encoding name of the encoding in use * @param typeface base14 font name * @param eventProducer for handling AFP related events * @return CharacterSet object * @throws IOException if an I/O error occurs */ public CharacterSet build(String characterSetName, String codePageName, String encoding, Typeface typeface, AFPEventProducer eventProducer) throws IOException { return new FopCharacterSet(codePageName, encoding, characterSetName, typeface, eventProducer); } private CharacterSet processFont(String characterSetName, String codePageName, String encoding, CharacterSetType charsetType, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { // check for cached version of the characterset String descriptor = characterSetName + "_" + encoding + "_" + codePageName; CharacterSet characterSet = (CharacterSet) characterSetsCache.get(descriptor); if (characterSet != null) { return characterSet; } // characterset not in the cache, so recreating characterSet = new CharacterSet(codePageName, encoding, charsetType, characterSetName, accessor, eventProducer); InputStream inputStream = null; try { /** * Get the code page which contains the character mapping * information to map the unicode character id to the graphic * chracter global identifier. */ Map<String, String> codePage; synchronized (codePagesCache) { codePage = codePagesCache.get(codePageName); if (codePage == null) { codePage = loadCodePage(codePageName, encoding, accessor, eventProducer); codePagesCache.put(codePageName, codePage); } } inputStream = openInputStream(accessor, characterSetName, eventProducer); StructuredFieldReader structuredFieldReader = new StructuredFieldReader(inputStream); // Process D3A689 Font Descriptor FontDescriptor fontDescriptor = processFontDescriptor(structuredFieldReader); characterSet.setNominalVerticalSize(fontDescriptor.getNominalFontSizeInMillipoints()); // Process D3A789 Font Control FontControl fontControl = processFontControl(structuredFieldReader); if (fontControl != null) { //process D3AE89 Font Orientation CharacterSetOrientation[] characterSetOrientations = processFontOrientation(structuredFieldReader); double metricNormalizationFactor; if (fontControl.isRelative()) { metricNormalizationFactor = 1; } else { int dpi = fontControl.getDpi(); metricNormalizationFactor = 1000.0d * 72000.0d / fontDescriptor.getNominalFontSizeInMillipoints() / dpi; } //process D3AC89 Font Position processFontPosition(structuredFieldReader, characterSetOrientations, metricNormalizationFactor); //process D38C89 Font Index (per orientation) for (int i = 0; i < characterSetOrientations.length; i++) { processFontIndex(structuredFieldReader, characterSetOrientations[i], codePage, metricNormalizationFactor); characterSet.addCharacterSetOrientation(characterSetOrientations[i]); } } else { throw new IOException("Missing D3AE89 Font Control structured field."); } } finally { closeInputStream(inputStream); } characterSetsCache.put(descriptor, characterSet); return characterSet; } /** * Load the code page information from the appropriate file. The file name * to load is determined by the code page name and the file extension 'CDP'. * * @param codePage * the code page identifier * @param encoding * the encoding to use for the character decoding * @param accessor the resource accessor * @param eventProducer for handling AFP related events * @return a code page mapping (key: GCGID, value: Unicode character) * @throws IOException if an I/O exception of some sort has occurred. */ protected Map<String, String> loadCodePage(String codePage, String encoding, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { // Create the HashMap to store code page information Map<String, String> codePages = new HashMap<String, String>(); InputStream inputStream = null; try { inputStream = openInputStream(accessor, codePage.trim(), eventProducer); StructuredFieldReader structuredFieldReader = new StructuredFieldReader(inputStream); byte[] data = structuredFieldReader.getNext(CHARACTER_TABLE_SF); int position = 0; byte[] gcgiBytes = new byte[8]; byte[] charBytes = new byte[1]; // Read data, ignoring bytes 0 - 2 for (int index = 3; index < data.length; index++) { if (position < 8) { // Build the graphic character global identifier key gcgiBytes[position] = data[index]; position++; } else if (position == 9) { position = 0; // Set the character charBytes[0] = data[index]; String gcgiString = new String(gcgiBytes, AFPConstants.EBCIDIC_ENCODING); //Use the 8-bit char index to find the Unicode character using the Java encoding //given in the configuration. If the code page and the Java encoding don't //match, a wrong Unicode character will be associated with the AFP GCGID. //Idea: we could use IBM's GCGID to Unicode map and build code pages ourselves. String charString = new String(charBytes, encoding); codePages.put(gcgiString, charString); } else { position++; } } } catch (FileNotFoundException e) { eventProducer.codePageNotFound(this, e); } finally { closeInputStream(inputStream); } return codePages; } /** * Process the font descriptor details using the structured field reader. * * @param structuredFieldReader the structured field reader * @return a class representing the font descriptor * @throws IOException if an I/O exception of some sort has occurred. */ protected static FontDescriptor processFontDescriptor(StructuredFieldReader structuredFieldReader) throws IOException { byte[] fndData = structuredFieldReader.getNext(FONT_DESCRIPTOR_SF); return new FontDescriptor(fndData); } /** * Process the font control details using the structured field reader. * * @param structuredFieldReader * the structured field reader * @return the FontControl * @throws IOException if an I/O exception of some sort has occurred. */ protected FontControl processFontControl(StructuredFieldReader structuredFieldReader) throws IOException { byte[] fncData = structuredFieldReader.getNext(FONT_CONTROL_SF); FontControl fontControl = null; if (fncData != null) { fontControl = new FontControl(); if (fncData[7] == (byte) 0x02) { fontControl.setRelative(true); } int metricResolution = getUBIN(fncData, 9); if (metricResolution == 1000) { //Special case: 1000 units per em (rather than dpi) fontControl.setUnitsPerEm(1000); } else { fontControl.setDpi(metricResolution / 10); } } return fontControl; } /** * Process the font orientation details from using the structured field * reader. * * @param structuredFieldReader * the structured field reader * @return CharacterSetOrientation array * @throws IOException if an I/O exception of some sort has occurred. */ protected CharacterSetOrientation[] processFontOrientation(StructuredFieldReader structuredFieldReader) throws IOException { byte[] data = structuredFieldReader.getNext(FONT_ORIENTATION_SF); int position = 0; byte[] fnoData = new byte[26]; List<CharacterSetOrientation> orientations = new ArrayList<CharacterSetOrientation>(); // Read data, ignoring bytes 0 - 2 for (int index = 3; index < data.length; index++) { // Build the font orientation record fnoData[position] = data[index]; position++; if (position == 26) { position = 0; int orientation = determineOrientation(fnoData[2]); // Space Increment int space = ((fnoData[8] & 0xFF) << 8) + (fnoData[9] & 0xFF); // Em-Space Increment int em = ((fnoData[14] & 0xFF) << 8) + (fnoData[15] & 0xFF); CharacterSetOrientation cso = new CharacterSetOrientation(orientation); cso.setSpaceIncrement(space); cso.setEmSpaceIncrement(em); orientations.add(cso); } } return orientations.toArray(EMPTY_CSO_ARRAY); } /** * Populate the CharacterSetOrientation object in the suplied array with the * font position details using the supplied structured field reader. * * @param structuredFieldReader * the structured field reader * @param characterSetOrientations * the array of CharacterSetOrientation objects * @param metricNormalizationFactor factor to apply to the metrics to get normalized * font metric values * @throws IOException if an I/O exception of some sort has occurred. */ protected void processFontPosition(StructuredFieldReader structuredFieldReader, CharacterSetOrientation[] characterSetOrientations, double metricNormalizationFactor) throws IOException { byte[] data = structuredFieldReader.getNext(FONT_POSITION_SF); int position = 0; byte[] fpData = new byte[26]; int characterSetOrientationIndex = 0; // Read data, ignoring bytes 0 - 2 for (int index = 3; index < data.length; index++) { if (position < 22) { // Build the font orientation record fpData[position] = data[index]; if (position == 9) { CharacterSetOrientation characterSetOrientation = characterSetOrientations[characterSetOrientationIndex]; int xHeight = getSBIN(fpData, 2); int capHeight = getSBIN(fpData, 4); int ascHeight = getSBIN(fpData, 6); int dscHeight = getSBIN(fpData, 8); dscHeight = dscHeight * -1; characterSetOrientation.setXHeight((int) Math.round(xHeight * metricNormalizationFactor)); characterSetOrientation.setCapHeight((int) Math.round(capHeight * metricNormalizationFactor)); characterSetOrientation.setAscender((int) Math.round(ascHeight * metricNormalizationFactor)); characterSetOrientation.setDescender((int) Math.round(dscHeight * metricNormalizationFactor)); } } else if (position == 22) { position = 0; characterSetOrientationIndex++; fpData[position] = data[index]; } position++; } } /** * Process the font index details for the character set orientation. * * @param structuredFieldReader the structured field reader * @param cso the CharacterSetOrientation object to populate * @param codepage the map of code pages * @param metricNormalizationFactor factor to apply to the metrics to get normalized * font metric values * @throws IOException if an I/O exception of some sort has occurred. */ protected void processFontIndex(StructuredFieldReader structuredFieldReader, CharacterSetOrientation cso, Map<String, String> codepage, double metricNormalizationFactor) throws IOException { byte[] data = structuredFieldReader.getNext(FONT_INDEX_SF); int position = 0; byte[] gcgid = new byte[8]; byte[] fiData = new byte[20]; char lowest = 255; char highest = 0; String firstABCMismatch = null; // Read data, ignoring bytes 0 - 2 for (int index = 3; index < data.length; index++) { if (position < 8) { gcgid[position] = data[index]; position++; } else if (position < 27) { fiData[position - 8] = data[index]; position++; } else if (position == 27) { fiData[position - 8] = data[index]; position = 0; String gcgiString = new String(gcgid, AFPConstants.EBCIDIC_ENCODING); String idx = codepage.get(gcgiString); if (idx != null) { char cidx = idx.charAt(0); int width = getUBIN(fiData, 0); int a = getSBIN(fiData, 10); int b = getUBIN(fiData, 12); int c = getSBIN(fiData, 14); int abc = a + b + c; int diff = Math.abs(abc - width); if (diff != 0 && width != 0) { double diffPercent = 100 * diff / (double) width; if (diffPercent > 2) { if (LOG.isTraceEnabled()) { LOG.trace(gcgiString + ": " + a + " + " + b + " + " + c + " = " + (a + b + c) + " but found: " + width); } if (firstABCMismatch == null) { firstABCMismatch = gcgiString; } } } if (cidx < lowest) { lowest = cidx; } if (cidx > highest) { highest = cidx; } int normalizedWidth = (int) Math.round(width * metricNormalizationFactor); cso.setWidth(cidx, normalizedWidth); } } } cso.setFirstChar(lowest); cso.setLastChar(highest); if (LOG.isDebugEnabled() && firstABCMismatch != null) { //Debug level because it usually is no problem. LOG.debug("Font has metrics inconsitencies where A+B+C doesn't equal the" + " character increment. The first such character found: " + firstABCMismatch); } } private static int getUBIN(byte[] data, int start) { return ((data[start] & 0xFF) << 8) + (data[start + 1] & 0xFF); } private static int getSBIN(byte[] data, int start) { int ubin = ((data[start] & 0xFF) << 8) + (data[start + 1] & 0xFF); if ((ubin & 0x8000) != 0) { //extend sign return ubin | 0xFFFF0000; } else { return ubin; } } private class FontControl { private int dpi; private int unitsPerEm; private boolean isRelative = false; public int getDpi() { return dpi; } public void setDpi(int i) { dpi = i; } public int getUnitsPerEm() { return this.unitsPerEm; } public void setUnitsPerEm(int value) { this.unitsPerEm = value; } public boolean isRelative() { return isRelative; } public void setRelative(boolean b) { isRelative = b; } } private static class FontDescriptor { private byte[] data; public FontDescriptor(byte[] data) { this.data = data; } public int getNominalFontSizeInMillipoints() { int nominalFontSize = 100 * getUBIN(data, 39); return nominalFontSize; } } private static final class SingleByteLoader extends CharacterSetBuilder { private static final SingleByteLoader INSTANCE = new SingleByteLoader(); private SingleByteLoader() { super(); } private static SingleByteLoader getInstance() { return INSTANCE; } } /** * Double-byte (CID Keyed font (Type 0)) implementation of AFPFontReader. */ private static final class DoubleByteLoader extends CharacterSetBuilder { private static final DoubleByteLoader INSTANCE = new DoubleByteLoader(); private DoubleByteLoader() { } static DoubleByteLoader getInstance() { return INSTANCE; } protected Map<String, String> loadCodePage(String codePage, String encoding, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { // Create the HashMap to store code page information Map<String, String> codePages = new HashMap<String, String>(); InputStream inputStream = null; try { inputStream = openInputStream(accessor, codePage.trim(), eventProducer); StructuredFieldReader structuredFieldReader = new StructuredFieldReader(inputStream); byte[] data; while ((data = structuredFieldReader.getNext(CHARACTER_TABLE_SF)) != null) { int position = 0; byte[] gcgiBytes = new byte[8]; byte[] charBytes = new byte[2]; // Read data, ignoring bytes 0 - 2 for (int index = 3; index < data.length; index++) { if (position < 8) { // Build the graphic character global identifier key gcgiBytes[position] = data[index]; position++; } else if (position == 9) { // Set the character charBytes[0] = data[index]; position++; } else if (position == 10) { position = 0; // Set the character charBytes[1] = data[index]; String gcgiString = new String(gcgiBytes, AFPConstants.EBCIDIC_ENCODING); String charString = new String(charBytes, encoding); codePages.put(gcgiString, charString); } else { position++; } } } } catch (FileNotFoundException e) { eventProducer.codePageNotFound(this, e); } finally { closeInputStream(inputStream); } return codePages; } } private static int determineOrientation(byte orientation) { int degrees = 0; switch (orientation) { case 0x00: degrees = 0; break; case 0x2D: degrees = 90; break; case 0x5A: degrees = 180; break; case (byte) 0x87: degrees = 270; break; default: throw new IllegalStateException("Invalid orientation: " + orientation); } return degrees; } }