Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pdfbox.encoding; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.MissingResourceException; import java.util.StringTokenizer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.util.ResourceLoader; /** * This is an interface to a text encoder. * * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a> * @version $Revision: 1.15 $ */ public abstract class Encoding implements COSObjectable { /** * Log instance. */ private static final Log LOG = LogFactory.getLog(Encoding.class); /** Identifies a non-mapped character. */ public static final String NOTDEF = ".notdef"; /** * This is a mapping from a character code to a character name. */ protected final Map<Integer, String> codeToName = new HashMap<Integer, String>(); /** * This is a mapping from a character name to a character code. */ protected final Map<String, Integer> nameToCode = new HashMap<String, Integer>(); private static final Map<String, String> NAME_TO_CHARACTER = new HashMap<String, String>(); private static final Map<String, String> CHARACTER_TO_NAME = new HashMap<String, String>(); static { //Loads the official Adobe Glyph List loadGlyphList("org/apache/pdfbox/resources/glyphlist.txt"); //Loads some additional glyph mappings loadGlyphList("org/apache/pdfbox/resources/additional_glyphlist.txt"); // Load an external glyph list file that user can give as JVM property String location = System.getProperty("glyphlist_ext"); if (location != null) { File external = new File(location); if (external.exists()) { loadGlyphList(location); } } NAME_TO_CHARACTER.put(NOTDEF, ""); NAME_TO_CHARACTER.put("fi", "fi"); NAME_TO_CHARACTER.put("fl", "fl"); NAME_TO_CHARACTER.put("ffi", "ffi"); NAME_TO_CHARACTER.put("ff", "ff"); NAME_TO_CHARACTER.put("pi", "pi"); for (Map.Entry<String, String> entry : NAME_TO_CHARACTER.entrySet()) { CHARACTER_TO_NAME.put(entry.getValue(), entry.getKey()); } } /** * Loads a glyph list from a given location and populates the NAME_TO_CHARACTER hashmap * for character lookups. * @param location - The string location of the glyphlist file */ private static void loadGlyphList(String location) { BufferedReader glyphStream = null; try { InputStream resource = ResourceLoader.loadResource(location); if (resource == null) { throw new MissingResourceException("Glyphlist not found: " + location, Encoding.class.getName(), location); } glyphStream = new BufferedReader(new InputStreamReader(resource)); String line = null; while ((line = glyphStream.readLine()) != null) { line = line.trim(); //lines starting with # are comments which we can ignore. if (!line.startsWith("#")) { int semicolonIndex = line.indexOf(';'); if (semicolonIndex >= 0) { String unicodeValue = null; try { String characterName = line.substring(0, semicolonIndex); unicodeValue = line.substring(semicolonIndex + 1, line.length()); StringTokenizer tokenizer = new StringTokenizer(unicodeValue, " ", false); StringBuilder value = new StringBuilder(); while (tokenizer.hasMoreTokens()) { int characterCode = Integer.parseInt(tokenizer.nextToken(), 16); value.append((char) characterCode); } if (NAME_TO_CHARACTER.containsKey(characterName)) { LOG.warn("duplicate value for characterName=" + characterName + "," + value); } else { NAME_TO_CHARACTER.put(characterName, value.toString()); } } catch (NumberFormatException nfe) { LOG.error("malformed unicode value " + unicodeValue, nfe); } } } } } catch (IOException io) { LOG.error("error while reading the glyph list.", io); } finally { if (glyphStream != null) { try { glyphStream.close(); } catch (IOException e) { LOG.error("error when closing the glyph list.", e); } } } } /** * Returns an unmodifiable view of the Code2Name mapping. * @return the Code2Name map */ public Map<Integer, String> getCodeToNameMap() { return Collections.unmodifiableMap(codeToName); } /** * Returns an unmodifiable view of the Name2Code mapping. * @return the Name2Code map */ public Map<String, Integer> getNameToCodeMap() { return Collections.unmodifiableMap(nameToCode); } /** * This will add a character encoding. * * @param code The character code that matches the character. * @param name The name of the character. */ public void addCharacterEncoding(int code, String name) { codeToName.put(code, name); nameToCode.put(name, code); } /** * This will get the character code for the name. * * @param name The name of the character. * * @return The code for the character. * * @throws IOException If there is no character code for the name. */ public int getCode(String name) throws IOException { Integer code = nameToCode.get(name); if (code == null) { throw new IOException("No character code for character name '" + name + "'"); } return code; } /** * This will take a character code and get the name from the code. * * @param code The character code. * * @return The name of the character. * * @throws IOException If there is no name for the code. */ public String getName(int code) throws IOException { return codeToName.get(code); } /** * This will take a character code and get the name from the code. * * @param c The character. * * @return The name of the character. * * @throws IOException If there is no name for the character. */ public String getNameFromCharacter(char c) throws IOException { String name = CHARACTER_TO_NAME.get(Character.toString(c)); if (name == null) { throw new IOException("No name for character '" + c + "'"); } return name; } /** * This will get the character from the code. * * @param code The character code. * * @return The printable character for the code. * * @throws IOException If there is not name for the character. */ public String getCharacter(int code) throws IOException { String name = getName(code); if (name != null) { return getCharacter(getName(code)); } return null; } /** * This will get the character from the name. * * @param name The name of the character. * * @return The printable character for the code. */ public String getCharacter(String name) { String character = NAME_TO_CHARACTER.get(name); if (character == null) { // test if we have a suffix and if so remove it if (name.indexOf('.') > 0) { character = getCharacter(name.substring(0, name.indexOf('.'))); } // test for Unicode name // (uniXXXX - XXXX must be a multiple of four; // each representing a hexadecimal Unicode code point) else if (name.startsWith("uni")) { int nameLength = name.length(); StringBuilder uniStr = new StringBuilder(); try { for (int chPos = 3; chPos + 4 <= nameLength; chPos += 4) { int characterCode = Integer.parseInt(name.substring(chPos, chPos + 4), 16); if (characterCode > 0xD7FF && characterCode < 0xE000) { LOG.warn("Unicode character name with not allowed code area: " + name); } else { uniStr.append((char) characterCode); } } character = uniStr.toString(); NAME_TO_CHARACTER.put(name, character); } catch (NumberFormatException nfe) { LOG.warn("Not a number in Unicode character name: " + name); character = name; } } // test for an alternate Unicode name representation else if (name.startsWith("u")) { try { int characterCode = Integer.parseInt(name.substring(1), 16); if (characterCode > 0xD7FF && characterCode < 0xE000) { LOG.warn("Unicode character name with not allowed code area: " + name); } else { character = String.valueOf((char) characterCode); NAME_TO_CHARACTER.put(name, character); } } catch (NumberFormatException nfe) { LOG.warn("Not a number in Unicode character name: " + name); character = name; } } else if (nameToCode.containsKey(name)) { int code = nameToCode.get(name); character = Character.toString((char) code); } else { character = name; } } return character; } }