com.planbase.pdf.layoutmanager.PdfLayoutMgr.java Source code

Java tutorial

Introduction

Here is the source code for com.planbase.pdf.layoutmanager.PdfLayoutMgr.java

Source

// Copyright 2012-01-10 PlanBase Inc. & Glen Peterson
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.planbase.pdf.layoutmanager;

import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDJpeg;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDPixelMap;

import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 <p>The main class in this package; it handles page and line breaks.</p>
<h3>Usage (the unit test is a much better example):</h3>
<pre><code>// Create a new manager
PdfLayoutMgr pageMgr = PdfLayoutMgr.newRgbPageMgr();
LogicalPage lp = pageMgr.logicalPageStart();
// defaults to Landscape orientation
// call various lp.tableBuilder() or lp.put...() methods here.
// They will page-break and create extra physical pages as needed.
// ...
lp.commit();
lp = pageMgr.logicalPageStart(LogicalPage.Orientation.PORTRAIT);
// These pages will be in Portrait orientation
// call various lp methods to put things on the next page grouping
// ...
lp.commit();
// The file to write to
OutputStream os = new FileOutputStream("test.pdf");
// Commit all pages to output stream.
pageMgr.save(os);</code></pre>
<br>
<h3>Note:</h3>
<p>Because this class buffers and writes to an underlying stream, it is mutable, has side effects,
 and is NOT thread-safe!</p>
 */
public class PdfLayoutMgr {

    // private Logger logger = Logger.getLogger(PdfLayoutMgr.class);

    //        logger.info("Ascent: " + PDType1Font.HELVETICA.getFontDescriptor().getAscent());
    //        logger.info("StemH: " + PDType1Font.HELVETICA.getFontDescriptor().getStemH());
    //        logger.info("CapHeight: " + PDType1Font.HELVETICA.getFontDescriptor().getCapHeight());
    //        logger.info("XHeight: " + PDType1Font.HELVETICA.getFontDescriptor().getXHeight());
    //        logger.info("Descent: " + PDType1Font.HELVETICA.getFontDescriptor().getDescent());
    //        logger.info("Leading: " + PDType1Font.HELVETICA.getFontDescriptor().getLeading());
    //
    //        logger.info("Height: " + PDType1Font.HELVETICA.getFontDescriptor().getFontBoundingBox().getHeight());
    //
    //        Ascent:    718.0
    //        StemH:       0.0
    //        CapHeight: 718.0
    //        XHeight:   523.0
    //        Descent:  -207.0
    //        Leading:     0.0
    //        Height:   1156.0
    // CapHeight - descent = 925
    // 925 - descent = 1132 which is still less than 1156.
    // I'm going to make line-height =
    // Java FontMetrics says getHeight() = getAscent() + getDescent() + getLeading().
    // I think ascent and descent are compatible with this.  I'm going to make Leading be
    // -descent/2

    /**
     If you use no scaling when printing the output PDF, PDFBox shows approximately 72 
     Document-Units Per Inch.  This makes one pixel on an average desktop monitor correspond to
     roughly one document unit.  This is a useful constant for page layout math.
     */
    public static final float DOC_UNITS_PER_INCH = 72f;

    // TODO: add Sensible defaults, such as textStyle?
    //    private TextStyle textStyle;
    //    private PDRectangle pageDimensions;
    //    private Padding pageMargins;
    //    private PDRectangle printableArea;
    //
    //    public TextStyle textStyle() { return textStyle; }
    //    public PDRectangle pageDimensions() { return pageDimensions; }
    //    public Padding pageMargins() { return pageMargins; }
    //    public PDRectangle printableArea() { return printableArea; }

    // You can have many DrawJpegs backed by only a few images - it is a flyweight, and this
    // hash map keeps track of the few underlying images, even as instances of DrawJpeg
    // represent all the places where these images are used.
    // CRITICAL: This means that the set of jpgs must be thrown out and created anew for each
    // document!  Thus, a private final field on the PdfLayoutMgr instead of DrawJpeg, and DrawJpeg
    // must be an inner class (or this would have to be package scoped).
    private final Map<BufferedImage, PDJpeg> jpegMap = new HashMap<BufferedImage, PDJpeg>();

    /**
     Looks up the PDJpeg for the BufferedImage behind the given ScaledJpeg in jpegMap, creating
     and storing a new PDJpeg for this document when the image has not been seen before.
     @param sj the scaled image; its bufferedImage() is the cache key.
     @return the PDJpeg stored in jpegMap for that BufferedImage.
     @throws IllegalStateException wrapping any IOException thrown while building the PDJpeg.
     */
    private PDJpeg ensureCached(final ScaledJpeg sj) {
        final BufferedImage key = sj.bufferedImage();
        final PDJpeg cached = jpegMap.get(key);
        if (cached != null) {
            return cached;
        }

        final PDJpeg created;
        try {
            created = new PDJpeg(doc, key);
        } catch (IOException ioe) {
            // Open question from the original author: can this really fail here, given that the
            // image data is only written out later?
            throw new IllegalStateException("Caught exception creating a PDJpeg from a bufferedImage", ioe);
        }
        jpegMap.put(key, created);
        return created;
    }

    // You can have many DrawPngs backed by only a few images - it is a flyweight, and this
    // hash map keeps track of the few underlying images, even as instances of DrawPng
    // represent all the places where these images are used.
    // CRITICAL: This means that the set of pngs must be thrown out and created anew for each
    // document!  Thus, a private final field on the PdfLayoutMgr instead of DrawPng, and DrawPng
    // must be an inner class (or this would have to be package scoped).
    private final Map<BufferedImage, PDPixelMap> pngMap = new HashMap<BufferedImage, PDPixelMap>();

    /**
     Looks up the PDPixelMap for the BufferedImage behind the given ScaledPng in pngMap, creating
     and storing a new PDPixelMap for this document when the image has not been seen before.
     @param sj the scaled image; its bufferedImage() is the cache key.
     @return the PDPixelMap stored in pngMap for that BufferedImage.
     @throws IllegalStateException wrapping any IOException thrown while building the PDPixelMap.
     */
    private PDPixelMap ensureCached(final ScaledPng sj) {
        final BufferedImage key = sj.bufferedImage();
        final PDPixelMap cached = pngMap.get(key);
        if (cached != null) {
            return cached;
        }

        final PDPixelMap created;
        try {
            created = new PDPixelMap(doc, key);
        } catch (IOException ioe) {
            // Open question from the original author: can this really fail here, given that the
            // image data is only written out later?
            throw new IllegalStateException("Caught exception creating a PDPixelMap from a bufferedImage", ioe);
        }
        pngMap.put(key, created);
        return created;
    }

    /**
     * Holds the items queued for one page until they are written to a content stream.
     * Please don't access this class directly if you don't have to.  It is a little bit like a model
     * for stuff that needs to be drawn on a page, but much more like a heap of assorted functionality
     * that ended up in a nested class.  This will probably be refactored away in future releases.
     */
    static class PageBuffer {
        /** The page number supplied at construction. */
        public final int pageNum;
        /** Ordinal given to the next queued item; advanced once for every item queued. */
        private long lastOrd = 0;
        /** The queued items; the TreeSet keeps them in PdfItem's natural order. */
        private final Set<PdfItem> items = new TreeSet<PdfItem>();

        private PageBuffer(int pn) {
            this.pageNum = pn;
        }

        /** Returns the ordinal for the item about to be queued and advances the counter. */
        private long nextOrd() {
            return lastOrd++;
        }

        /** Queues a filled rectangle of the given position, size and color at z-index z. */
        void fillRect(final float xVal, final float yVal, final float w, final float h, final Color c,
                final float z) {
            final PdfItem rect = FillRect.of(xVal, yVal, w, h, c, nextOrd(), z);
            items.add(rect);
        }

        /** Queues a JPEG at the given position using the default z-index. */
        void drawJpeg(final float xVal, final float yVal, final ScaledJpeg sj, final PdfLayoutMgr mgr) {
            final PdfItem image = DrawJpeg.of(xVal, yVal, sj, mgr, nextOrd(), PdfItem.DEFAULT_Z_INDEX);
            items.add(image);
        }

        /** Queues a PNG at the given position using the default z-index. */
        void drawPng(final float xVal, final float yVal, final ScaledPng sj, final PdfLayoutMgr mgr) {
            final PdfItem image = DrawPng.of(xVal, yVal, sj, mgr, nextOrd(), PdfItem.DEFAULT_Z_INDEX);
            items.add(image);
        }

        /** Queues a line from (xa, ya) to (xb, yb) in the given style at z-index z. */
        private void drawLine(final float xa, final float ya, final float xb, final float yb, final LineStyle ls,
                final float z) {
            final PdfItem line = DrawLine.of(xa, ya, xb, yb, ls, nextOrd(), z);
            items.add(line);
        }

        /** Queues a line from (xa, ya) to (xb, yb) in the given style using the default z-index. */
        void drawLine(final float xa, final float ya, final float xb, final float yb, final LineStyle ls) {
            final float defaultZ = PdfItem.DEFAULT_Z_INDEX;
            drawLine(xa, ya, xb, yb, ls, defaultZ);
        }

        /** Queues a string at the given position in the given text style at z-index z. */
        private void drawStyledText(final float xCoord, final float yCoord, final String text, TextStyle s,
                final float z) {
            final PdfItem styledText = Text.of(xCoord, yCoord, text, s, nextOrd(), z);
            items.add(styledText);
        }

        /** Queues a string at the given position in the given text style using the default z-index. */
        void drawStyledText(final float xCoord, final float yCoord, final String text, TextStyle s) {
            final float defaultZ = PdfItem.DEFAULT_Z_INDEX;
            drawStyledText(xCoord, yCoord, text, s, defaultZ);
        }

        /**
         * Writes every queued item to the given content stream.
         * @param stream the content stream each item is committed to.
         * @throws IOException if an item fails to write itself.
         */
        private void commit(PDPageContentStream stream) throws IOException {
            // The TreeSet iterates in PdfItem's natural order.  Per the original author's note that
            // is z-order first, then entry order, so everything is drawn in the correct order.
            for (PdfItem queued : items) {
                queued.commit(stream);
            }
        }

        /** A queued line segment together with the style it is stroked in. */
        private static class DrawLine extends PdfItem {
            private final float x1, y1, x2, y2;
            private final LineStyle style;

            private DrawLine(final float startX, final float startY, final float endX, final float endY,
                    LineStyle lineStyle, final long ord, final float z) {
                super(ord, z);
                this.x1 = startX;
                this.y1 = startY;
                this.x2 = endX;
                this.y2 = endY;
                this.style = lineStyle;
            }

            /** Static factory; simply forwards to the private constructor. */
            public static DrawLine of(final float xa, final float ya, final float xb, final float yb, LineStyle s,
                    final long ord, final float z) {
                return new DrawLine(xa, ya, xb, yb, s, ord, z);
            }

            /** Sets the stroking color and line width from the style, then draws the line. */
            @Override
            public void commit(PDPageContentStream stream) throws IOException {
                stream.setStrokingColor(style.color());
                stream.setLineWidth(style.width());
                stream.drawLine(x1, y1, x2, y2);
            }
        }

        /** A queued filled rectangle. */
        private static class FillRect extends PdfItem {
            private final float x, y, width, height;
            private final Color color;

            private FillRect(final float left, final float bottom, final float rectWidth, final float rectHeight,
                    final Color fill, final long ord, final float z) {
                super(ord, z);
                this.x = left;
                this.y = bottom;
                this.width = rectWidth;
                this.height = rectHeight;
                this.color = fill;
            }

            /** Static factory; simply forwards to the private constructor. */
            public static FillRect of(final float xVal, final float yVal, final float w, final float h,
                    final Color c, final long ord, final float z) {
                return new FillRect(xVal, yVal, w, h, c, ord, z);
            }

            /** Sets the non-stroking color, then fills the rectangle. */
            @Override
            public void commit(PDPageContentStream stream) throws IOException {
                stream.setNonStrokingColor(color);
                stream.fillRect(x, y, width, height);
            }
        }

        /** A queued string together with its position and text style. */
        static class Text extends PdfItem {
            public final float x, y;
            public final String t;
            public final TextStyle style;

            private Text(final float textX, final float textY, final String content, TextStyle textStyle,
                    final long ord, final float z) {
                super(ord, z);
                this.x = textX;
                this.y = textY;
                this.t = content;
                this.style = textStyle;
            }

            /** Static factory; simply forwards to the private constructor. */
            public static Text of(final float xCoord, final float yCoord, final String text, TextStyle s,
                    final long ord, final float z) {
                return new Text(xCoord, yCoord, text, s, ord, z);
            }

            /** Draws the string inside its own beginText()/endText() pair using the style's color and font. */
            @Override
            public void commit(PDPageContentStream stream) throws IOException {
                stream.beginText();
                stream.setNonStrokingColor(style.textColor());
                stream.setFont(style.font(), style.fontSize());
                stream.moveTextPositionByAmount(x, y);
                stream.drawString(t);
                stream.endText();
            }
        }

        /** A queued PNG placement; the PDPixelMap is obtained from the manager at construction. */
        private static class DrawPng extends PdfItem {
            private final float x, y;
            private final PDPixelMap png;
            private final ScaledPng scaledPng;

            private DrawPng(final float left, final float bottom, final ScaledPng scaled, final PdfLayoutMgr mgr,
                    final long ord, final float z) {
                super(ord, z);
                this.x = left;
                this.y = bottom;
                this.png = mgr.ensureCached(scaled);
                this.scaledPng = scaled;
            }

            /** Static factory; simply forwards to the private constructor. */
            public static DrawPng of(final float xVal, final float yVal, final ScaledPng sj, final PdfLayoutMgr mgr,
                    final long ord, final float z) {
                return new DrawPng(xVal, yVal, sj, mgr, ord, z);
            }

            /** Draws the image at (x, y) sized to the ScaledPng's dimensions. */
            @Override
            public void commit(PDPageContentStream stream) throws IOException {
                final XyDim size = scaledPng.dimensions();
                stream.drawXObject(png, x, y, size.x(), size.y());
            }
        }

        /** A queued JPEG placement; the PDJpeg is obtained from the manager at construction. */
        private static class DrawJpeg extends PdfItem {
            private final float x, y;
            private final PDJpeg jpeg;
            private final ScaledJpeg scaledJpeg;

            private DrawJpeg(final float left, final float bottom, final ScaledJpeg scaled, final PdfLayoutMgr mgr,
                    final long ord, final float z) {
                super(ord, z);
                this.x = left;
                this.y = bottom;
                this.jpeg = mgr.ensureCached(scaled);
                this.scaledJpeg = scaled;
            }

            /** Static factory; simply forwards to the private constructor. */
            public static DrawJpeg of(final float xVal, final float yVal, final ScaledJpeg sj,
                    final PdfLayoutMgr mgr, final long ord, final float z) {
                return new DrawJpeg(xVal, yVal, sj, mgr, ord, z);
            }

            /** Draws the image at (x, y) sized to the ScaledJpeg's dimensions. */
            @Override
            public void commit(PDPageContentStream stream) throws IOException {
                final XyDim size = scaledJpeg.dimensions();
                stream.drawXObject(jpeg, x, y, size.x(), size.y());
            }
        }
    }

    private final List<PageBuffer> pages = new ArrayList<PageBuffer>();
    private final PDDocument doc;

    // pages.size() counts the first page as 1, so 0 is the appropriate sentinel value
    private int unCommittedPageIdx = 0;

    private final PDColorSpace colorSpace;

    /**
     Exposes the page buffers without letting callers add or remove entries.
     @return an unmodifiable view of the internal page list.
     */
    List<PageBuffer> pages() {
        final List<PageBuffer> readOnlyView = Collections.unmodifiableList(pages);
        return readOnlyView;
    }

    /**
     Creates a manager backed by a brand-new PDDocument.  Private: use one of the static factories.
     @param cs the color space later applied to every committed page's content stream.
     @throws IOException if creating the PDDocument fails.
     */
    private PdfLayoutMgr(PDColorSpace cs) throws IOException {
        this.colorSpace = cs;
        this.doc = new PDDocument();
    }

    /**
     Static factory: builds a new PdfLayoutMgr that uses the given color space.
     @param cs the color-space.
     @return a new PdfLayoutMgr
     @throws IOException if the underlying constructor fails.
     */
    public static PdfLayoutMgr of(PDColorSpace cs) throws IOException {
        final PdfLayoutMgr mgr = new PdfLayoutMgr(cs);
        return mgr;
    }

    /**
     Static factory: builds a new PdfLayoutMgr that uses the PDDeviceRGB color space.
     @return a new Page Manager with an RGB color space
     @throws IOException if the underlying constructor fails.
     */
    @SuppressWarnings("UnusedDeclaration") // Part of end-user public interface
    public static PdfLayoutMgr newRgbPageMgr() throws IOException {
        final PDColorSpace rgb = PDDeviceRGB.INSTANCE;
        return new PdfLayoutMgr(rgb);
    }

    /**
     Returns the correct page for the given value of y.  This lets the user use any Y value and
     we continue extending their canvas downward (negative) by adding extra pages.
     @param lp the logical page whose yPageBottom() and printAreaHeight() drive the paging.
     @param y the un-adjusted y value.
     @return the proper page and adjusted y value for that page.
     @throws IllegalStateException if no page has been started yet, or if y lies below the page
     bottom while the print-area height is not a positive number (paging could never finish).
     */
    LogicalPage.PageBufferAndY appropriatePage(LogicalPage lp, float y) {
        if (pages.size() < 1) {
            throw new IllegalStateException(
                    "Cannot work with any pages until one has been created by calling logicalPageStart().");
        }
        // Start from the first page that has not been committed yet.
        int idx = unCommittedPageIdx;

        while (y < lp.yPageBottom()) {
            final float areaHeight = lp.printAreaHeight();
            // Without this guard a zero or negative height would never lift y above the page
            // bottom, so the loop would spin forever while adding pages.  The negated comparison
            // also rejects NaN.
            if (!(areaHeight > 0f)) {
                throw new IllegalStateException(
                        "Cannot page-break: print area height must be positive but was " + areaHeight);
            }
            // y could even be negative.  Keep moving to the top of the next page until it lands
            // in the printable area.
            y += areaHeight;
            idx++;
            if (pages.size() <= idx) {
                pages.add(new PageBuffer(pages.size() + 1));
            }
        }
        PageBuffer ps = pages.get(idx);
        return new LogicalPage.PageBufferAndY(ps, y);
    }

    /**
     Call this to commit the PDF information to the underlying stream after it is completely built.
     The underlying document is closed before this method returns, even when saving fails, so
     that its resources are not leaked.
     @param os the stream the finished PDF is written to.
     @throws IOException if saving fails, or if saving succeeded but closing the document fails.
     @throws COSVisitorException if PDFBox reports a failure while saving the document.
     */
    public void save(OutputStream os) throws IOException, COSVisitorException {
        boolean saved = false;
        try {
            doc.save(os);
            saved = true;
        } finally {
            if (saved) {
                // Normal path: let a failure to close propagate to the caller.
                doc.close();
            } else {
                // Saving already failed.  Still release the document, but do not let a secondary
                // close failure replace the exception that explains what actually went wrong.
                try {
                    doc.close();
                } catch (IOException ignored) {
                    // Intentionally ignored; the exception from doc.save() is propagating.
                }
            }
        }
    }

    // TODO: Add logicalPage() method and call pages.add() lazily for the first item actually shown on a page, and logicalPageEnd called before a save.
    // TODO: Add feature for different paper size or orientation for each group of logical pages.
    /**
     Tells this PdfLayoutMgr that you want to start a new logical page (which may be broken across
     two or more physical pages) in the requested page orientation.  A fresh page buffer is
     appended to the page list before the logical page is created.
     @param o the orientation for the new logical page.
     @return the new logical page, created by LogicalPage.of for this manager.
     */
    @SuppressWarnings("UnusedDeclaration") // Part of end-user public interface
    public LogicalPage logicalPageStart(LogicalPage.Orientation o) {
        // The new buffer's page number is its position in the list, counting from 1.
        final int nextPageNum = pages.size() + 1;
        pages.add(new PageBuffer(nextPageNum));
        return LogicalPage.of(this, o);
    }

    /**
     Starts a new logical page (which may be broken across two or more physical pages) in
     Landscape orientation by delegating to logicalPageStart(LogicalPage.Orientation).
     @return the new logical page.
     */
    public LogicalPage logicalPageStart() {
        final LogicalPage.Orientation defaultOrientation = LogicalPage.Orientation.LANDSCAPE;
        return logicalPageStart(defaultOrientation);
    }

    //    void addLogicalPage(PageBuffer pb) {
    //        pages.add(pb);
    //    }

    /**
     Call this when you are through with your current set of pages to commit all pending text and
     drawing operations.  PdfLayoutMgr buffers all operations until a page is complete so that it
     can safely be written to the underlying document; this method turns those buffered pages
     into real output, which is why it can throw an IOException.
     Call when you need a page break, or your document is done and you need to write it out.
     @param lp the logical page supplying the orientation, page width and border items.
     @throws IOException - if there is a failure writing to the underlying stream.
     */
    @SuppressWarnings("UnusedDeclaration") // Part of end-user public interface
    void logicalPageEnd(LogicalPage lp) throws IOException {

        // Write out every page that has not been committed yet.  The index only advances after a
        // page has been written without an exception.
        for (; unCommittedPageIdx < pages.size(); unCommittedPageIdx++) {
            final PDPage pdPage = new PDPage();
            pdPage.setMediaBox(PDPage.PAGE_SIZE_LETTER);

            final boolean landscape = lp.orientation() == LogicalPage.Orientation.LANDSCAPE;
            if (landscape) {
                pdPage.setRotation(90);
            }

            final PDPageContentStream stream = new PDPageContentStream(doc, pdPage);
            // Becomes true only once the stream has been closed without an exception.
            boolean closed = false;
            try {
                doc.addPage(pdPage);

                if (landscape) {
                    stream.concatenate2CTM(0, 1, -1, 0, lp.pageWidth(), 0);
                }
                stream.setStrokingColorSpace(colorSpace);
                stream.setNonStrokingColorSpace(colorSpace);

                final PageBuffer buffered = pages.get(unCommittedPageIdx);
                buffered.commit(stream);
                lp.commitBorderItems(stream);

                stream.close();
                closed = true;
            } finally {
                // Something above threw: close anyway, and let it throw if the closing doesn't work.
                if (!closed) {
                    stream.close();
                }
            }
        }
    }

    /**
     Two managers are equal when both their documents and their page lists are equal.
     @param other the object to compare with.
     @return true if other is this same instance, or a PdfLayoutMgr whose doc and pages both
     compare equal to this one's.
     */
    @Override
    public boolean equals(Object other) {
        if (other == this) {
            return true;
        }
        // instanceof is false for null, so this also rejects a null argument.
        if (!(other instanceof PdfLayoutMgr)) {
            return false;
        }
        final PdfLayoutMgr that = (PdfLayoutMgr) other;
        if (!this.doc.equals(that.doc)) {
            return false;
        }
        return this.pages.equals(that.pages);
    }

    /**
     Combines the hash codes of the same two fields that equals() compares.
     @return the sum of the document's and the page list's hash codes.
     */
    @Override
    public int hashCode() {
        final int docHash = doc.hashCode();
        final int pagesHash = pages.hashCode();
        return docHash + pagesHash;
    }

    //    public XyOffset putRect(XyOffset outerTopLeft, XyDim outerDimensions, final Color c) {
    ////        System.out.println("putRect(" + outerTopLeft + " " + outerDimensions + " " +
    ////                           Utils.toString(c) + ")");
    //        putRect(outerTopLeft.x(), outerTopLeft.y(), outerDimensions.x(), outerDimensions.y(), c);
    //        return XyOffset.of(outerTopLeft.x() + outerDimensions.x(),
    //                           outerTopLeft.y() - outerDimensions.y());
    //    }

    //    /**
    //     Puts text on the page.
    //     @param x the x-value of the top-left corner.
    //     @param origY the logical-page Y-value of the top-left corner.
    //     @param cell the cell containing the styling and text to render.
    //     @return the bottom Y-value (logical-page) of the rendered cell.
    //     */
    //    public float putCell(final float x, float origY, final Cell cell) {
    //        return cell.processRows(x, origY, false, this);
    //    }

    private static final String ISO_8859_1 = "ISO_8859_1";
    private static final String UNICODE_BULLET = "\u2022";

    // PDFBox uses an encoding that the PDF spec calls WinAnsiEncoding.  The spec says this is
    // Windows Code Page 1252.
    // http://en.wikipedia.org/wiki/Windows-1252
    // It has a lot in common with ISO-8859-1, but it defines some additional characters such as
    // the Euro symbol.
    private static final Map<String, String> utf16ToWinAnsi;
    static {
        Map<String, String> tempMap = new HashMap<String, String>();

        try {
            // 129, 141, 143, 144, and 157 are undefined in WinAnsi.
            // I had mapped A0-FF to 160-255 without noticing that that maps each character to
            // itself, meaning that Unicode and WinAnsii are the same in that range.

            // Unicode characters with exact WinAnsi equivalents
            tempMap.put("\u0152", new String(new byte[] { 0, (byte) 140 }, ISO_8859_1)); // OE
            tempMap.put("\u0153", new String(new byte[] { 0, (byte) 156 }, ISO_8859_1)); // oe
            tempMap.put("\u0160", new String(new byte[] { 0, (byte) 138 }, ISO_8859_1)); // S Acron
            tempMap.put("\u0161", new String(new byte[] { 0, (byte) 154 }, ISO_8859_1)); // s acron
            tempMap.put("\u0178", new String(new byte[] { 0, (byte) 159 }, ISO_8859_1)); // Y Diaeresis
            tempMap.put("\u017D", new String(new byte[] { 0, (byte) 142 }, ISO_8859_1)); // Capital Z-caron
            tempMap.put("\u017E", new String(new byte[] { 0, (byte) 158 }, ISO_8859_1)); // Lower-case Z-caron
            tempMap.put("\u0192", new String(new byte[] { 0, (byte) 131 }, ISO_8859_1)); // F with a hook (like jf put together)
            tempMap.put("\u02C6", new String(new byte[] { 0, (byte) 136 }, ISO_8859_1)); // circumflex (up-caret)
            tempMap.put("\u02DC", new String(new byte[] { 0, (byte) 152 }, ISO_8859_1)); // Tilde

            // Cyrillic letters map to their closest Romanizations according to ISO 9:1995
            // http://en.wikipedia.org/wiki/ISO_9
            // http://en.wikipedia.org/wiki/A_(Cyrillic)

            // Cyrillic extensions
            // 0400  Cyrillic capital letter IE WITH GRAVE
            //  0415   0300 (left-accent)
            tempMap.put("\u0400", new String(new byte[] { 0, (byte) 200 }, ISO_8859_1));
            // 0401 ? Cyrillic capital letter IO
            //  0415   0308 (diuresis)
            tempMap.put("\u0401", new String(new byte[] { 0, (byte) 203 }, ISO_8859_1));
            // 0402  Cyrillic capital letter DJE
            tempMap.put("\u0402", new String(new byte[] { 0, (byte) 208 }, ISO_8859_1));
            // 0403  Cyrillic capital letter GJE
            //  0413   0301 (accent)
            // Ghe only maps to G-acute, which is not in our charset.
            // 0404  Cyrillic capital letter UKRAINIAN IE
            tempMap.put("\u0404", new String(new byte[] { 0, (byte) 202 }, ISO_8859_1));
            // 0405  Cyrillic capital letter DZE
            tempMap.put("\u0405", "S"); //
            // 0406  Cyrillic capital letter BYELORUSSIAN-
            // UKRAINIAN I
            //  0049 I  latin capital letter i
            //  0456   cyrillic small letter byelorussian-
            // ukrainian i
            //  04C0   cyrillic letter palochka
            tempMap.put("\u0406", new String(new byte[] { 0, (byte) 204 }, ISO_8859_1));
            // 0407  Cyrillic capital letter YI
            //  0406   0308 (diuresis)
            tempMap.put("\u0407", new String(new byte[] { 0, (byte) 207 }, ISO_8859_1));
            // 0408  Cyrillic capital letter JE
            // 0409  Cyrillic capital letter LJE
            // 040A  Cyrillic capital letter NJE
            // 040B  Cyrillic capital letter TSHE
            // 040C  Cyrillic capital letter KJE
            //  041A   0301 (accent)
            // 040D ? Cyrillic capital letter I WITH GRAVE
            //  0418   0300 (accent)
            // 040E  Cyrillic capital letter SHORT U
            //  0423   0306 (accent)
            // 040F ? Cyrillic capital letter DZHE

            // Basic Russian alphabet
            // See: http://www.unicode.org/charts/PDF/U0400.pdf
            // 0410 ? Cyrillic capital letter A => Latin A
            tempMap.put("\u0410", "A");
            // 0411  Cyrillic capital letter BE => Latin B
            //  0183   latin small letter b with topbar
            tempMap.put("\u0411", "B");
            // 0412  Cyrillic capital letter VE => Latin V
            tempMap.put("\u0412", "V");
            // 0413  Cyrillic capital letter GHE => Latin G
            tempMap.put("\u0413", "G");
            // 0414  Cyrillic capital letter DE => Latin D
            tempMap.put("\u0414", "D");
            // 0415  Cyrillic capital letter IE => Latin E
            tempMap.put("\u0415", "E");
            // 0416  Cyrillic capital letter ZHE => Z-caron
            tempMap.put("\u0416", new String(new byte[] { 0, (byte) 142 }, ISO_8859_1));
            // 0417  Cyrillic capital letter ZE => Latin Z
            tempMap.put("\u0417", "Z");
            // 0418  Cyrillic capital letter I => Latin I
            tempMap.put("\u0418", "I");
            // 0419  Cyrillic capital letter SHORT I => Latin J
            //  0418   0306 (a little mark)
            // The two-character form (reversed N plus the mark) is not supported.
            tempMap.put("\u0419", "J");
            // 041A  Cyrillic capital letter KA => Latin K
            tempMap.put("\u041A", "K");
            // 041B  Cyrillic capital letter EL => Latin L
            tempMap.put("\u041B", "L");
            // 041C  Cyrillic capital letter EM => Latin M
            tempMap.put("\u041C", "M");
            // 041D ? Cyrillic capital letter EN => Latin N
            tempMap.put("\u041D", "N");
            // 041E  Cyrillic capital letter O => Latin O
            tempMap.put("\u041E", "O");
            // 041F  Cyrillic capital letter PE => Latin P
            tempMap.put("\u041F", "P");
            // 0420  Cyrillic capital letter ER => Latin R
            tempMap.put("\u0420", "R");
            // 0421  Cyrillic capital letter ES => Latin S
            tempMap.put("\u0421", "S");
            // 0422  Cyrillic capital letter TE => Latin T
            tempMap.put("\u0422", "T");
            // 0423  Cyrillic capital letter U => Latin U
            //  0478   cyrillic capital letter uk
            //  04AF   cyrillic small letter straight u
            //  A64A   cyrillic capital letter monograph uk
            tempMap.put("\u0423", "U");
            tempMap.put("\u0478", "U"); // Is this right?
            tempMap.put("\u04AF", "U"); // Is this right?
            tempMap.put("\uA64A", "U"); // Is this right?
            // 0424  Cyrillic capital letter EF => Latin F
            tempMap.put("\u0424", "F");
            // 0425  Cyrillic capital letter HA => Latin H
            tempMap.put("\u0425", "H");
            // 0426  Cyrillic capital letter TSE => Latin C
            tempMap.put("\u0426", "C");
            // 0427  Cyrillic capital letter CHE => Mapping to "Ch" because there is no
            // C-caron - hope this is the best choice!  A also had this as "CH" but some make it
            // Tch as in Tchaikovsky, really didn't know what to do here.
            tempMap.put("\u0427", "Ch");
            // 0428  Cyrillic capital letter SHA => S-caron
            tempMap.put("\u0428", new String(new byte[] { 0, (byte) 138 }, ISO_8859_1));
            // 0429  Cyrillic capital letter SHCHA => Latin "Shch" because there is no
            // S-circumflex to map it to.  Should it go to S-caron like SHA?
            tempMap.put("\u0429", "Shch");
            // 042A  Cyrillic capital letter HARD SIGN => Latin double prime, or in this case,
            // right double-quote.
            tempMap.put("\u042A", new String(new byte[] { 0, (byte) 148 }, ISO_8859_1));
            // 042B  Cyrillic capital letter YERU => Latin Y
            tempMap.put("\u042B", "Y");
            // 042C  Cyrillic capital letter SOFT SIGN => Latin prime, or in this case,
            // the right-single-quote.
            tempMap.put("\u042C", new String(new byte[] { 0, (byte) 146 }, ISO_8859_1));
            // 042D  Cyrillic capital letter E => Latin E-grave
            tempMap.put("\u042D", new String(new byte[] { 0, (byte) 200 }, ISO_8859_1));
            // 042E  Cyrillic capital letter YU => Latin U-circumflex
            tempMap.put("\u042E", new String(new byte[] { 0, (byte) 219 }, ISO_8859_1));
            // 042F  Cyrillic capital letter YA => A-circumflex
            tempMap.put("\u042F", new String(new byte[] { 0, (byte) 194 }, ISO_8859_1));
            // 0430  Cyrillic small letter A
            tempMap.put("\u0430", "a");
            // 0431  Cyrillic small letter BE
            tempMap.put("\u0431", "b");
            // 0432  Cyrillic small letter VE
            tempMap.put("\u0432", "v");
            // 0433  Cyrillic small letter GHE
            tempMap.put("\u0433", "g");
            // 0434  Cyrillic small letter DE
            tempMap.put("\u0434", "d");
            // 0435  Cyrillic small letter IE
            tempMap.put("\u0435", "e");
            // 0436  Cyrillic small letter ZHE
            tempMap.put("\u0436", new String(new byte[] { 0, (byte) 158 }, ISO_8859_1));
            // 0437  Cyrillic small letter ZE
            tempMap.put("\u0437", "z");
            // 0438  Cyrillic small letter I
            tempMap.put("\u0438", "i");
            // 0439  Cyrillic small letter SHORT I
            //  0438   0306 (accent)
            tempMap.put("\u0439", "j");
            // 043A  Cyrillic small letter KA
            tempMap.put("\u043A", "k");
            // 043B  Cyrillic small letter EL
            tempMap.put("\u043B", "l");
            // 043C  Cyrillic small letter EM
            tempMap.put("\u043C", "m");
            // 043D  Cyrillic small letter EN
            tempMap.put("\u043D", "n");
            // 043E  Cyrillic small letter O
            tempMap.put("\u043E", "o");
            // 043F  Cyrillic small letter PE
            tempMap.put("\u043F", "p");
            // 0440  Cyrillic small letter ER
            tempMap.put("\u0440", "r");
            // 0441 ? Cyrillic small letter ES
            tempMap.put("\u0441", "s");
            // 0442  Cyrillic small letter TE
            tempMap.put("\u0442", "t");
            // 0443  Cyrillic small letter U
            tempMap.put("\u0443", "u");
            // 0444  Cyrillic small letter EF
            tempMap.put("\u0444", "f");
            // 0445  Cyrillic small letter HA
            tempMap.put("\u0445", "h");
            // 0446  Cyrillic small letter TSE
            tempMap.put("\u0446", "c");
            // 0447  Cyrillic small letter CHE - see notes on capital letter.
            tempMap.put("\u0447", "ch");
            // 0448  Cyrillic small letter SHA
            tempMap.put("\u0448", new String(new byte[] { 0, (byte) 154 }, ISO_8859_1));
            // 0449  Cyrillic small letter SHCHA
            tempMap.put("\u0449", "shch");
            // 044A  Cyrillic small letter HARD SIGN
            tempMap.put("\u044A", new String(new byte[] { 0, (byte) 148 }, ISO_8859_1));
            // 044B  Cyrillic small letter YERU
            //  A651   cyrillic small letter yeru with back yer
            tempMap.put("\u044B", "y");
            // 044C  Cyrillic small letter SOFT SIGN
            //  0185   latin small letter tone six
            //  A64F ?  cyrillic small letter neutral yer
            tempMap.put("\u044C", new String(new byte[] { 0, (byte) 146 }, ISO_8859_1));
            // 044D ? Cyrillic small letter E
            tempMap.put("\u044D", new String(new byte[] { 0, (byte) 232 }, ISO_8859_1));
            // 044E  Cyrillic small letter YU
            //  A655   cyrillic small letter reversed yu
            tempMap.put("\u044E", new String(new byte[] { 0, (byte) 251 }, ISO_8859_1));
            tempMap.put("\uA655", new String(new byte[] { 0, (byte) 251 }, ISO_8859_1)); // is this right?
            // 044F ? Cyrillic small letter YA => a-circumflex
            tempMap.put("\u044F", new String(new byte[] { 0, (byte) 226 }, ISO_8859_1));

            // Cyrillic extensions
            // 0450 ? CYRILLIC SMALL LETTER IE WITH GRAVE
            //  Macedonian
            //  0435   0300 $
            tempMap.put("\u0450", new String(new byte[] { 0, (byte) 232 }, ISO_8859_1)); // e-grave => e-grave
            // 0451  CYRILLIC SMALL LETTER IO
            //  Russian, ...
            //  0435   0308 $
            tempMap.put("\u0451", new String(new byte[] { 0, (byte) 235 }, ISO_8859_1));
            // 0452  CYRILLIC SMALL LETTER DJE
            //  Serbian
            //  0111   latin small letter d with stroke
            tempMap.put("\u0452", new String(new byte[] { 0, (byte) 240 }, ISO_8859_1));
            // 0453  CYRILLIC SMALL LETTER GJE - only maps to g-acute, which is not in our charset.
            //  Macedonian
            //  0433   0301 $?
            // 0454  CYRILLIC SMALL LETTER UKRAINIAN IE
            // = Old Cyrillic yest
            tempMap.put("\u0454", new String(new byte[] { 0, (byte) 234 }, ISO_8859_1));
            // 0455  CYRILLIC SMALL LETTER DZE
            //  Macedonian
            //  A643   cyrillic small letter dzelo
            tempMap.put("\u0455", "s");
            // 0456 CYRILLIC SMALL LETTER BYELORUSSIAN-
            // UKRAINIAN I
            // = Old Cyrillic i
            tempMap.put("\u0456", new String(new byte[] { 0, (byte) 236 }, ISO_8859_1));
            // 0457  CYRILLIC SMALL LETTER YI
            //  Ukrainian
            //  0456   0308 $
            tempMap.put("\u0457", new String(new byte[] { 0, (byte) 239 }, ISO_8859_1));
            // 0458  CYRILLIC SMALL LETTER JE
            //  Serbian, Azerbaijani, Altay
            // 0459  CYRILLIC SMALL LETTER LJE
            //  Serbian, Macedonian
            //  01C9 lj  latin small letter lj
            // 045A  CYRILLIC SMALL LETTER NJE
            //  Serbian, Macedonian
            //  01CC nj  latin small letter nj
            // 045B  CYRILLIC SMALL LETTER TSHE
            //  Serbian
            //  0107   latin small letter c with acute
            //  0127   latin small letter h with stroke
            //  040B   cyrillic capital letter tshe
            //  210F   planck constant over two pi
            //  A649   cyrillic small letter djerv
            // 045C  CYRILLIC SMALL LETTER KJE
            //  Macedonian
            //  043A   0301 $?
            // 045D ? CYRILLIC SMALL LETTER I WITH GRAVE
            //  Macedonian, Bulgarian
            //  0438   0300 $
            // 045E  CYRILLIC SMALL LETTER SHORT U
            //  Byelorussian, Uzbek
            //  0443   0306 $
            // 045F  CYRILLIC SMALL LETTER DZHE
            //  Serbian, Macedonian, Abkhasian
            //  01C6 d  latin small letter dz with caron

            // Extended Cyrillic
            // ...
            // 0490 ? CYRILLIC CAPITAL LETTER GHE WITH UPTURN => G ?
            tempMap.put("\u0490", "G"); // Ghe with upturn
            // 0491  CYRILLIC SMALL LETTER GHE WITH UPTURN
            //  Ukrainian
            tempMap.put("\u0491", "g");

            // Other commonly-used unicode characters with exact WinAnsi equivalents
            tempMap.put("\u2013", new String(new byte[] { 0, (byte) 150 }, ISO_8859_1)); // En-dash
            tempMap.put("\u2014", new String(new byte[] { 0, (byte) 151 }, ISO_8859_1)); // Em-dash
            tempMap.put("\u2018", new String(new byte[] { 0, (byte) 145 }, ISO_8859_1)); // Curved single open quote
            tempMap.put("\u2019", new String(new byte[] { 0, (byte) 146 }, ISO_8859_1)); // Curved single close-quote
            tempMap.put("\u201A", new String(new byte[] { 0, (byte) 130 }, ISO_8859_1)); // Low single curved-quote
            tempMap.put("\u201C", new String(new byte[] { 0, (byte) 147 }, ISO_8859_1)); // Curved double open quote
            tempMap.put("\u201D", new String(new byte[] { 0, (byte) 148 }, ISO_8859_1)); // Curved double close-quote
            tempMap.put("\u201E", new String(new byte[] { 0, (byte) 132 }, ISO_8859_1)); // Low right double quote.
            tempMap.put("\u2020", new String(new byte[] { 0, (byte) 134 }, ISO_8859_1)); // Dagger
            tempMap.put("\u2021", new String(new byte[] { 0, (byte) 135 }, ISO_8859_1)); // Double dagger
            tempMap.put(UNICODE_BULLET, new String(new byte[] { 0, (byte) 149 }, ISO_8859_1)); // Bullet - use this as replacement character.
            tempMap.put("\u2026", new String(new byte[] { 0, (byte) 133 }, ISO_8859_1)); // Ellipsis
            tempMap.put("\u2030", new String(new byte[] { 0, (byte) 137 }, ISO_8859_1)); // Permille
            tempMap.put("\u2039", new String(new byte[] { 0, (byte) 139 }, ISO_8859_1)); // Left angle-quote
            tempMap.put("\u203A", new String(new byte[] { 0, (byte) 155 }, ISO_8859_1)); // Right angle-quote
            tempMap.put("\u20ac", new String(new byte[] { 0, (byte) 128 }, ISO_8859_1)); // Euro symbol
            tempMap.put("\u2122", new String(new byte[] { 0, (byte) 153 }, ISO_8859_1)); // Trademark symbol

        } catch (UnsupportedEncodingException uee) {
            throw new IllegalStateException(
                    "Problem creating translation table due to Unsupported Encoding (coding error)", uee);
        }
        utf16ToWinAnsi = Collections.unmodifiableMap(tempMap);
    }

    // private static final Pattern whitespacePattern = Pattern.compile("\\p{Z}+");
    // What about \u00ba??
    // \u00a0-\u00a9 \u00ab-\u00b9 \u00bb-\u00bf \u00d7 \u00f7
    /**
     Matches one character that lies outside the range U+0000 to U+00FF.  Despite the name, this
     is broader than "non-ASCII": characters from U+0080 to U+00FF are inside the excluded range,
     so they are NOT matched.  convertJavaStringToWinAnsi() uses this pattern to find the
     characters it has to look up in the translation table; everything the pattern does not match
     is copied to the output unchanged.
     */
    private static final Pattern nonAsciiPattern = Pattern.compile("[^\u0000-\u00ff]");

    /**
     <p>PDF files are limited to the 217 characters of Windows-1252, which the PDF spec calls
     WinAnsi.  (Java's ISO-8859-1 charset is often used as an informal synonym, but it only agrees
     with Windows-1252 for code points 0-127 and 160-255.)  This method transliterates the
     standard Java UTF-16 character representations to their Windows-1252 equivalents where such
     translation is possible.  Any character (e.g. Kanji) which does not have an appropriate
     substitute in the translation table will be mapped to the bullet character (a round dot).</p>

     <p>Only characters above U+00FF are looked up in the translation table; characters from
     U+0000 to U+00FF are copied to the result unchanged.</p>

     <p>This transliteration covers the modern alphabets of the following languages:<br>

     Afrikaans (af),
     Albanian (sq), Basque (eu), Catalan (ca), Danish (da), Dutch (nl), English (en), Faroese (fo),
     Finnish (fi), French (fr), Galician (gl), German (de), Icelandic (is), Irish (ga),
     Italian (it), Norwegian (no), Portuguese (pt), Scottish (gd), Spanish (es), Swedish (sv).</p>

     <p>Romanized substitutions are used for the Cyrillic characters of the modern Russian (ru)
     alphabet according to ISO 9:1995 with the following phonetic substitutions: 'Ch' for the
     letter CHE (U+0427) and 'Shch' for the letter SHCHA (U+0429).</p>

     <p>The PdfLayoutMgr calls this method internally whenever it renders text (transliteration has
     to happen before line breaking), but is available externally in case you wish to use it
     directly with PDFBox.</p>
     @param in a string in the standard Java UTF-16 encoding; must not be null
     @return the transliterated string (informally called WinAnsi or ISO-8859-1 text).  When the
     input contains nothing above U+00FF the input instance itself is returned.
     @throws NullPointerException if {@code in} is null
     */
    public static String convertJavaStringToWinAnsi(String in) {
        Matcher m = nonAsciiPattern.matcher(in);

        // Fast path: nothing above U+00FF means there is nothing to translate, so skip the
        // builder allocation and the copy.  Strings are immutable, so handing back the input
        // is indistinguishable from handing back an equal copy.
        if (!m.find()) {
            return in;
        }

        final int len = in.length();
        // The input length is only a capacity hint; replacements such as "Shch" are longer
        // than the single character they replace and the builder grows as needed.
        StringBuilder sB = new StringBuilder(len);
        int idx = 0; // index just past the previous match (start of the next untouched run)
        do {
            // Append everything from the last match up to this one (a no-op for an empty run).
            sB.append(in, idx, m.start());

            // NOTE(review): the visible table entries are built from two-byte arrays
            // {0, code}; confirm whether the extra leading character that this decodes to is
            // intended in the output.
            String s = utf16ToWinAnsi.get(m.group());

            // "In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character."
            // source: PDF spec, Annex D.3 PDFDocEncoding Character Set p. 656 footnote about
            // WinAnsiEncoding.
            //
            // The bullet is the closest thing to a "replacement character" in the WinAnsi
            // character set, so it stands in for anything the table does not know.  It looks
            // tons better than nullnullnull...
            if (s == null) {
                s = utf16ToWinAnsi.get(UNICODE_BULLET);
            }
            sB.append(s);

            idx = m.end(); // m.end() is exclusive
        } while (m.find());

        // Trailing run after the final match (a no-op when the match ended the string).
        sB.append(in, idx, len);
        return sB.toString();
    }

}