com.ecmkit.service.convert.impl.PDFToImage.java Source code

Java tutorial

Introduction

Here is the source code for com.ecmkit.service.convert.impl.PDFToImage.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.ecmkit.service.convert.impl;

import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
import org.apache.pdfbox.util.PDFImageWriter;

/**
 * Command-line tool that renders the pages of a PDF document to image files.
 *
 * <p>The tool parses its options, opens the document (decrypting it when
 * necessary), optionally overrides the media/crop box of every page, and then
 * delegates the actual rendering to {@link PDFImageWriter}.</p>
 *
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.6 $
 */
public class PDFToImage {

    private static final String PASSWORD = "-password";
    private static final String START_PAGE = "-startPage";
    private static final String END_PAGE = "-endPage";
    private static final String IMAGE_FORMAT = "-imageType";
    private static final String OUTPUT_PREFIX = "-outputPrefix";
    private static final String COLOR = "-color";
    private static final String RESOLUTION = "-resolution";
    private static final String CROPBOX = "-cropbox";
    private static final String NONSEQ = "-nonSeq";

    /** Resolution (dpi) used when no display is available to query. */
    private static final int DEFAULT_RESOLUTION = 96;

    /** Marker returned by {@link #imageTypeFor(String)} for an unknown color name. */
    private static final int UNKNOWN_IMAGE_TYPE = -1;

    /**
     * Private constructor: this class only exposes static entry points.
     */
    private PDFToImage() {
        //static class
    }

    /**
     * Entry point of the tool.
     *
     * <p>Recognized options are listed by {@link #usage()}. The first argument
     * that is not an option (or an option value) is taken as the PDF file;
     * further positional arguments are ignored.</p>
     *
     * @param args Command line arguments: options followed by the PDF file to convert.
     *
     * @throws Exception If a numeric option cannot be parsed or the document cannot be closed.
     */
    public static void main(String[] args) throws Exception {
        boolean useNonSeqParser = false;
        boolean passwordSupplied = false;
        String password = "";
        String pdfFile = null;
        String outputPrefix = null;
        String imageFormat = "jpg";
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;
        String color = "rgb";
        float cropBoxLowerLeftX = 0;
        float cropBoxLowerLeftY = 0;
        float cropBoxUpperRightX = 0;
        float cropBoxUpperRightY = 0;

        int resolution;
        try {
            resolution = Toolkit.getDefaultToolkit().getScreenResolution();
        } catch (HeadlessException e) {
            // No display to query; fall back to a fixed default.
            resolution = DEFAULT_RESOLUTION;
        }

        for (int i = 0; i < args.length; i++) {
            String arg = args[i];
            if (PASSWORD.equals(arg)) {
                password = optionValue(args, ++i);
                passwordSupplied = true;
            } else if (START_PAGE.equals(arg)) {
                startPage = Integer.parseInt(optionValue(args, ++i));
            } else if (END_PAGE.equals(arg)) {
                endPage = Integer.parseInt(optionValue(args, ++i));
            } else if (IMAGE_FORMAT.equals(arg)) {
                imageFormat = optionValue(args, ++i);
            } else if (OUTPUT_PREFIX.equals(arg)) {
                outputPrefix = optionValue(args, ++i);
            } else if (COLOR.equals(arg)) {
                color = optionValue(args, ++i);
            } else if (RESOLUTION.equals(arg)) {
                resolution = Integer.parseInt(optionValue(args, ++i));
            } else if (CROPBOX.equals(arg)) {
                cropBoxLowerLeftX = Float.parseFloat(optionValue(args, ++i));
                cropBoxLowerLeftY = Float.parseFloat(optionValue(args, ++i));
                cropBoxUpperRightX = Float.parseFloat(optionValue(args, ++i));
                cropBoxUpperRightY = Float.parseFloat(optionValue(args, ++i));
            } else if (NONSEQ.equals(arg)) {
                useNonSeqParser = true;
            } else if (pdfFile == null) {
                pdfFile = arg;
            }
        }

        if (pdfFile == null) {
            usage();
            return;
        }
        if (outputPrefix == null) {
            outputPrefix = defaultOutputPrefix(pdfFile);
        }

        // Validate the color before touching the document so nothing is left open on exit.
        int imageType = imageTypeFor(color);
        if (imageType == UNKNOWN_IMAGE_TYPE) {
            System.err.println("Error: invalid color '" + color
                    + "' (valid: bilevel, indexed, gray, rgb, rgba).");
            System.exit(2);
        }

        PDDocument document = null;
        try {
            if (useNonSeqParser) {
                document = PDDocument.loadNonSeq(new File(pdfFile), null, password);
            } else {
                document = PDDocument.load(pdfFile);
                if (document.isEncrypted()) {
                    try {
                        document.decrypt(password);
                    } catch (InvalidPasswordException e) {
                        if (passwordSupplied) {
                            System.err.println("Error: The supplied password is incorrect.");
                            System.exit(2);
                        } else {
                            //they didn't supply a password and the default of "" was wrong.
                            System.err.println("Error: The document is encrypted.");
                            usage();
                        }
                    }
                }
            }

            //if a CropBox has been specified, update the CropBox:
            if (cropBoxLowerLeftX != 0 || cropBoxLowerLeftY != 0 || cropBoxUpperRightX != 0
                    || cropBoxUpperRightY != 0) {
                changeCropBoxes(document, cropBoxLowerLeftX, cropBoxLowerLeftY, cropBoxUpperRightX,
                        cropBoxUpperRightY);
            }

            //Make the call
            PDFImageWriter imageWriter = new PDFImageWriter();
            boolean success = imageWriter.writeImage(document, imageFormat, password, startPage, endPage,
                    outputPrefix, imageType, resolution);
            if (!success) {
                System.err.println("Error: no writer found for image format '" + imageFormat + "'");
                System.exit(1);
            }
        } catch (Exception e) {
            // Failures while loading or rendering are reported on stderr and not rethrown.
            System.err.println(e);
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }

    /**
     * Returns the value that follows an option on the command line, printing
     * the usage text (which exits) when the option is the last argument.
     *
     * @param args  the full command line
     * @param index position at which the option's value is expected
     * @return the argument at {@code index}
     */
    private static String optionValue(String[] args, int index) {
        if (index >= args.length) {
            usage();
        }
        return args[index];
    }

    /**
     * Derives the output prefix from the PDF path by stripping the file
     * extension. Only a dot inside the last path segment counts as an
     * extension separator; a path without one is returned unchanged.
     *
     * @param pdfFile path of the PDF document as given on the command line
     * @return the path without its extension
     */
    private static String defaultOutputPrefix(String pdfFile) {
        int lastDot = pdfFile.lastIndexOf('.');
        int lastSeparator = Math.max(pdfFile.lastIndexOf('/'), pdfFile.lastIndexOf(File.separatorChar));
        if (lastDot > lastSeparator) {
            return pdfFile.substring(0, lastDot);
        }
        return pdfFile;
    }

    /**
     * Maps a {@code -color} value (case-insensitive) to the matching
     * {@link BufferedImage} type constant.
     *
     * @param color one of bilevel, indexed, gray, rgb, rgba
     * @return the image type, or {@link #UNKNOWN_IMAGE_TYPE} when the name is not recognized
     */
    private static int imageTypeFor(String color) {
        if ("bilevel".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_BYTE_BINARY;
        }
        if ("indexed".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_BYTE_INDEXED;
        }
        if ("gray".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_BYTE_GRAY;
        }
        if ("rgb".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_INT_RGB;
        }
        if ("rgba".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_INT_ARGB;
        }
        return UNKNOWN_IMAGE_TYPE;
    }

    /**
     * This will print the usage requirements and exit with status 1.
     */
    private static void usage() {
        System.err.println("Usage: java -jar pdfbox-app-x.y.z.jar PDFToImage [OPTIONS] <PDF file>\n"
                + "  -password  <password>          Password to decrypt document\n"
                + "  -imageType <image type>        (" + getImageFormats() + ")\n"
                + "  -outputPrefix <output prefix>  Filename prefix for image files\n"
                + "  -startPage <number>            The first page to start extraction(1 based)\n"
                + "  -endPage <number>              The last page to extract(inclusive)\n"
                + "  -color <string>                The color depth (valid: bilevel, indexed, gray, rgb, rgba)\n"
                + "  -resolution <number>           The bitmap resolution in dpi\n"
                + "  -cropbox <number> <number> <number> <number> The page area to export\n"
                + "  -nonSeq                        Enables the new non-sequential parser\n"
                + "  <PDF file>                     The PDF document to use\n");
        System.exit(1);
    }

    /**
     * Builds the comma-separated list of image formats shown in the usage text.
     * The list comes from the registered ImageIO <em>writers</em>, since the
     * tool writes images rather than reading them.
     *
     * @return the format names joined by commas
     */
    private static String getImageFormats() {
        StringBuilder retval = new StringBuilder();
        String[] formats = ImageIO.getWriterFormatNames();
        for (int i = 0; i < formats.length; i++) {
            if (i > 0) {
                retval.append(",");
            }
            retval.append(formats[i]);
        }
        return retval.toString();
    }

    /**
     * Replaces the media box and the crop box of every page with the given rectangle.
     *
     * @param document the document whose pages are resized
     * @param a lower-left x coordinate
     * @param b lower-left y coordinate
     * @param c upper-right x coordinate
     * @param d upper-right y coordinate
     */
    private static void changeCropBoxes(PDDocument document, float a, float b, float c, float d) {
        List<?> pages = document.getDocumentCatalog().getAllPages();
        for (Object entry : pages) {
            System.out.println("resizing page");
            PDPage page = (PDPage) entry;
            // A fresh rectangle per page, as before, so pages do not share one box object.
            PDRectangle rectangle = new PDRectangle();
            rectangle.setLowerLeftX(a);
            rectangle.setLowerLeftY(b);
            rectangle.setUpperRightX(c);
            rectangle.setUpperRightY(d);
            page.setMediaBox(rectangle);
            page.setCropBox(rectangle);
        }
    }

}