Java tutorial
package model.util.pdf; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.awt.HeadlessException; import java.awt.Toolkit; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import javax.imageio.ImageIO; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.tools.imageio.ImageIOUtil; /** * * This file and its methods are based on the source code of the file * PDFToImage of the apache pdfbox document, version 2.0.0. * The project's download page can be found at * https://pdfbox.apache.org/download.cgi#scm. * The original file is edited by Julius Huelsmann. * * * Convert a PDF document to an image. * * @author Ben Litchfield * @author Julius Huelsmann */ public final class PDFUtils { /** * Identifiers that serve as parameters for the method call. */ private static final String PASSWORD = "-password", START_PAGE = "-startPage", END_PAGE = "-endPage", PAGE = "-page", IMAGE_TYPE = "-imageType", FORMAT = "-format", OUTPUT_PREFIX = "-outputPrefix", PREFIX = "-prefix", COLOR = "-color", RESOLUTION = "-resolution", DPI = "-dpi", CROPBOX = "-cropbox", TIME = "-time"; /** * Private utility class constructor */ private PDFUtils() { } /** * Method for directly extracting image information from inside * a specified PDF document. * @param _path the location of the PDF document on disk. * @return the extracted BufferedImage. */ public static BufferedImage pdf2image(final String _path) { try { return pdf2image(new String[] { _path }); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } } /** * Infamous main method. * Adapted by Julius Huelsmann. * * @author Ben Litchfield * * @param args Command line arguments, should be one and a reference to a file. * * @throws IOException If there is an error parsing the document. */ public static BufferedImage pdf2image(final String[] _parameters) throws IOException { // suppress the Dock icon on OS X if called from outside // the paint - project. System.setProperty("apple.awt.UIElement", "true"); String password = ""; String pdfFile = null; String outputPrefix = null; String imageFormat = "jpg"; int startPage = 1; int endPage = Integer.MAX_VALUE; String color = "rgb"; int dpi; float cropBoxLowerLeftX = 0; float cropBoxLowerLeftY = 0; float cropBoxUpperRightX = 0; float cropBoxUpperRightY = 0; boolean showTime = false; try { dpi = Toolkit.getDefaultToolkit().getScreenResolution(); } catch (HeadlessException e) { dpi = 96; } for (int i = 0; i < _parameters.length; i++) { if (_parameters[i].equals(PASSWORD)) { i++; if (i >= _parameters.length) { usage(); } password = _parameters[i]; } else if (_parameters[i].equals(START_PAGE)) { i++; if (i >= _parameters.length) { usage(); } startPage = Integer.parseInt(_parameters[i]); } else if (_parameters[i].equals(END_PAGE)) { i++; if (i >= _parameters.length) { usage(); } endPage = Integer.parseInt(_parameters[i]); } else if (_parameters[i].equals(PAGE)) { i++; if (i >= _parameters.length) { usage(); } startPage = Integer.parseInt(_parameters[i]); endPage = Integer.parseInt(_parameters[i]); } else if (_parameters[i].equals(IMAGE_TYPE) || _parameters[i].equals(FORMAT)) { i++; imageFormat = _parameters[i]; } else if (_parameters[i].equals(OUTPUT_PREFIX) || _parameters[i].equals(PREFIX)) { i++; outputPrefix = _parameters[i]; } else if (_parameters[i].equals(COLOR)) { i++; color = _parameters[i]; } else if (_parameters[i].equals(RESOLUTION) || _parameters[i].equals(DPI)) { i++; dpi = Integer.parseInt(_parameters[i]); } else if (_parameters[i].equals(CROPBOX)) { i++; cropBoxLowerLeftX = Float.valueOf(_parameters[i]); i++; cropBoxLowerLeftY = Float.valueOf(_parameters[i]); i++; cropBoxUpperRightX = Float.valueOf(_parameters[i]); i++; cropBoxUpperRightY = Float.valueOf(_parameters[i]); } else if (_parameters[i].equals(TIME)) { showTime = true; } else { if (pdfFile == null) { pdfFile = _parameters[i]; } } } if (pdfFile == null) { usage(); } else { if (outputPrefix == null) { outputPrefix = pdfFile.substring(0, pdfFile.lastIndexOf('.')); } PDDocument document = null; try { document = PDDocument.load(new File(pdfFile), password); ImageType imageType = null; if ("bilevel".equalsIgnoreCase(color)) { imageType = ImageType.BINARY; } else if ("gray".equalsIgnoreCase(color)) { imageType = ImageType.GRAY; } else if ("rgb".equalsIgnoreCase(color)) { imageType = ImageType.RGB; } else if ("rgba".equalsIgnoreCase(color)) { imageType = ImageType.ARGB; } if (imageType == null) { System.err.println("Error: Invalid color."); System.exit(2); } //if a CropBox has been specified, update the CropBox: //changeCropBoxes(PDDocument document,float a, float b, float c,float d) if (cropBoxLowerLeftX != 0 || cropBoxLowerLeftY != 0 || cropBoxUpperRightX != 0 || cropBoxUpperRightY != 0) { changeCropBox(document, cropBoxLowerLeftX, cropBoxLowerLeftY, cropBoxUpperRightX, cropBoxUpperRightY); } long startTime = System.nanoTime(); // render the pages boolean success = true; endPage = Math.min(endPage, document.getNumberOfPages()); PDFRenderer renderer = new PDFRenderer(document); for (int i = startPage - 1; i < endPage;) { BufferedImage image = renderer.renderImageWithDPI(i, dpi, imageType); String fileName = outputPrefix + (i + 1) + "." + imageFormat; success &= ImageIOUtil.writeImage(image, fileName, dpi); return image; } // performance stats long endTime = System.nanoTime(); long duration = endTime - startTime; int count = 1 + endPage - startPage; if (showTime) { System.err.printf("Rendered %d page%s in %dms\n", count, count == 1 ? "" : "s", duration / 1000000); } if (!success) { System.err.println("Error: no writer found for image format '" + imageFormat + "'"); System.exit(1); } } finally { if (document != null) { document.close(); } } return null; } return null; } public static BufferedImage pdf2image(PDDocument _document, int _pageNumber) { int dpi = 0; try { dpi = Toolkit.getDefaultToolkit().getScreenResolution(); dpi = 130; } catch (HeadlessException e) { dpi = 96; dpi = 130; } PDFRenderer renderer = new PDFRenderer(_document); BufferedImage image; try { image = renderer.renderImageWithDPI(_pageNumber, dpi, ImageType.RGB); return image; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } } /** * This will print the usage requirements and exit. */ private static void usage() { System.err.println("Usage: java -jar pdfbox-app-x.y.z.jar PDFToImage [OPTIONS] <PDF file>\n" + " -password <password> Password to decrypt document\n" + " -format <string> Image format: " + getImageFormats() + "\n" + " -prefix <string> Filename prefix for image files\n" + " -page <number> The only page to extract (1-based)\n" + " -startPage <number> The first page to start extraction (1-based)\n" + " -endPage <number> The last page to extract(inclusive)\n" + " -color <string> The color depth (valid: bilevel, indexed, gray, rgb, rgba)\n" + " -dpi <number> The DPI of the output image\n" + " -cropbox <number> <number> <number> <number> The page area to export\n" + " -time Prints timing information to stdout\n" + " <PDF file> The PDF document to use\n"); System.exit(1); } private static String getImageFormats() { StringBuilder retval = new StringBuilder(); String[] formats = ImageIO.getReaderFormatNames(); for (int i = 0; i < formats.length; i++) { if (formats[i].equalsIgnoreCase(formats[i])) { retval.append(formats[i]); if (i + 1 < formats.length) { retval.append(", "); } } } return retval.toString(); } private static void changeCropBox(PDDocument document, float a, float b, float c, float d) { for (PDPage page : document.getPages()) { PDRectangle rectangle = new PDRectangle(); rectangle.setLowerLeftX(a); rectangle.setLowerLeftY(b); rectangle.setUpperRightX(c); rectangle.setUpperRightY(d); page.setCropBox(rectangle); } } /** * Test: Create pdf from image and pdf file. * * @param _inputFile pdf input * @param _imagePath image input * @param _outputFile pdf output * * @throws IOException occurs if reading the data fails. */ public void saveAsPdf(String _imagePath, String _outputFile) throws IOException { PDDocument doc = null; try { // create new document and insert empty page to the document. doc = new PDDocument(); PDPage page = new PDPage(); doc.addPage(page); // createFromFile is the easiest way with an image file // if you already have the image in a BufferedImage, // call LosslessFactory.createFromImage() instead PDImageXObject pdImage = PDImageXObject.createFromFile(_imagePath, doc); PDPageContentStream contentStream = new PDPageContentStream(doc, page, true, true); // contentStream.drawImage(ximage, 20, 20 ); // better method inspired by http://stackoverflow.com/a/22318681/535646 // reduce this value if the image is too large float scale = 1f; contentStream.drawImage(pdImage, 20, 20, pdImage.getWidth() * scale, pdImage.getHeight() * scale); contentStream.close(); doc.save(_outputFile); } finally { if (doc != null) { doc.close(); } } } }