Java tutorial
/* This file is part of PDFPicMangler, an image resampling tool for pdf documents. * Copyright (C) 2017 Ingo Kresse * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package pdfpicmangler; import java.awt.Graphics; import java.awt.Image; import java.awt.image.BufferedImage; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.List; import java.util.Map; import org.apache.pdfbox.exceptions.COSVisitorException; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.xobject.PDJpeg; import org.apache.pdfbox.pdmodel.graphics.xobject.PDPixelMap; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; public class PDFPicMangler { private Options opts = new Options(); private Map<String, Float> resolutions; private int currentPage = -1; private PDDocument process(PDDocument doc, Map<String, Float> resolutions) throws IOException { this.resolutions = resolutions; List<?> pages = doc.getDocumentCatalog().getAllPages(); for (int i = 0; i < pages.size(); i++) { if (!(pages.get(i) instanceof PDPage)) continue; PDPage page = (PDPage) pages.get(i); currentPage = i + 1; scanResources(page.getResources(), doc); } return doc; } private void scanResources(final PDResources rList, final PDDocument doc) throws FileNotFoundException, IOException { if (rList == null) { return; } Map<String, PDXObject> xObs = rList.getXObjects(); for (String imgName : xObs.keySet()) { final PDXObject xObj = xObs.get(imgName); if (xObj instanceof PDXObjectForm) scanResources(((PDXObjectForm) xObj).getResources(), doc); if (!(xObj instanceof PDXObjectImage)) continue; PDXObjectImage img = (PDXObjectImage) xObj; String imageName = currentPage + imgName; // got an image! if (opts.doExtract) { img.write2file(imageName); } if (opts.doStatistics) { System.out.println(imageInfo(img, imageName)); } if (opts.doImport && opts.importNames.containsKey(imageName)) { String path = opts.importPath; String fileName = opts.importNames.get(imageName); String fileNameLower = fileName.toLowerCase(); String fileWithPath = path + "/" + fileName; if (fileNameLower.endsWith(".png")) { System.out.println("importing " + fileWithPath + " as " + imageName + " [PNG]"); FileInputStream is = new FileInputStream(fileWithPath); img = new PDPng(doc, is); xObs.put(imgName, img); is.close(); } if (fileNameLower.endsWith(".jpg") || fileNameLower.endsWith(".jpeg")) { System.out.println("importing " + fileWithPath + " as " + imageName + " [JPG]"); FileInputStream is = new FileInputStream(path + "/" + fileName); img = new PDJpeg(doc, is); xObs.put(imgName, img); is.close(); } } if (opts.doShrink) { System.out.println("Compressing image: " + imageName + " ..."); img = imageShrink(doc, resolutions.get(imageName), img); xObs.put(imgName, img); } } rList.setXObjects(xObs); } private PDXObjectImage imageShrink(final PDDocument doc, float resolution, PDXObjectImage img) throws IOException { if (resolution > opts.resolutionThreshold) { int width = (int) (img.getWidth() * opts.resolution / resolution); int height = (int) (img.getHeight() * opts.resolution / resolution); System.out.println( " - resizing: " + img.getWidth() + "x" + img.getHeight() + " -> " + width + "x" + height); BufferedImage image = img.getRGBImage(); BufferedImage imageSmall = resizedImage(width, height, image); String suffix = img.getSuffix(); System.out.println(" - writing back as " + suffix); img.clear(); int uncompressed = width * height * 3; try { if ("jpg".equals(suffix)) { PDJpeg jpg = makeJpeg(imageSmall, doc); int compressed = jpg.getPDStream().getLength(); System.out.println(" - jpg: ratio: " + (float) compressed / uncompressed + "% uncompressed: " + uncompressed + " compressed: " + compressed); jpg.clear(); return jpg; } if ("png".equals(suffix)) { PDPixelMap png = makePng(imageSmall, doc); int compressed = png.getPDStream().getLength(); System.out.println(" - png: ratio: " + (float) compressed / uncompressed + "% uncompressed: " + uncompressed + " compressed: " + compressed); png.clear(); return png; } } catch (IOException e) { System.out.println(e.getMessage()); e.printStackTrace(); } } return img; } private String imageInfo(PDXObjectImage img, String imageName) { StringBuilder info = new StringBuilder(); info.append(img.getPDStream().getLength()); info.append(" "); info.append((int) (resolutions.get(imageName).floatValue())); info.append(" "); info.append(img.getWidth()); info.append("x"); info.append(img.getHeight()); info.append(" "); info.append(img.getSuffix()); info.append(" "); int numBytes = img.getPDStream().getLength(); int pixels = img.getWidth() * img.getHeight(); info.append((800 * numBytes / pixels) / 100.0); info.append(" "); info.append(imageName); return info.toString(); } private BufferedImage resizedImage(int width, int height, Image image) { // get rescaled image Image imageSmall = image.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING); // render rescaled image into buffer BufferedImage imageSmallBuffer = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); Graphics g = imageSmallBuffer.getGraphics(); g.drawImage(imageSmall, 0, 0, null); g.dispose(); return imageSmallBuffer; } private PDPixelMap makePng(BufferedImage image, final PDDocument doc) throws IOException { // TODO use better compression lib here return new PDPixelMap(doc, image); } private PDJpeg makeJpeg(BufferedImage image, final PDDocument doc) throws IOException { PDJpeg jpg = new PDJpeg(doc, image, (float) opts.quality); return jpg; } private static PDDocument openDocument(String fileName) throws IOException { final FileInputStream fis = new FileInputStream(fileName); final PDFParser parser = new PDFParser(fis); parser.parse(); return parser.getPDDocument(); } public static void main(String[] args) { if (args.length == 0) { Options.usage(); return; } PDFPicMangler mangler = new PDFPicMangler(); mangler.opts.read(args); System.out.println("opening file " + mangler.opts.pdfFileName); try { PDDocument doc = openDocument(mangler.opts.pdfFileName); ResolutionAnalyzer occurences = new ResolutionAnalyzer(); Map<String, Float> cache = occurences.analyze(doc); System.out.println("--------------------------------------------"); doc = mangler.process(doc, cache); System.out.println("--------------------------------------------"); if (mangler.opts.doShrink || mangler.opts.doImport) { System.out.println("writing to " + mangler.opts.outputFileName); doc.save(mangler.opts.outputFileName); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (COSVisitorException e) { e.printStackTrace(); } } }