pdfmt.pdf2image.java Source code

Java tutorial

Introduction

Here is the source code for pdfmt.pdf2image.java

Source

/*  This file is part of PDFCleaner.
 *  The class copies a PDF document while leaving out the pages that render as blank.
 *
 *  Copyright (C) 2012  Sourabh Gandhe
 *                      
 *   
 *  PDFCleaner is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  PDFCleaner is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with PDFCleaner. Check for file named COPYING.
 *  If not, see <http://www.gnu.org/licenses/>.
*/

package pdfmt;

import java.awt.Color;
import java.awt.Graphics;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.awt.image.PixelGrabber;
import java.awt.image.RenderedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

import java.util.Iterator;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOInvalidTreeException;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.FileImageOutputStream;
import javax.media.jai.PlanarImage;
import javax.swing.JFileChooser;
import javax.swing.JOptionPane;

import org.icepdf.core.exceptions.PDFException;
import org.icepdf.core.exceptions.PDFSecurityException;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.util.GraphicsRenderingHints;

import com.itextpdf.text.Document;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
import com.sun.media.imageio.plugins.tiff.BaselineTIFFTagSet;
import com.sun.media.imageio.plugins.tiff.TIFFDirectory;
import com.sun.media.imageio.plugins.tiff.TIFFField;
import com.sun.media.imageio.plugins.tiff.TIFFTag;
import com.sun.media.jai.codec.ByteArraySeekableStream;
import com.sun.media.jai.codec.ImageCodec;
import com.sun.media.jai.codec.ImageDecoder;
import com.sun.media.jai.codec.SeekableStream;

/**
 * Copies a PDF document to a new file while leaving out pages that look blank.
 *
 * <p>Each page is rendered with ICEpdf, written to a scratch TIFF file, decoded
 * again through the JAI codec, scaled down and finally handed to
 * {@link #isBlank(BufferedImage)}. Pages that are not judged blank are copied
 * to the destination with iText's {@code PdfCopy}.
 */
public class pdf2image {
    // Status codes; none of them is referenced inside this class.
    public static final int FAILURE = 2;
    public static final int BLANK = 0;
    public static final int NOTBLANK = 1;
    // value where we can consider that this is a blank image
    // can be much higher depending of the TIF source
    // (ex. scanner or fax)
    public static final int BLANK_THRESHOLD = 1000;

    // Values written into the TIFF metadata by createImageMetadata().
    private static final char[] INCH_RESOLUTION_UNIT = new char[] { 2 };
    private static final long[][] X_DPI_RESOLUTION = new long[][] { { 150, 1 } };
    private static final long[][] Y_DPI_RESOLUTION = new long[][] { { 150, 1 } };
    private static final char[] BITS_PER_SAMPLE = new char[] { 1 };
    private static final char[] COMPRESSION = new char[] { BaselineTIFFTagSet.COMPRESSION_LZW };
    // Used as the RowsPerStrip value.
    // NOTE(review): presumably the pixel height of an 8.5 x 11 inch page at 150 dpi - confirm.
    private static final int HEIGHT = 1650;

    // Zoom and rotation handed to ICEpdf when a page is rendered.
    private static final float RENDER_SCALE = 2.084f;
    private static final float RENDER_ROTATION = 0f;

    // Width, in pixels, of the reduced image that is analysed for blankness.
    private static final int SCALED_WIDTH = 500;

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(pdf2image.class.getName());

    /**
     * ::Constructor()
     */
    pdf2image() {

    }

    /**
     * Copies {@code pdf} to {@code destPdf}, skipping the pages judged blank.
     *
     * <p>The intermediate TIFF is kept in a temporary file that is removed
     * again before this method returns.
     *
     * @param pdf     path of the PDF document to read
     * @param destPdf path of the PDF document to create
     * @return {@code true} when the copy was written, {@code false} when the
     *         conversion failed (the cause is logged)
     */
    public boolean convertPDF(String pdf, String destPdf) {
        File scratch = null;
        try {
            scratch = File.createTempFile("pdf2image", ".tiff");
            convert(pdf, scratch.getPath(), destPdf);
            return true;
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.WARNING, "Could not convert " + pdf, e);
            return false;
        } finally {
            if (scratch != null) {
                // Best effort; a leftover scratch file does not affect the result.
                scratch.delete();
            }
        }
    }

    /**
     * Copies {@code pdf} to {@code destPdf}, skipping the pages judged blank.
     *
     * @param pdf     path of the PDF document to read
     * @param tif     path of the scratch TIFF file; it is rewritten for every page
     * @param destPdf path of the PDF document to create
     * @throws IOException if the source cannot be read or rendered, if no TIFF
     *                     writer is installed, if every page is judged blank, or
     *                     if the destination cannot be written; failures raised
     *                     by the PDF and imaging libraries are wrapped with the
     *                     original exception as cause
     */
    protected static void convert(String pdf, String tif, String destPdf) throws IOException {
        org.icepdf.core.pobjects.Document rendered = new org.icepdf.core.pobjects.Document();
        PdfReader reader = null;
        FileOutputStream destStream = null;
        try {
            rendered.setFile(pdf);
            int pageCount = rendered.getNumberOfPages();
            if (pageCount < 1) {
                throw new IOException("No pages found in " + pdf);
            }

            reader = new PdfReader(pdf);
            Document output = new Document(reader.getPageSizeWithRotation(1));
            destStream = new FileOutputStream(destPdf);
            PdfCopy copier = new PdfCopy(output, destStream);
            output.open();

            int keptPages = 0;
            for (int i = 0; i < pageCount; i++) {
                // ICEpdf page indexes start at 0, iText page numbers at 1.
                if (!isPageBlank(rendered, i, tif)) {
                    copier.addPage(copier.getImportedPage(reader, i + 1));
                    keptPages++;
                }
            }
            if (keptPages == 0) {
                throw new IOException("Every page of " + pdf + " was judged blank; nothing to write to " + destPdf);
            }

            output.close();
            copier.close();
        } catch (IOException e) {
            throw e;
        } catch (Exception e) {
            // The PDF and imaging libraries raise their own exception types;
            // callers of this method only deal with IOException.
            throw new IOException("Could not convert " + pdf + " to " + destPdf + ": " + e, e);
        } finally {
            if (destStream != null) {
                try {
                    destStream.close();
                } catch (IOException ignored) {
                    // Only reached on the error path or for a second close;
                    // the primary failure is the one worth reporting.
                }
            }
            if (reader != null) {
                reader.close();
            }
            rendered.dispose();
        }
    }

    /**
     * Renders one page, sends it through the scratch TIFF file and reports
     * whether the result is judged blank.
     */
    private static boolean isPageBlank(org.icepdf.core.pobjects.Document rendered, int pageIndex, String tif)
            throws Exception {
        BufferedImage pageImage = (BufferedImage) rendered.getPageImage(pageIndex, GraphicsRenderingHints.SCREEN,
                Page.BOUNDARY_CROPBOX, RENDER_ROTATION, RENDER_SCALE);
        if (pageImage == null) {
            throw new IOException("Page " + (pageIndex + 1) + " could not be rendered.");
        }
        try {
            writeTiff(pageImage, tif);
        } finally {
            pageImage.flush();
        }
        Image scaled = load(readFile(tif));
        return isBlank(imageToBufferedImage(scaled));
    }

    /**
     * Writes a single image to {@code tif} as a TIFF carrying the metadata
     * produced by {@link #createImageMetadata(IIOMetadata)}.
     */
    private static void writeTiff(BufferedImage image, String tif) throws IOException {
        Iterator<ImageWriter> candidates = ImageIO.getImageWritersByFormatName("TIFF");
        if (candidates == null || !candidates.hasNext()) {
            throw new IOException("No ImageIO writer available for the TIFF format.");
        }
        ImageWriter tiffWriter = candidates.next();

        File target = new File(tif);
        // FileImageOutputStream does not truncate an existing file, so remove
        // whatever an earlier page left behind.
        target.delete();

        FileImageOutputStream sink = new FileImageOutputStream(target);
        try {
            tiffWriter.setOutput(sink);
            tiffWriter.prepareWriteSequence(null);
            ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(image);
            IIOMetadata metadata = createImageMetadata(tiffWriter.getDefaultImageMetadata(imageType, null));
            tiffWriter.writeToSequence(new IIOImage(image, null, metadata), null);
            tiffWriter.endWriteSequence();
        } finally {
            tiffWriter.dispose();
            // dispose() does not close the output stream.
            sink.close();
        }
    }

    /**
     * Reads a whole file into memory.
     */
    private static byte[] readFile(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            // A single read() may return fewer bytes than requested, so loop
            // until end of file.
            while ((read = in.read(chunk)) != -1) {
                collected.write(chunk, 0, read);
            }
            return collected.toByteArray();
        } finally {
            in.close();
        }
    }

    /**
     * Decodes an encoded image and returns a reduced copy of it.
     *
     * <p>The decoder is chosen by the JAI codec registry from the content of
     * {@code data}. The result is scaled to a width of 500 pixels, keeping the
     * aspect ratio, to preserve memory.
     *
     * @param data the encoded image
     * @return the scaled image
     * @throws IllegalArgumentException if {@code data} is {@code null}
     * @throws IOException              if no decoder recognises the data
     * @throws Exception                if decoding fails
     */
    public static Image load(byte[] data) throws Exception {
        if (data == null) {
            throw new IllegalArgumentException("data must not be null");
        }
        SeekableStream stream = new ByteArraySeekableStream(data);
        try {
            String[] decoderNames = ImageCodec.getDecoderNames(stream);
            if (decoderNames == null || decoderNames.length == 0) {
                throw new IOException("Unrecognised image format: no decoder accepts the supplied data.");
            }
            ImageDecoder decoder = ImageCodec.createImageDecoder(decoderNames[0], stream, null);
            RenderedImage decoded = decoder.decodeAsRenderedImage();
            BufferedImage fullSize = PlanarImage.wrapRenderedImage(decoded).getAsBufferedImage();
            // scale-down the image , maximum width : 500 px
            // to preserve memory
            return fullSize.getScaledInstance(SCALED_WIDTH, -1, Image.SCALE_SMOOTH);
        } finally {
            stream.close();
        }
    }

    /**
     * Paints an image into a new {@code TYPE_INT_RGB} buffered image of the
     * same size.
     *
     * @param im the image to copy; its width and height are queried without an
     *           image observer
     * @return the copy
     */
    public static BufferedImage imageToBufferedImage(Image im) {
        BufferedImage copy = new BufferedImage(im.getWidth(null), im.getHeight(null), BufferedImage.TYPE_INT_RGB);
        Graphics canvas = copy.getGraphics();
        try {
            canvas.drawImage(im, 0, 0, null);
        } finally {
            canvas.dispose();
        }
        return copy;
    }

    /**
     * Decides whether an image is blank by measuring how much its pixels vary.
     *
     * <p>Every pixel is reduced to its packed 24-bit RGB value. The pixels are
     * visited row by row; for each one the squared distance to the mean of the
     * pixels seen so far (the current one included) is accumulated. The image
     * counts as blank when the square root of the averaged sum is below
     * {@link #BLANK_THRESHOLD}.
     *
     * @param bi the image to examine
     * @return {@code true} if the image is considered blank
     * @throws Exception declared for compatibility with existing callers
     */
    public static boolean isBlank(BufferedImage bi) throws Exception {
        int width = bi.getWidth();
        int height = bi.getHeight();
        int[] pixels = bi.getRGB(0, 0, width, height, null, 0, width);

        long count = 0;
        long total = 0;
        double totalVariance = 0;
        for (int p = 0; p < pixels.length; p++) {
            count++;
            // Drop the alpha byte, keep red/green/blue.
            int pixelValue = pixels[p] & 0x00ffffff;
            total += pixelValue;
            // Floating-point division: long division would truncate the mean.
            double avg = (double) total / count;
            double delta = pixelValue - avg;
            totalVariance += delta * delta;
        }

        double stdDev = (count == 0) ? 0 : Math.sqrt(totalVariance / count);
        return (stdDev < BLANK_THRESHOLD);
    }

    /**
     * Return the metadata for the new TIF image
     *
     * <p>Adds the resolution unit, X/Y resolution, bits-per-sample,
     * rows-per-strip and compression fields defined by the constants of this
     * class to the given metadata.
     *
     * @param imageMetadata the writer's default metadata for the image
     * @return the metadata including the added fields
     * @throws IIOInvalidTreeException if the metadata cannot be converted
     */
    private static IIOMetadata createImageMetadata(IIOMetadata imageMetadata) throws IIOInvalidTreeException {

        //Get the IFD (Image File Directory) which is the root of all the tags
        //for this image. From here we can get all the tags in the image.
        TIFFDirectory ifd = TIFFDirectory.createFromMetadata(imageMetadata);

        //Tag definitions are looked up in the baseline TIFF tag set
        BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();

        //Resolution fields
        ifd.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_RESOLUTION_UNIT),
                TIFFTag.TIFF_SHORT, 1, INCH_RESOLUTION_UNIT));
        ifd.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION),
                TIFFTag.TIFF_RATIONAL, 1, X_DPI_RESOLUTION));
        ifd.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION),
                TIFFTag.TIFF_RATIONAL, 1, Y_DPI_RESOLUTION));

        //BitsPerSample field
        ifd.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_BITS_PER_SAMPLE),
                TIFFTag.TIFF_SHORT, 1, BITS_PER_SAMPLE));

        //RowsPerStrip field
        ifd.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_ROWS_PER_STRIP),
                TIFFTag.TIFF_LONG, 1, new long[] { HEIGHT }));

        //Compression field
        ifd.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_COMPRESSION),
                TIFFTag.TIFF_SHORT, 1, COMPRESSION));

        return ifd.getAsMetadata();

    }

    /**
     * Entry point; performs no work. Conversions are started through
     * {@link #convertPDF(String, String)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

    }
}