Example usage for com.itextpdf.text.pdf PdfReader getPageContent

List of usage examples for com.itextpdf.text.pdf PdfReader getPageContent

Introduction

In this page you can find the example usage for com.itextpdf.text.pdf PdfReader getPageContent.

Prototype

public byte[] getPageContent(final int pageNum, final RandomAccessFileOrArray file) throws IOException 

Source Link

Document

Gets the contents of the page.

Usage

From source file:com.betel.flowers.pdf.util.RemoveBlankPageFromPDF.java

public static void removeBlankPdfPages(String source, String destination)
        throws IOException, DocumentException {
    PdfReader r = null;
    RandomAccessSourceFactory rasf = null;
    RandomAccessFileOrArray raf = null;/*from w  w w .j  av a2 s . c  o m*/
    Document document = null;
    PdfCopy writer = null;

    try {
        r = new PdfReader(source);
        // deprecated
        //    RandomAccessFileOrArray raf
        //           = new RandomAccessFileOrArray(pdfSourceFile);
        // itext 5.4.1
        rasf = new RandomAccessSourceFactory();
        raf = new RandomAccessFileOrArray(rasf.createBestSource(source));
        document = new Document(r.getPageSizeWithRotation(1));
        writer = new PdfCopy(document, new FileOutputStream(destination));
        document.open();
        PdfImportedPage page = null;

        for (int i = 1; i <= r.getNumberOfPages(); i++) {
            // first check, examine the resource dictionary for /Font or
            // /XObject keys.  If either are present -> not blank.
            PdfDictionary pageDict = r.getPageN(i);
            PdfDictionary resDict = (PdfDictionary) pageDict.get(PdfName.RESOURCES);
            boolean noFontsOrImages = true;
            if (resDict != null) {
                noFontsOrImages = resDict.get(PdfName.FONT) == null && resDict.get(PdfName.XOBJECT) == null;
            }

            if (!noFontsOrImages) {
                byte bContent[] = r.getPageContent(i, raf);
                ByteArrayOutputStream bs = new ByteArrayOutputStream();
                bs.write(bContent);

                if (bs.size() > BLANK_THRESHOLD) {
                    page = writer.getImportedPage(r, i);
                    writer.addPage(page);
                }
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
        if (writer != null) {
            writer.close();
        }
        if (raf != null) {
            raf.close();
        }
        if (r != null) {
            r.close();
        }
    }
}

From source file:pdfmt.pdf2image.java

License:Open Source License

/** 
 * Convert a PDF document to a TIF file 
 *///from  w w w .  jav  a 2  s.  c om
protected static void convert(String pdf, String tif, String destPdf) throws IOException {

    org.icepdf.core.pobjects.Document pdffile = new org.icepdf.core.pobjects.Document();

    try {
        pdffile.setFile(pdf);

    } catch (PDFException ex) {
        //  System.out.println("Error parsing PDF document " + ex); 
    } catch (PDFSecurityException ex) {
        //  System.out.println("Error encryption not supported " + ex); 
    } catch (FileNotFoundException ex) {
        // System.out.println("Error file not found " + ex); 
    } catch (IOException ex) {
        //  System.out.println("Error handling PDF document " + ex); 
    }

    int numPgs = pdffile.getNumberOfPages();

    try {
        // step 1: create new reader
        PdfReader r = new PdfReader(pdf);
        //  System.out.println("File Lenght:"  + r.getFileLength());
        RandomAccessFileOrArray raf = new RandomAccessFileOrArray(pdf);
        // System.out.println("Raf:" + raf);
        Document document = new Document(r.getPageSizeWithRotation(1));
        //           // step 2: create a writer that listens to the document
        PdfCopy writer = new PdfCopy(document, new FileOutputStream(destPdf));
        //           
        //           // step 3: we open the document
        document.open();
        //           // step 4: we add content
        PdfImportedPage page = null;
        //        
        //loop through each page and if the bs is larger than 20 than we know it is not blank.
        //if it is less than 20 than we don't include that blank page.

        float scale = 2.084f;
        float rotation = 0f;

        BufferedImage image[] = new BufferedImage[numPgs];

        for (int i = 0; i < numPgs; i++) {

            byte bContent[] = r.getPageContent(i + 1, raf);
            // System.out.println(bContent.toString());

            ByteArrayOutputStream bs = new ByteArrayOutputStream();
            //write the content to an output stream
            bs.write(bContent);

            //System.out.println("page content length of page " + i+1 + " = "
            //      + bs.size());

            /* 
             * Generate the image: 
             * Notes: 1275x1650 = 8.5 x 11 @ 150dpi ??? 
             */
            image[i] = (BufferedImage) pdffile.getPageImage(i, GraphicsRenderingHints.SCREEN,
                    Page.BOUNDARY_CROPBOX, rotation, scale);
            Iterator writers = ImageIO.getImageWritersByFormatName("TIFF");
            if (writers == null || !writers.hasNext()) {
                throw new RuntimeException("No writers for available.");

            }
            ImageWriter myWriter = (ImageWriter) writers.next();
            myWriter.setOutput(new FileImageOutputStream(new File(tif)));
            myWriter.prepareWriteSequence(null);
            ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(image[i]);
            IIOMetadata imageMetadata = myWriter.getDefaultImageMetadata(imageType, null);
            imageMetadata = createImageMetadata(imageMetadata);
            myWriter.writeToSequence(new IIOImage(image[i], null, imageMetadata), null);

            myWriter.dispose();
            image[i] = null;
            myWriter = null;

            FileInputStream in = new FileInputStream(tif);
            FileChannel channel = in.getChannel();
            ByteBuffer buffer = ByteBuffer.allocate((int) channel.size());
            channel.read(buffer);
            Image imageBlank;

            imageBlank = load(buffer.array());

            BufferedImage bufferedImage = imageToBufferedImage(imageBlank);
            boolean isBlank;
            isBlank = isBlank(bufferedImage);
            // System.out.println("isblank "+ isBlank);

            if (isBlank == false) {

                page = writer.getImportedPage(r, i + 1);
                writer.addPage(page);

            }
            bs.close();

            System.gc();
        }

        document.close();
        writer.close();
        raf.close();
        r.close();

    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:pdfmt.RemoveBlankPdf.java

License:Open Source License

/**
 * Convert a PDF document to a TIF file.
 * /*  w w  w  .j  av  a 2s.  c  o m*/
 * @param pdf
 *            the pdf
 * @param tif
 *            the tif
 * @param destPdf
 *            the dest pdf
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
protected void convert(String pdf, String tif, String destPdf) throws IOException {

    startTime = System.currentTimeMillis();
    logger.info("In the boolean convert(String pdf, String tif, String destPdf)");
    org.icepdf.core.pobjects.Document pdffile = new org.icepdf.core.pobjects.Document();

    try {

        pdffile.setFile(pdf);

    } catch (PDFException ex) {
        // System.out.println("Error parsing PDF document " + ex);
    } catch (PDFSecurityException ex) {
        // System.out.println("Error encryption not supported " + ex);
    } catch (FileNotFoundException ex) {
        // System.out.println("Error file not found " + ex);
    } catch (IOException ex) {
        // System.out.println("Error handling PDF document " + ex);
    }

    int numPgs = pdffile.getNumberOfPages();
    msg.setText(".....::::: Converting pages please wait :::::.....");
    addComponent(contentPane, msg, 10, 110, 200, 18);

    try {
        // step 1: create new reader
        PdfReader r = new PdfReader(pdf);
        // System.out.println("File Lenght:" + r.getFileLength());
        RandomAccessFileOrArray raf = new RandomAccessFileOrArray(pdf);
        // System.out.println("Raf:" + raf);
        Document document = new Document(r.getPageSizeWithRotation(1));
        // // step 2: create a writer that listens to the document
        PdfCopy writer = new PdfCopy(document, new FileOutputStream(destPdf));
        //           
        // // step 3: we open the document
        document.open();
        // // step 4: we add content
        PdfImportedPage page = null;

        float scale = 2.084f;
        float rotation = 0f;

        BufferedImage image[] = new BufferedImage[numPgs];

        // -------- CHANGE
        jprogress.setMaximum(numPgs);
        // -------- CHANGE

        for (int i = 0; i < numPgs; i++) {

            // -------- CHANGE
            jprogress.setValue(i + 1);
            // -------- CHANGE

            byte bContent[] = r.getPageContent(i + 1, raf);
            // System.out.println(bContent.toString());

            ByteArrayOutputStream bs = new ByteArrayOutputStream();
            // write the content to an output stream
            bs.write(bContent);

            // System.out.println("page content length of page " + i+1 +
            // " = "
            // + bs.size());

            /*
             * Generate the image: Notes: 1275x1650 = 8.5 x 11 @ 150dpi ???
             */
            image[i] = (BufferedImage) pdffile.getPageImage(i, GraphicsRenderingHints.SCREEN,
                    Page.BOUNDARY_CROPBOX, rotation, scale);
            Iterator writers = ImageIO.getImageWritersByFormatName("TIFF");
            if (writers == null || !writers.hasNext()) {
                throw new RuntimeException("No writers for available.");

            }
            ImageWriter myWriter = (ImageWriter) writers.next();
            myWriter.setOutput(new FileImageOutputStream(new File(tif)));
            myWriter.prepareWriteSequence(null);
            ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(image[i]);
            IIOMetadata imageMetadata = myWriter.getDefaultImageMetadata(imageType, null);
            imageMetadata = createImageMetadata(imageMetadata);
            myWriter.writeToSequence(new IIOImage(image[i], null, imageMetadata), null);

            myWriter.dispose();
            image[i] = null;
            myWriter = null;

            FileInputStream in = new FileInputStream(tif);
            FileChannel channel = in.getChannel();
            ByteBuffer buffer = ByteBuffer.allocate((int) channel.size());
            channel.read(buffer);
            Image imageBlank;

            imageBlank = load(buffer.array());

            BufferedImage bufferedImage = imageToBufferedImage(imageBlank);
            boolean isBlank;
            isBlank = isBlank(bufferedImage);
            // System.out.println("isblank "+ isBlank);
            boolean hasContent = false;
            File file = new File(TEMP_DIR + TEMP_EXTR_TEXT);

            try {
                FileWriter fileWriter = new FileWriter(file);

                PageText pageText = pdffile.getPageText(i);
                if (pageText != null && pageText.getPageLines() != null) {
                    fileWriter.write(pageText.toString());
                }

                // close the writer
                fileWriter.close();
                System.out.println(file.length());
                if (file.length() > 20) {
                    hasContent = true;

                }
                file.delete();

                System.out.println(TEMP_TIFF + " deleted");
            } catch (IOException e) {

                e.printStackTrace();
            }

            if (isBlank == false && hasContent == true) {

                page = writer.getImportedPage(r, i + 1);
                writer.addPage(page);

            }
            bs.close();
            in.close();
            File ft = new File(TEMP_DIR + TEMP_TIFF);
            boolean check = ft.delete();
            if (check == true) {
                System.out.println("Deleted");
            } else {
                System.out.println("Stuck");
            }
            System.gc();

        }

        document.close();
        writer.close();
        raf.close();
        r.close();
        stopTime = System.currentTimeMillis();
        logger.info("Exit boolean convert(String pdf, String tif, String destPdf) with time:    "
                + ((stopTime - startTime) / 1000));

    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:spntoolsdata.pdf.util.RemoveBlankPageFromPDF.java

public static void removeBlankPdfPages(String source, String destination)
        throws IOException, DocumentException {
    PdfReader r = null;
    RandomAccessSourceFactory rasf = null;
    RandomAccessFileOrArray raf = null;/*from ww w.ja  v a 2s.  com*/
    Document document = null;
    PdfCopy writer = null;

    try {
        r = new PdfReader(source);
        // deprecated
        //    RandomAccessFileOrArray raf
        //           = new RandomAccessFileOrArray(pdfSourceFile);
        // itext 5.4.1
        rasf = new RandomAccessSourceFactory();
        raf = new RandomAccessFileOrArray(rasf.createBestSource(source));
        document = new Document(r.getPageSizeWithRotation(1));
        writer = new PdfCopy(document, new FileOutputStream(destination));
        document.open();
        PdfImportedPage page = null;

        for (int i = 1; i <= r.getNumberOfPages(); i++) {
            // first check, examine the resource dictionary for /Font or
            // /XObject keys.  If either are present -> not blank.
            PdfDictionary pageDict = r.getPageN(i);
            PdfDictionary resDict = (PdfDictionary) pageDict.get(PdfName.RESOURCES);
            boolean noFontsOrImages = true;
            if (resDict != null) {
                noFontsOrImages = resDict.get(PdfName.FONT) == null && resDict.get(PdfName.XOBJECT) == null;
            }

            if (!noFontsOrImages) {
                byte bContent[] = r.getPageContent(i, raf);
                ByteArrayOutputStream bs = new ByteArrayOutputStream();
                bs.write(bContent);

                if (bs.size() > BLANK_THRESHOLD) {
                    page = writer.getImportedPage(r, i);
                    writer.addPage(page);
                }
            }
        }
    } finally {
        if (document != null)
            document.close();
        if (writer != null)
            writer.close();
        if (raf != null)
            raf.close();
        if (r != null)
            r.close();
    }
}