Example usage for com.itextpdf.text.pdf PdfReader getPageContent

Introduction

On this page you can find example usages of com.itextpdf.text.pdf.PdfReader.getPageContent.

Prototype

public byte[] getPageContent(final int pageNum, final RandomAccessFileOrArray file) throws IOException

Source Link

Document

Gets the contents of the page.

Usage

From source file:com.betel.flowers.pdf.util.RemoveBlankPageFromPDF.java

/**
 * Copies {@code source} to {@code destination}, leaving out the pages that
 * are considered blank.
 * <p>
 * A page is copied only when its resource dictionary has a {@code /Font} or
 * {@code /XObject} entry and its content stream is longer than
 * {@code BLANK_THRESHOLD} bytes; every other page is skipped.
 * <p>
 * NOTE(review): closing an iText document to which no page was added is
 * expected to fail, so a source made only of blank pages presumably ends in
 * an exception from the cleanup below - confirm how callers want that case
 * reported.
 *
 * @param source      path of the PDF to read
 * @param destination path of the PDF to write
 * @throws IOException       if reading the source or writing the destination fails
 * @throws DocumentException if iText cannot create or fill the destination document
 */
public static void removeBlankPdfPages(String source, String destination)
        throws IOException, DocumentException {
    PdfReader r = null;
    RandomAccessFileOrArray raf = null;
    FileOutputStream out = null;
    Document document = null;
    PdfCopy writer = null;

    try {
        r = new PdfReader(source);
        // RandomAccessFileOrArray(String) is deprecated; since iText 5.4.1 the
        // source comes from a RandomAccessSourceFactory.
        raf = new RandomAccessFileOrArray(new RandomAccessSourceFactory().createBestSource(source));
        document = new Document(r.getPageSizeWithRotation(1));
        // Keep our own handle on the stream so it can be closed even when the
        // writer is never created or fails while closing.
        out = new FileOutputStream(destination);
        writer = new PdfCopy(document, out);
        document.open();

        final int pageCount = r.getNumberOfPages();
        for (int i = 1; i <= pageCount; i++) {
            // First check: look for /Font or /XObject in the page resources.
            // getAsDict follows indirect references, whereas casting the raw
            // get() result fails when /Resources is stored as a reference.
            PdfDictionary pageDict = r.getPageN(i);
            PdfDictionary resDict = pageDict.getAsDict(PdfName.RESOURCES);
            boolean hasFontsOrImages = resDict != null
                    && (resDict.get(PdfName.FONT) != null || resDict.get(PdfName.XOBJECT) != null);
            if (!hasFontsOrImages) {
                continue;
            }

            // Second check: the content stream must be longer than the threshold.
            byte[] content = r.getPageContent(i, raf);
            if (content.length > BLANK_THRESHOLD) {
                writer.addPage(writer.getImportedPage(r, i));
            }
        }
    } finally {
        // Every resource is released in its own finally so that a failure
        // while closing one of them cannot leak the others.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } finally {
                try {
                    if (raf != null) {
                        raf.close();
                    }
                } finally {
                    try {
                        if (r != null) {
                            r.close();
                        }
                    } finally {
                        if (out != null) {
                            out.close();
                        }
                    }
                }
            }
        }
    }
}

From source file:pdfmt.pdf2image.java

License:Open Source License

/**
 * Copies the non-blank pages of a PDF into a new PDF.
 * <p>
 * Each page is rendered with ICEpdf, written to {@code tif} as a TIFF image
 * (the same file is rewritten for every page), read back and handed to
 * {@code isBlank}; pages reported as not blank are imported into
 * {@code destPdf} with iText's {@code PdfCopy}.
 * <p>
 * Failures while opening or processing the document are printed to standard
 * error and are not rethrown.
 *
 * @param pdf     path of the PDF to read
 * @param tif     path of the scratch TIFF file used for the blank check
 * @param destPdf path of the PDF that receives the non-blank pages
 * @throws IOException declared by the signature; failures are currently
 *                     reported rather than propagated
 */
protected static void convert(String pdf, String tif, String destPdf) throws IOException {

    org.icepdf.core.pobjects.Document pdffile = new org.icepdf.core.pobjects.Document();

    try {
        pdffile.setFile(pdf);
    } catch (PDFException ex) {
        System.err.println("Error parsing PDF document " + ex);
    } catch (PDFSecurityException ex) {
        System.err.println("Error encryption not supported " + ex);
    } catch (FileNotFoundException ex) {
        System.err.println("Error file not found " + ex);
    } catch (IOException ex) {
        System.err.println("Error handling PDF document " + ex);
    }

    int numPgs = pdffile.getNumberOfPages();

    PdfReader r = null;
    FileOutputStream destOut = null;
    Document document = null;
    PdfCopy writer = null;

    try {
        // step 1: create the reader
        r = new PdfReader(pdf);
        // step 2: create a writer that listens to the document
        document = new Document(r.getPageSizeWithRotation(1));
        destOut = new FileOutputStream(destPdf);
        writer = new PdfCopy(document, destOut);
        // step 3: open the document
        document.open();

        // Rendering scale; original note: 1275x1650 = 8.5 x 11 @ 150dpi ???
        float scale = 2.084f;
        float rotation = 0f;

        // step 4: add every page that is not blank
        for (int i = 0; i < numPgs; i++) {

            BufferedImage rendered = (BufferedImage) pdffile.getPageImage(i, GraphicsRenderingHints.SCREEN,
                    Page.BOUNDARY_CROPBOX, rotation, scale);
            Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("TIFF");
            if (writers == null || !writers.hasNext()) {
                throw new RuntimeException("No writers for available.");
            }
            ImageWriter myWriter = writers.next();

            // Write the rendered page to the scratch TIFF. The stream is closed
            // explicitly so the file can be read back and rewritten afterwards.
            FileImageOutputStream tifOut = new FileImageOutputStream(new File(tif));
            try {
                myWriter.setOutput(tifOut);
                myWriter.prepareWriteSequence(null);
                ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(rendered);
                IIOMetadata imageMetadata = myWriter.getDefaultImageMetadata(imageType, null);
                imageMetadata = createImageMetadata(imageMetadata);
                myWriter.writeToSequence(new IIOImage(rendered, null, imageMetadata), null);
                myWriter.endWriteSequence();
            } finally {
                try {
                    myWriter.dispose();
                } finally {
                    tifOut.close();
                }
            }

            // Read the whole TIFF back into memory.
            byte[] tifBytes;
            FileInputStream in = new FileInputStream(tif);
            try {
                FileChannel channel = in.getChannel();
                ByteBuffer buffer = ByteBuffer.allocate((int) channel.size());
                // A single read() is allowed to return before the buffer is full.
                while (buffer.hasRemaining()) {
                    if (channel.read(buffer) < 0) {
                        break;
                    }
                }
                tifBytes = buffer.array();
            } finally {
                in.close();
            }

            Image imageBlank = load(tifBytes);
            BufferedImage bufferedImage = imageToBufferedImage(imageBlank);

            if (!isBlank(bufferedImage)) {
                PdfImportedPage page = writer.getImportedPage(r, i + 1);
                writer.addPage(page);
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release everything on both the success and the failure path; each
        // close sits in its own finally so one failure cannot leak the rest.
        try {
            try {
                if (document != null) {
                    document.close();
                }
            } finally {
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } finally {
                    try {
                        if (r != null) {
                            r.close();
                        }
                    } finally {
                        try {
                            if (destOut != null) {
                                destOut.close();
                            }
                        } finally {
                            pdffile.dispose();
                        }
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

From source file:pdfmt.RemoveBlankPdf.java

License:Open Source License

/**
 * Convert a PDF document to a TIF file.
 * /*  w w  w  .j  av  a 2s.  c  o m*/
 * @param pdf
 *            the pdf
 * @param tif
 *            the tif
 * @param destPdf
 *            the dest pdf
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
protected void convert(String pdf, String tif, String destPdf) throws IOException {

    startTime = System.currentTimeMillis();
    logger.info("In the boolean convert(String pdf, String tif, String destPdf)");
    org.icepdf.core.pobjects.Document pdffile = new org.icepdf.core.pobjects.Document();

    try {

        pdffile.setFile(pdf);

    } catch (PDFException ex) {
        // System.out.println("Error parsing PDF document " + ex);
    } catch (PDFSecurityException ex) {
        // System.out.println("Error encryption not supported " + ex);
    } catch (FileNotFoundException ex) {
        // System.out.println("Error file not found " + ex);
    } catch (IOException ex) {
        // System.out.println("Error handling PDF document " + ex);
    }

    int numPgs = pdffile.getNumberOfPages();
    msg.setText(".....::::: Converting pages please wait :::::.....");
    addComponent(contentPane, msg, 10, 110, 200, 18);

    try {
        // step 1: create new reader
        PdfReader r = new PdfReader(pdf);
        // System.out.println("File Lenght:" + r.getFileLength());
        RandomAccessFileOrArray raf = new RandomAccessFileOrArray(pdf);
        // System.out.println("Raf:" + raf);
        Document document = new Document(r.getPageSizeWithRotation(1));
        // // step 2: create a writer that listens to the document
        PdfCopy writer = new PdfCopy(document, new FileOutputStream(destPdf));
        //           
        // // step 3: we open the document
        document.open();
        // // step 4: we add content
        PdfImportedPage page = null;

        float scale = 2.084f;
        float rotation = 0f;

        BufferedImage image[] = new BufferedImage[numPgs];

        // -------- CHANGE
        jprogress.setMaximum(numPgs);
        // -------- CHANGE

        for (int i = 0; i < numPgs; i++) {

            // -------- CHANGE
            jprogress.setValue(i + 1);
            // -------- CHANGE

            byte bContent[] = r.getPageContent(i + 1, raf);
            // System.out.println(bContent.toString());

            ByteArrayOutputStream bs = new ByteArrayOutputStream();
            // write the content to an output stream
            bs.write(bContent);

            // System.out.println("page content length of page " + i+1 +
            // " = "
            // + bs.size());

            /*
             * Generate the image: Notes: 1275x1650 = 8.5 x 11 @ 150dpi ???
             */
            image[i] = (BufferedImage) pdffile.getPageImage(i, GraphicsRenderingHints.SCREEN,
                    Page.BOUNDARY_CROPBOX, rotation, scale);
            Iterator writers = ImageIO.getImageWritersByFormatName("TIFF");
            if (writers == null || !writers.hasNext()) {
                throw new RuntimeException("No writers for available.");

            }
            ImageWriter myWriter = (ImageWriter) writers.next();
            myWriter.setOutput(new FileImageOutputStream(new File(tif)));
            myWriter.prepareWriteSequence(null);
            ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(image[i]);
            IIOMetadata imageMetadata = myWriter.getDefaultImageMetadata(imageType, null);
            imageMetadata = createImageMetadata(imageMetadata);
            myWriter.writeToSequence(new IIOImage(image[i], null, imageMetadata), null);

            myWriter.dispose();
            image[i] = null;
            myWriter = null;

            FileInputStream in = new FileInputStream(tif);
            FileChannel channel = in.getChannel();
            ByteBuffer buffer = ByteBuffer.allocate((int) channel.size());
            channel.read(buffer);
            Image imageBlank;

            imageBlank = load(buffer.array());

            BufferedImage bufferedImage = imageToBufferedImage(imageBlank);
            boolean isBlank;
            isBlank = isBlank(bufferedImage);
            // System.out.println("isblank "+ isBlank);
            boolean hasContent = false;
            File file = new File(TEMP_DIR + TEMP_EXTR_TEXT);

            try {
                FileWriter fileWriter = new FileWriter(file);

                PageText pageText = pdffile.getPageText(i);
                if (pageText != null && pageText.getPageLines() != null) {
                    fileWriter.write(pageText.toString());
                }

                // close the writer
                fileWriter.close();
                System.out.println(file.length());
                if (file.length() > 20) {
                    hasContent = true;

                }
                file.delete();

                System.out.println(TEMP_TIFF + " deleted");
            } catch (IOException e) {

                e.printStackTrace();
            }

            if (isBlank == false && hasContent == true) {

                page = writer.getImportedPage(r, i + 1);
                writer.addPage(page);

            }
            bs.close();
            in.close();
            File ft = new File(TEMP_DIR + TEMP_TIFF);
            boolean check = ft.delete();
            if (check == true) {
                System.out.println("Deleted");
            } else {
                System.out.println("Stuck");
            }
            System.gc();

        }

        document.close();
        writer.close();
        raf.close();
        r.close();
        stopTime = System.currentTimeMillis();
        logger.info("Exit boolean convert(String pdf, String tif, String destPdf) with time:    "
                + ((stopTime - startTime) / 1000));

    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:spntoolsdata.pdf.util.RemoveBlankPageFromPDF.java

/**
 * Writes a copy of {@code source} to {@code destination} without the pages
 * that are considered blank.
 * <p>
 * A page is kept only when its resource dictionary declares a {@code /Font}
 * or {@code /XObject} entry and its content stream is longer than
 * {@code BLANK_THRESHOLD} bytes; all other pages are dropped.
 * <p>
 * NOTE(review): closing an iText document to which no page was added is
 * expected to fail, so a source made only of blank pages presumably ends in
 * an exception from the cleanup below - confirm how callers want that case
 * reported.
 *
 * @param source      path of the PDF to read
 * @param destination path of the PDF to write
 * @throws IOException       if reading the source or writing the destination fails
 * @throws DocumentException if iText cannot create or fill the destination document
 */
public static void removeBlankPdfPages(String source, String destination)
        throws IOException, DocumentException {
    PdfReader r = null;
    RandomAccessFileOrArray raf = null;
    FileOutputStream out = null;
    Document document = null;
    PdfCopy writer = null;

    try {
        r = new PdfReader(source);
        // RandomAccessFileOrArray(String) is deprecated; since iText 5.4.1 the
        // source comes from a RandomAccessSourceFactory.
        raf = new RandomAccessFileOrArray(new RandomAccessSourceFactory().createBestSource(source));
        document = new Document(r.getPageSizeWithRotation(1));
        // Keep our own handle on the stream so it can be closed even when the
        // writer is never created or fails while closing.
        out = new FileOutputStream(destination);
        writer = new PdfCopy(document, out);
        document.open();

        final int pageCount = r.getNumberOfPages();
        for (int i = 1; i <= pageCount; i++) {
            // First check: look for /Font or /XObject in the page resources.
            // getAsDict follows indirect references, whereas casting the raw
            // get() result fails when /Resources is stored as a reference.
            PdfDictionary pageDict = r.getPageN(i);
            PdfDictionary resDict = pageDict.getAsDict(PdfName.RESOURCES);
            boolean hasFontsOrImages = resDict != null
                    && (resDict.get(PdfName.FONT) != null || resDict.get(PdfName.XOBJECT) != null);
            if (!hasFontsOrImages) {
                continue;
            }

            // Second check: the content stream must be longer than the threshold.
            byte[] content = r.getPageContent(i, raf);
            if (content.length > BLANK_THRESHOLD) {
                writer.addPage(writer.getImportedPage(r, i));
            }
        }
    } finally {
        // Every resource is released in its own finally so that a failure
        // while closing one of them cannot leak the others.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } finally {
                try {
                    if (raf != null) {
                        raf.close();
                    }
                } finally {
                    try {
                        if (r != null) {
                            r.close();
                        }
                    } finally {
                        if (out != null) {
                            out.close();
                        }
                    }
                }
            }
        }
    }
}