List of usage examples for com.itextpdf.text.pdf PdfReader getPageContent
public byte[] getPageContent(final int pageNum, final RandomAccessFileOrArray file) throws IOException
From source file:com.betel.flowers.pdf.util.RemoveBlankPageFromPDF.java
public static void removeBlankPdfPages(String source, String destination) throws IOException, DocumentException { PdfReader r = null; RandomAccessSourceFactory rasf = null; RandomAccessFileOrArray raf = null;/*from w w w .j av a2 s . c o m*/ Document document = null; PdfCopy writer = null; try { r = new PdfReader(source); // deprecated // RandomAccessFileOrArray raf // = new RandomAccessFileOrArray(pdfSourceFile); // itext 5.4.1 rasf = new RandomAccessSourceFactory(); raf = new RandomAccessFileOrArray(rasf.createBestSource(source)); document = new Document(r.getPageSizeWithRotation(1)); writer = new PdfCopy(document, new FileOutputStream(destination)); document.open(); PdfImportedPage page = null; for (int i = 1; i <= r.getNumberOfPages(); i++) { // first check, examine the resource dictionary for /Font or // /XObject keys. If either are present -> not blank. PdfDictionary pageDict = r.getPageN(i); PdfDictionary resDict = (PdfDictionary) pageDict.get(PdfName.RESOURCES); boolean noFontsOrImages = true; if (resDict != null) { noFontsOrImages = resDict.get(PdfName.FONT) == null && resDict.get(PdfName.XOBJECT) == null; } if (!noFontsOrImages) { byte bContent[] = r.getPageContent(i, raf); ByteArrayOutputStream bs = new ByteArrayOutputStream(); bs.write(bContent); if (bs.size() > BLANK_THRESHOLD) { page = writer.getImportedPage(r, i); writer.addPage(page); } } } } finally { if (document != null) { document.close(); } if (writer != null) { writer.close(); } if (raf != null) { raf.close(); } if (r != null) { r.close(); } } }
From source file:pdfmt.pdf2image.java
License:Open Source License
/** * Convert a PDF document to a TIF file *///from w w w . jav a 2 s. c om protected static void convert(String pdf, String tif, String destPdf) throws IOException { org.icepdf.core.pobjects.Document pdffile = new org.icepdf.core.pobjects.Document(); try { pdffile.setFile(pdf); } catch (PDFException ex) { // System.out.println("Error parsing PDF document " + ex); } catch (PDFSecurityException ex) { // System.out.println("Error encryption not supported " + ex); } catch (FileNotFoundException ex) { // System.out.println("Error file not found " + ex); } catch (IOException ex) { // System.out.println("Error handling PDF document " + ex); } int numPgs = pdffile.getNumberOfPages(); try { // step 1: create new reader PdfReader r = new PdfReader(pdf); // System.out.println("File Lenght:" + r.getFileLength()); RandomAccessFileOrArray raf = new RandomAccessFileOrArray(pdf); // System.out.println("Raf:" + raf); Document document = new Document(r.getPageSizeWithRotation(1)); // // step 2: create a writer that listens to the document PdfCopy writer = new PdfCopy(document, new FileOutputStream(destPdf)); // // // step 3: we open the document document.open(); // // step 4: we add content PdfImportedPage page = null; // //loop through each page and if the bs is larger than 20 than we know it is not blank. //if it is less than 20 than we don't include that blank page. float scale = 2.084f; float rotation = 0f; BufferedImage image[] = new BufferedImage[numPgs]; for (int i = 0; i < numPgs; i++) { byte bContent[] = r.getPageContent(i + 1, raf); // System.out.println(bContent.toString()); ByteArrayOutputStream bs = new ByteArrayOutputStream(); //write the content to an output stream bs.write(bContent); //System.out.println("page content length of page " + i+1 + " = " // + bs.size()); /* * Generate the image: * Notes: 1275x1650 = 8.5 x 11 @ 150dpi ??? */ image[i] = (BufferedImage) pdffile.getPageImage(i, GraphicsRenderingHints.SCREEN, Page.BOUNDARY_CROPBOX, rotation, scale); Iterator writers = ImageIO.getImageWritersByFormatName("TIFF"); if (writers == null || !writers.hasNext()) { throw new RuntimeException("No writers for available."); } ImageWriter myWriter = (ImageWriter) writers.next(); myWriter.setOutput(new FileImageOutputStream(new File(tif))); myWriter.prepareWriteSequence(null); ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(image[i]); IIOMetadata imageMetadata = myWriter.getDefaultImageMetadata(imageType, null); imageMetadata = createImageMetadata(imageMetadata); myWriter.writeToSequence(new IIOImage(image[i], null, imageMetadata), null); myWriter.dispose(); image[i] = null; myWriter = null; FileInputStream in = new FileInputStream(tif); FileChannel channel = in.getChannel(); ByteBuffer buffer = ByteBuffer.allocate((int) channel.size()); channel.read(buffer); Image imageBlank; imageBlank = load(buffer.array()); BufferedImage bufferedImage = imageToBufferedImage(imageBlank); boolean isBlank; isBlank = isBlank(bufferedImage); // System.out.println("isblank "+ isBlank); if (isBlank == false) { page = writer.getImportedPage(r, i + 1); writer.addPage(page); } bs.close(); System.gc(); } document.close(); writer.close(); raf.close(); r.close(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:pdfmt.RemoveBlankPdf.java
License:Open Source License
/** * Convert a PDF document to a TIF file. * /* w w w .j av a 2s. c o m*/ * @param pdf * the pdf * @param tif * the tif * @param destPdf * the dest pdf * @throws IOException * Signals that an I/O exception has occurred. */ protected void convert(String pdf, String tif, String destPdf) throws IOException { startTime = System.currentTimeMillis(); logger.info("In the boolean convert(String pdf, String tif, String destPdf)"); org.icepdf.core.pobjects.Document pdffile = new org.icepdf.core.pobjects.Document(); try { pdffile.setFile(pdf); } catch (PDFException ex) { // System.out.println("Error parsing PDF document " + ex); } catch (PDFSecurityException ex) { // System.out.println("Error encryption not supported " + ex); } catch (FileNotFoundException ex) { // System.out.println("Error file not found " + ex); } catch (IOException ex) { // System.out.println("Error handling PDF document " + ex); } int numPgs = pdffile.getNumberOfPages(); msg.setText(".....::::: Converting pages please wait :::::....."); addComponent(contentPane, msg, 10, 110, 200, 18); try { // step 1: create new reader PdfReader r = new PdfReader(pdf); // System.out.println("File Lenght:" + r.getFileLength()); RandomAccessFileOrArray raf = new RandomAccessFileOrArray(pdf); // System.out.println("Raf:" + raf); Document document = new Document(r.getPageSizeWithRotation(1)); // // step 2: create a writer that listens to the document PdfCopy writer = new PdfCopy(document, new FileOutputStream(destPdf)); // // // step 3: we open the document document.open(); // // step 4: we add content PdfImportedPage page = null; float scale = 2.084f; float rotation = 0f; BufferedImage image[] = new BufferedImage[numPgs]; // -------- CHANGE jprogress.setMaximum(numPgs); // -------- CHANGE for (int i = 0; i < numPgs; i++) { // -------- CHANGE jprogress.setValue(i + 1); // -------- CHANGE byte bContent[] = r.getPageContent(i + 1, raf); // System.out.println(bContent.toString()); ByteArrayOutputStream bs = new ByteArrayOutputStream(); // write the content to an output stream bs.write(bContent); // System.out.println("page content length of page " + i+1 + // " = " // + bs.size()); /* * Generate the image: Notes: 1275x1650 = 8.5 x 11 @ 150dpi ??? */ image[i] = (BufferedImage) pdffile.getPageImage(i, GraphicsRenderingHints.SCREEN, Page.BOUNDARY_CROPBOX, rotation, scale); Iterator writers = ImageIO.getImageWritersByFormatName("TIFF"); if (writers == null || !writers.hasNext()) { throw new RuntimeException("No writers for available."); } ImageWriter myWriter = (ImageWriter) writers.next(); myWriter.setOutput(new FileImageOutputStream(new File(tif))); myWriter.prepareWriteSequence(null); ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(image[i]); IIOMetadata imageMetadata = myWriter.getDefaultImageMetadata(imageType, null); imageMetadata = createImageMetadata(imageMetadata); myWriter.writeToSequence(new IIOImage(image[i], null, imageMetadata), null); myWriter.dispose(); image[i] = null; myWriter = null; FileInputStream in = new FileInputStream(tif); FileChannel channel = in.getChannel(); ByteBuffer buffer = ByteBuffer.allocate((int) channel.size()); channel.read(buffer); Image imageBlank; imageBlank = load(buffer.array()); BufferedImage bufferedImage = imageToBufferedImage(imageBlank); boolean isBlank; isBlank = isBlank(bufferedImage); // System.out.println("isblank "+ isBlank); boolean hasContent = false; File file = new File(TEMP_DIR + TEMP_EXTR_TEXT); try { FileWriter fileWriter = new FileWriter(file); PageText pageText = pdffile.getPageText(i); if (pageText != null && pageText.getPageLines() != null) { fileWriter.write(pageText.toString()); } // close the writer fileWriter.close(); System.out.println(file.length()); if (file.length() > 20) { hasContent = true; } file.delete(); System.out.println(TEMP_TIFF + " deleted"); } catch (IOException e) { e.printStackTrace(); } if (isBlank == false && hasContent == true) { page = writer.getImportedPage(r, i + 1); writer.addPage(page); } bs.close(); in.close(); File ft = new File(TEMP_DIR + TEMP_TIFF); boolean check = ft.delete(); if (check == true) { System.out.println("Deleted"); } else { System.out.println("Stuck"); } System.gc(); } document.close(); writer.close(); raf.close(); r.close(); stopTime = System.currentTimeMillis(); logger.info("Exit boolean convert(String pdf, String tif, String destPdf) with time: " + ((stopTime - startTime) / 1000)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:spntoolsdata.pdf.util.RemoveBlankPageFromPDF.java
public static void removeBlankPdfPages(String source, String destination) throws IOException, DocumentException { PdfReader r = null; RandomAccessSourceFactory rasf = null; RandomAccessFileOrArray raf = null;/*from ww w.ja v a 2s. com*/ Document document = null; PdfCopy writer = null; try { r = new PdfReader(source); // deprecated // RandomAccessFileOrArray raf // = new RandomAccessFileOrArray(pdfSourceFile); // itext 5.4.1 rasf = new RandomAccessSourceFactory(); raf = new RandomAccessFileOrArray(rasf.createBestSource(source)); document = new Document(r.getPageSizeWithRotation(1)); writer = new PdfCopy(document, new FileOutputStream(destination)); document.open(); PdfImportedPage page = null; for (int i = 1; i <= r.getNumberOfPages(); i++) { // first check, examine the resource dictionary for /Font or // /XObject keys. If either are present -> not blank. PdfDictionary pageDict = r.getPageN(i); PdfDictionary resDict = (PdfDictionary) pageDict.get(PdfName.RESOURCES); boolean noFontsOrImages = true; if (resDict != null) { noFontsOrImages = resDict.get(PdfName.FONT) == null && resDict.get(PdfName.XOBJECT) == null; } if (!noFontsOrImages) { byte bContent[] = r.getPageContent(i, raf); ByteArrayOutputStream bs = new ByteArrayOutputStream(); bs.write(bContent); if (bs.size() > BLANK_THRESHOLD) { page = writer.getImportedPage(r, i); writer.addPage(page); } } } } finally { if (document != null) document.close(); if (writer != null) writer.close(); if (raf != null) raf.close(); if (r != null) r.close(); } }