List of usage examples for com.itextpdf.text.pdf PdfReader getPageN
public PdfDictionary getPageN(final int pageNum)
From source file:de.mat.utils.pdftools.PdfExtractEmptyPages.java
License:Mozilla Public License
/** * <h4>FeatureDomain:</h4>//from w w w . ja v a 2s. c o m * PublishingTools * <h4>FeatureDescription:</h4> * reads readerOrig and adds pages to writerRemoved if empty, or to * writerTrimmed if not empty * <h4>FeatureResult:</h4> * <ul> * <li>updates writerTrimmed - add all pages which are not empty * <li>updates writerRemoved - add all empty pages * </ul> * <h4>FeatureKeywords:</h4> * PDF Publishing * @param origFileName - orig filename of the sourcepdf * @param readerOrig - reader of source * @param writerTrimmed - writer for trimmed pages * @param writerRemoved - writer for empty pages * @param flgTrim - ?? * @return - count of trimmed pages * @throws Exception */ public static int addTrimmedPages(String origFileName, PdfReader readerOrig, PdfCopy writerTrimmed, PdfCopy writerRemoved, boolean flgTrim) throws Exception { PdfImportedPage page = null; int countTrimmedPages = 0; //loop each page for (int i = 1; i <= readerOrig.getNumberOfPages(); i++) { boolean flgIsEmpty = true; // get dictionary PdfDictionary pageDict = readerOrig.getPageN(i); // every pdf-version has its own way :-( char version = readerOrig.getPdfVersion(); if (version == '3') { // PDF-Version: 3 // examine the resource dictionary for /Font or // /XObject keys. If either are present, they're almost // certainly actually used on the page -> not blank. PdfObject myObj = pageDict.get(PdfName.RESOURCES); PdfDictionary resDict = null; if (myObj instanceof PdfDictionary) { resDict = (PdfDictionary) myObj; } else { resDict = (PdfDictionary) PdfReader.getPdfObject(myObj); } if (resDict != null) { flgIsEmpty = resDict.get(PdfName.FONT) == null && resDict.get(PdfName.XOBJECT) == null; if (LOGGER.isInfoEnabled()) { if (flgIsEmpty) { LOGGER.info("probably empty page " + i + " Version: 1." + version + " FONT/XOBJECT found in File:" + origFileName); } else { LOGGER.info("normal page " + i + " Version: 1." 
+ version + " no FONT/XOBJECT found in File:" + origFileName); } } } } else if (version == '4') { // PDF-Version: 4 // check the contentsize. // get the page content byte bContent[] = readerOrig.getPageContent(i); ByteArrayOutputStream bs = new ByteArrayOutputStream(); // write the content to an output stream bs.write(bContent); flgIsEmpty = true; if (bs.size() > blankPdfsize) { if (LOGGER.isInfoEnabled()) LOGGER.info("normal page " + i + " Version: 1." + version + " BS:" + bs.size() + " File:" + origFileName); flgIsEmpty = false; } else { if (LOGGER.isInfoEnabled()) LOGGER.info("probably empty page " + i + " Version: 1." + version + " BS:" + bs.size() + " File:" + origFileName); } } else if (version == '5') { // PDF-Version: 5 // check the contentsize. // get the page content byte bContent[] = readerOrig.getPageContent(i); ByteArrayOutputStream bs = new ByteArrayOutputStream(); // write the content to an output stream bs.write(bContent); flgIsEmpty = true; if (bs.size() > blankPdfsize_v5) { if (LOGGER.isInfoEnabled()) LOGGER.info("normal page " + i + " Version: 1." + version + " BS:" + bs.size() + " File:" + origFileName); flgIsEmpty = false; } else { if (LOGGER.isInfoEnabled()) LOGGER.info("probably empty page " + i + " Version: 1." + version + " BS:" + bs.size() + " File:" + origFileName); } } // add page to removed or trimmed document if (!flgIsEmpty || !flgTrim) { if (LOGGER.isInfoEnabled()) LOGGER.info("add page " + i); page = writerTrimmed.getImportedPage(readerOrig, i); writerTrimmed.addPage(page); countTrimmedPages++; } else { if (LOGGER.isInfoEnabled()) LOGGER.info("skip page " + i + " Version: 1." + version + " File:" + origFileName); if (writerRemoved != null) { page = writerRemoved.getImportedPage(readerOrig, i); writerRemoved.addPage(page); } } } return countTrimmedPages; }
From source file:edu.clemson.lph.pdfgen.MergePDF.java
License:Open Source License
public static void concatPDFs(List<InputStream> pdfInputStreams, OutputStream outputStream, boolean paginate) { Document document = new Document(); try {// w w w .ja v a2 s . c o m PdfCopy cp = new PdfCopy(document, outputStream); document.open(); Iterator<InputStream> iteratorPDFReader = pdfInputStreams.iterator(); // Loop through the PDF streams and add to the output. while (iteratorPDFReader.hasNext()) { InputStream is = iteratorPDFReader.next(); PdfReader pdfReader = new PdfReader(is); int n = pdfReader.getNumberOfPages(); for (int pageNo = 0; pageNo < n;) { pdfReader.getPageN(pageNo); cp.addPage(cp.getImportedPage(pdfReader, ++pageNo)); } } document.close(); outputStream.flush(); outputStream.close(); } catch (Exception e) { logger.error(e); } }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
private void cleanUpPage(int pageNum, List<PdfCleanUpLocation> cleanUpLocations) throws IOException, DocumentException { if (cleanUpLocations.size() == 0) { return;// w ww . j a v a 2 s. c om } PdfReader pdfReader = pdfStamper.getReader(); PdfDictionary page = pdfReader.getPageN(pageNum); PdfContentByte canvas = pdfStamper.getUnderContent(pageNum); byte[] pageContentInput = ContentByteUtils.getContentBytesForPage(pdfReader, pageNum); page.remove(PdfName.CONTENTS); canvas.saveState(); PdfCleanUpRegionFilter filter = createFilter(cleanUpLocations); PdfCleanUpRenderListener pdfCleanUpRenderListener = new PdfCleanUpRenderListener(pdfStamper, filter); pdfCleanUpRenderListener.registerNewContext(pdfReader.getPageResources(page), canvas); PdfContentStreamProcessor contentProcessor = new PdfContentStreamProcessor(pdfCleanUpRenderListener); PdfCleanUpContentOperator.populateOperators(contentProcessor, pdfCleanUpRenderListener); contentProcessor.processContent(pageContentInput, page.getAsDict(PdfName.RESOURCES)); pdfCleanUpRenderListener.popContext(); canvas.restoreState(); colorCleanedLocations(canvas, cleanUpLocations); if (redactAnnotIndirRefs != null) { // if it isn't null, then we are in "extract locations from redact annots" mode deleteRedactAnnots(pageNum); } }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/**
 * Extracts locations from the redact annotations contained in the document.
 * Replaces {@code pdfCleanUpLocations} with a fresh map holding one entry
 * per page number.
 */
private void extractLocationsFromRedactAnnots() {
    this.pdfCleanUpLocations = new HashMap<Integer, List<PdfCleanUpLocation>>();
    PdfReader stamperReader = pdfStamper.getReader();

    int pageNum = 1;
    while (pageNum <= stamperReader.getNumberOfPages()) {
        PdfDictionary currentPage = stamperReader.getPageN(pageNum);
        List<PdfCleanUpLocation> pageLocations = extractLocationsFromRedactAnnots(pageNum, currentPage);
        this.pdfCleanUpLocations.put(pageNum, pageLocations);
        pageNum++;
    }
}
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/** * Deletes redact annotations from the page and substitutes them with either OverlayText or RO object if it's needed. *///from www .j a v a 2 s . co m private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException { Set<String> indirRefs = redactAnnotIndirRefs.get(pageNum); if (indirRefs == null || indirRefs.isEmpty()) { return; } PdfReader reader = pdfStamper.getReader(); PdfContentByte canvas = pdfStamper.getOverContent(pageNum); PdfDictionary pageDict = reader.getPageN(pageNum); PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS); // j is for access annotRect (i can be decreased, so we need to store additional index, // indicating current position in ANNOTS array in case if we don't remove anything for (int i = 0, j = 0; i < annotsArray.size(); ++i, ++j) { PdfIndirectReference annotIndRef = annotsArray.getAsIndirectObject(i); PdfDictionary annotDict = annotsArray.getAsDict(i); if (indirRefs.contains(annotIndRef.toString()) || indirRefs.contains(getParentIndRefStr(annotDict))) { PdfStream formXObj = annotDict.getAsStream(PdfName.RO); PdfString overlayText = annotDict.getAsString(PdfName.OVERLAYTEXT); if (fillCleanedArea && formXObj != null) { PdfArray rectArray = annotDict.getAsArray(PdfName.RECT); Rectangle annotRect = new Rectangle(rectArray.getAsNumber(0).floatValue(), rectArray.getAsNumber(1).floatValue(), rectArray.getAsNumber(2).floatValue(), rectArray.getAsNumber(3).floatValue()); insertFormXObj(canvas, pageDict, formXObj, clippingRects.get(j), annotRect); } else if (fillCleanedArea && overlayText != null && overlayText.toUnicodeString().length() > 0) { drawOverlayText(canvas, clippingRects.get(j), overlayText, annotDict.getAsString(PdfName.DA), annotDict.getAsNumber(PdfName.Q), annotDict.getAsBoolean(PdfName.REPEAT)); } annotsArray.remove(i--); // array size is changed, so we need to decrease i } } if (annotsArray.size() == 0) { pageDict.remove(PdfName.ANNOTS); } }
From source file:org.sejda.impl.itext5.component.AbstractPdfCopier.java
License:Open Source License
/**
 * Imports the given page from the reader, sets both its media box and its
 * crop box to {@code cropBox} and adds the imported page.
 *
 * @param reader     reader the page is taken from
 * @param pageNumber number of the page to add
 * @param cropBox    rectangle stored as the page's MEDIABOX and CROPBOX
 * @throws TaskException
 */
public void addPage(PdfReader reader, int pageNumber, PdfRectangle cropBox) throws TaskException {
    PdfImportedPage importedPage = pdfCopy.getImportedPage(reader, pageNumber);

    PdfDictionary pageDictionary = reader.getPageN(pageNumber);
    for (PdfName boxKey : new PdfName[] { PdfName.MEDIABOX, PdfName.CROPBOX }) {
        pageDictionary.put(boxKey, cropBox);
    }

    addPage(importedPage);
}
From source file:org.sejda.impl.itext5.component.PdfUnpacker.java
License:Open Source License
private Set<PdfDictionary> getFileAttachmentsDictionaries(PdfReader reader) { Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>(); for (int k = 1; k <= reader.getNumberOfPages(); ++k) { PdfArray annots = reader.getPageN(k).getAsArray(PdfName.ANNOTS); if (annots != null) { for (PdfObject current : annots) { PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(current); if (PdfName.FILEATTACHMENT.equals(annot.getAsName(PdfName.SUBTYPE))) { retSet.add(annot.getAsDict(PdfName.FS)); }//from w w w .j av a2 s.c o m } } } return retSet; }
From source file:oscar.dms.IncomingDocUtil.java
License:Open Source License
public static void rotatePage(String queueId, String myPdfDir, String myPdfName, String MyPdfPageNumber, int degrees) throws Exception { long lastModified; String filePathName, tempFilePathName; int rot;// w w w. j av a 2 s .c o m int rotatedegrees; tempFilePathName = getIncomingDocumentFilePath(queueId, myPdfDir) + File.separator + "T" + myPdfName; filePathName = getIncomingDocumentFilePathName(queueId, myPdfDir, myPdfName); File f = new File(filePathName); lastModified = f.lastModified(); PdfReader reader = null; PdfStamper stp = null; try { reader = new PdfReader(filePathName); rot = reader.getPageRotation(Integer.parseInt(MyPdfPageNumber)); rotatedegrees = rot + degrees; rotatedegrees = rotatedegrees % 360; reader.getPageN(Integer.parseInt(MyPdfPageNumber)).put(PdfName.ROTATE, new PdfNumber(rotatedegrees)); stp = new PdfStamper(reader, new FileOutputStream(tempFilePathName)); } catch (Exception e) { throw (e); } finally { try { if (stp != null) { stp.close(); } if (reader != null) { reader.close(); } } catch (Exception e) { throw (e); } } boolean success = f.delete(); if (success) { File f1 = new File(tempFilePathName); f1.setLastModified(lastModified); success = f1.renameTo(new File(filePathName)); if (!success) { throw new Exception("Error in renaming file from:" + tempFilePathName + " to " + filePathName); } } else { throw new Exception("Error in deleting file:" + filePathName); } }
From source file:oscar.dms.IncomingDocUtil.java
License:Open Source License
/**
 * Rotates every page of an incoming PDF document by the given number of
 * degrees and replaces the original file with the rotated version, keeping
 * the original last-modified timestamp.
 * <p>
 * The rotated document is first written to a temporary file (the original
 * name prefixed with "T"); the original is then deleted and the temporary
 * file renamed to the original name.
 *
 * @param queueId   queue the document belongs to
 * @param myPdfDir  directory identifier of the document
 * @param myPdfName file name of the document
 * @param degrees   degrees added to each page's current rotation; the
 *                  stored value is normalised to the range 0..359
 * @throws Exception if the PDF cannot be read or written, or the original
 *                   file cannot be deleted or replaced
 */
public static void rotateAlPages(String queueId, String myPdfDir, String myPdfName, int degrees)
        throws Exception {
    String tempFilePathName = getIncomingDocumentFilePath(queueId, myPdfDir) + File.separator + "T" + myPdfName;
    String filePathName = getIncomingDocumentFilePathName(queueId, myPdfDir, myPdfName);

    File f = new File(filePathName);
    long lastModified = f.lastModified();

    PdfReader reader = null;
    FileOutputStream tempOut = null;
    PdfStamper stp = null;

    try {
        reader = new PdfReader(filePathName);

        for (int p = 1; p <= reader.getNumberOfPages(); ++p) {
            int rot = reader.getPageRotation(p);
            // keep the stored rotation non-negative even for negative degrees
            int rotatedegrees = ((rot + degrees) % 360 + 360) % 360;

            reader.getPageN(p).put(PdfName.ROTATE, new PdfNumber(rotatedegrees));
        }

        tempOut = new FileOutputStream(tempFilePathName);
        stp = new PdfStamper(reader, tempOut);
    } finally {
        try {
            if (stp != null) {
                // closing the stamper writes the document to the temp file
                stp.close();
            } else if (tempOut != null) {
                // stamper was never created, so the stream must be closed here
                tempOut.close();
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    boolean success = f.delete();

    if (success) {
        File f1 = new File(tempFilePathName);
        f1.setLastModified(lastModified);
        success = f1.renameTo(new File(filePathName));
        if (!success) {
            throw new Exception("Error in renaming file from:" + tempFilePathName + " to " + filePathName);
        }
    } else {
        throw new Exception("Error in deleting file:" + filePathName);
    }
}
From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License:Open Source License
/**
 * Extracts text chunks from PDF using iText and stores them in BxDocument object.
 * Depending on parsed PDF, extracted text chunks may or may not be individual glyphs,
 * they correspond to single string operands of PDF's text-showing operators
 * (Tj, TJ, ' and ").
 * <p>
 * When both {@code frontPagesLimit} and {@code backPagesLimit} are positive,
 * pages between the front and back ranges are skipped.
 *
 * @param stream PDF's stream
 * @return BxDocument containing pages with extracted chunks stored as BxChunk lists
 * @throws AnalysisException if the PDF is invalid, cannot be read, or yields no pages
 */
@Override
public BxDocument extractCharacters(InputStream stream) throws AnalysisException {
    try {
        final BxDocumentCreator creator = new BxDocumentCreator();
        final PdfReader pdf = new PdfReader(stream);
        final PdfContentStreamProcessor contentProcessor = new PdfContentStreamProcessor(creator);

        for (int pageNumber = 1; pageNumber <= pdf.getNumberOfPages(); pageNumber++) {
            final boolean limitsActive = frontPagesLimit > 0 && backPagesLimit > 0;
            final boolean inSkippedRange = pageNumber > frontPagesLimit
                    && pageNumber < pdf.getNumberOfPages() - 1 - backPagesLimit;
            if (limitsActive && inSkippedRange) {
                continue;
            }

            creator.processNewBxPage(pdf.getPageSize(pageNumber));

            final PdfDictionary pageResources = pdf.getPageN(pageNumber).getAsDict(PdfName.RESOURCES);
            processAlternativeFontNames(pageResources);
            processAlternativeColorSpace(pageResources);

            contentProcessor.reset();
            final byte[] pageContent = ContentByteUtils.getContentBytesForPage(pdf, pageNumber);
            contentProcessor.processContent(pageContent, pageResources);

            TimeoutRegister.get().check();
        }

        final BxDocument result = filterComponents(removeDuplicateChunks(creator.document));
        if (result.getFirstChild() != null) {
            return result;
        }
        throw new AnalysisException("Document contains no pages");
    } catch (InvalidPdfException ex) {
        throw new AnalysisException("Invalid PDF file", ex);
    } catch (IOException ex) {
        throw new AnalysisException("Cannot extract characters from PDF file", ex);
    }
}