Example usage for com.itextpdf.text.pdf PdfReader getPageN

List of usage examples for com.itextpdf.text.pdf PdfReader getPageN

Introduction

On this page you can find usage examples for the getPageN method of com.itextpdf.text.pdf.PdfReader.

Prototype

public PdfDictionary getPageN(final int pageNum) 

Source Link

Document

Gets the dictionary that represents a page.

Usage

From source file:de.mat.utils.pdftools.PdfExtractEmptyPages.java

License:Mozilla Public License

/**
 * <h4>FeatureDomain:</h4>
 *     PublishingTools
 * <h4>FeatureDescription:</h4>
 *     reads readerOrig and adds every page either to writerTrimmed (page is kept)
 *     or to writerRemoved (page looks blank and trimming is enabled)
 * <h4>FeatureResult:</h4>
 *   <ul>
 *     <li>updates writerTrimmed - add all pages which are not empty
 *     <li>updates writerRemoved - add all empty pages
 *   </ul>
 * <h4>FeatureKeywords:</h4>
 *     PDF Publishing
 * @param origFileName - orig filename of the sourcepdf (only used in log messages)
 * @param readerOrig - reader of source
 * @param writerTrimmed - writer for the pages that are kept
 * @param writerRemoved - writer for empty pages; may be null, skipped pages are then dropped
 * @param flgTrim - if false no page is skipped: every page is added to writerTrimmed,
 *                  whatever the blank-page check says
 * @return - count of pages added to writerTrimmed
 * @throws Exception - propagated from the iText read/copy calls
 */
public static int addTrimmedPages(String origFileName, PdfReader readerOrig, PdfCopy writerTrimmed,
        PdfCopy writerRemoved, boolean flgTrim) throws Exception {
    int countTrimmedPages = 0;

    // the pdf-version belongs to the document, not to a page: read it once
    char version = readerOrig.getPdfVersion();
    int pageCount = readerOrig.getNumberOfPages();

    //loop each page
    for (int i = 1; i <= pageCount; i++) {
        boolean flgIsEmpty = isProbablyEmptyPage(origFileName, readerOrig, i, version);

        // add page to removed or trimmed document
        if (!flgIsEmpty || !flgTrim) {
            if (LOGGER.isInfoEnabled()) {
                LOGGER.info("add page " + i);
            }
            writerTrimmed.addPage(writerTrimmed.getImportedPage(readerOrig, i));
            countTrimmedPages++;
        } else {
            if (LOGGER.isInfoEnabled()) {
                LOGGER.info("skip page " + i + " Version: 1." + version + " File:" + origFileName);
            }
            if (writerRemoved != null) {
                writerRemoved.addPage(writerRemoved.getImportedPage(readerOrig, i));
            }
        }
    }

    return countTrimmedPages;
}

/**
 * Applies the version-specific blank-page check to one page.
 * <ul>
 *   <li>1.3: blank if the resource dictionary has neither /Font nor /XObject
 *       (a page without resource dictionary counts as blank)
 *   <li>1.4 / 1.5: blank if the page content is not larger than
 *       blankPdfsize / blankPdfsize_v5
 * </ul>
 * NOTE(review): for every other version no check runs and the page is reported
 * as blank, so with flgTrim set all pages of e.g. a 1.6/1.7 file end up in
 * writerRemoved - confirm this is intended.
 */
private static boolean isProbablyEmptyPage(String origFileName, PdfReader readerOrig, int pageNum,
        char version) throws Exception {
    if (version == '3') {
        // examine the resource dictionary for /Font or /XObject keys. If either
        // is present, it is almost certainly used on the page -> not blank.
        // getAsDict resolves indirect references and yields null for non-dictionaries.
        PdfDictionary resDict = readerOrig.getPageN(pageNum).getAsDict(PdfName.RESOURCES);
        if (resDict == null) {
            return true;
        }
        boolean flgIsEmpty = resDict.get(PdfName.FONT) == null && resDict.get(PdfName.XOBJECT) == null;
        if (LOGGER.isInfoEnabled()) {
            if (flgIsEmpty) {
                LOGGER.info("probably empty page " + pageNum + " Version: 1." + version
                        + " no FONT/XOBJECT found in File:" + origFileName);
            } else {
                LOGGER.info("normal page " + pageNum + " Version: 1." + version
                        + " FONT/XOBJECT found in File:" + origFileName);
            }
        }
        return flgIsEmpty;
    }

    if (version == '4' || version == '5') {
        // check the contentsize against the version-specific threshold
        int contentSize = readerOrig.getPageContent(pageNum).length;
        boolean flgHasContent = (version == '4') ? contentSize > blankPdfsize
                : contentSize > blankPdfsize_v5;
        if (LOGGER.isInfoEnabled()) {
            LOGGER.info((flgHasContent ? "normal page " : "probably empty page ") + pageNum
                    + " Version: 1." + version + " BS:" + contentSize + " File:" + origFileName);
        }
        return !flgHasContent;
    }

    return true;
}

From source file:edu.clemson.lph.pdfgen.MergePDF.java

License:Open Source License

/**
 * Concatenates the given PDF documents, in list order, into a single PDF that is
 * written to {@code outputStream}.
 *
 * <p>On success the output stream is flushed and closed. Any failure is logged and
 * swallowed; the output stream is not closed in that case.
 *
 * @param pdfInputStreams the source PDFs; the streams are read but not closed here
 * @param outputStream target of the merged document
 * @param paginate currently unused
 */
public static void concatPDFs(List<InputStream> pdfInputStreams, OutputStream outputStream, boolean paginate) {
    Document document = new Document();
    try {
        PdfCopy cp = new PdfCopy(document, outputStream);
        document.open();

        // Loop through the PDF streams and add to the output.
        for (InputStream is : pdfInputStreams) {
            PdfReader pdfReader = new PdfReader(is);
            try {
                // iText page numbers are 1-based
                int n = pdfReader.getNumberOfPages();
                for (int pageNo = 1; pageNo <= n; pageNo++) {
                    cp.addPage(cp.getImportedPage(pdfReader, pageNo));
                }
                // let the copy flush what it still holds for this reader before it is closed
                cp.freeReader(pdfReader);
            } finally {
                pdfReader.close();
            }
        }
        document.close();
        outputStream.flush();
        outputStream.close();
    } catch (Exception e) {
        logger.error(e);
    }
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Cleans one page: the page's original content bytes are detached from the page
 * dictionary and re-processed through a {@link PdfCleanUpRenderListener} that is
 * bound to the stamper's under-content canvas of that page. Afterwards the cleaned
 * locations are colored and, when redact-annotation references are present, the
 * redact annotations of the page are deleted.
 *
 * @param pageNum number of the page to clean
 * @param cleanUpLocations locations to clean on that page; nothing happens if empty
 */
private void cleanUpPage(int pageNum, List<PdfCleanUpLocation> cleanUpLocations)
        throws IOException, DocumentException {
    if (cleanUpLocations.isEmpty()) {
        return;
    }

    PdfReader reader = pdfStamper.getReader();
    PdfDictionary pageDict = reader.getPageN(pageNum);
    PdfContentByte underContent = pdfStamper.getUnderContent(pageNum);
    // read the original content before the /Contents entry is dropped from the page
    byte[] originalContent = ContentByteUtils.getContentBytesForPage(reader, pageNum);
    pageDict.remove(PdfName.CONTENTS);

    underContent.saveState();

    PdfCleanUpRenderListener renderListener =
            new PdfCleanUpRenderListener(pdfStamper, createFilter(cleanUpLocations));
    renderListener.registerNewContext(reader.getPageResources(pageDict), underContent);

    PdfContentStreamProcessor processor = new PdfContentStreamProcessor(renderListener);
    PdfCleanUpContentOperator.populateOperators(processor, renderListener);
    processor.processContent(originalContent, pageDict.getAsDict(PdfName.RESOURCES));
    renderListener.popContext();

    underContent.restoreState();

    colorCleanedLocations(underContent, cleanUpLocations);

    // a non-null map means we are in "extract locations from redact annots" mode
    if (redactAnnotIndirRefs != null) {
        deleteRedactAnnots(pageNum);
    }
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Rebuilds {@code pdfCleanUpLocations} from scratch: for every page of the stamper's
 * reader, the locations extracted from that page's redact annotations are stored
 * under the page number.
 */
private void extractLocationsFromRedactAnnots() {
    this.pdfCleanUpLocations = new HashMap<Integer, List<PdfCleanUpLocation>>();
    PdfReader pdfReader = pdfStamper.getReader();

    int pageNum = 1;
    while (pageNum <= pdfReader.getNumberOfPages()) {
        PdfDictionary page = pdfReader.getPageN(pageNum);
        this.pdfCleanUpLocations.put(pageNum, extractLocationsFromRedactAnnots(pageNum, page));
        ++pageNum;
    }
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Removes the redact annotations of a page from its /Annots array. When
 * {@code fillCleanedArea} is set, a removed annotation is replaced by its RO form
 * XObject if it has one, otherwise by its non-empty OverlayText.
 * The /Annots entry itself is dropped once the array is empty.
 *
 * @param pageNum number of the page whose redact annotations are deleted
 */
private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException {
    Set<String> redactRefs = redactAnnotIndirRefs.get(pageNum);
    if (redactRefs == null || redactRefs.isEmpty()) {
        return;
    }

    PdfReader pdfReader = pdfStamper.getReader();
    PdfContentByte overContent = pdfStamper.getOverContent(pageNum);
    PdfDictionary page = pdfReader.getPageN(pageNum);
    PdfArray annots = page.getAsArray(PdfName.ANNOTS);

    // arrayIdx is the position in the (shrinking) annots array; clipIdx counts every
    // annotation visited so far and is the index used for clippingRects
    int arrayIdx = 0;
    int clipIdx = 0;
    while (arrayIdx < annots.size()) {
        PdfIndirectReference annotRef = annots.getAsIndirectObject(arrayIdx);
        PdfDictionary annot = annots.getAsDict(arrayIdx);

        boolean isRedactAnnot = redactRefs.contains(annotRef.toString())
                || redactRefs.contains(getParentIndRefStr(annot));
        if (!isRedactAnnot) {
            ++arrayIdx;
            ++clipIdx;
            continue;
        }

        PdfStream roStream = annot.getAsStream(PdfName.RO);
        PdfString overlay = annot.getAsString(PdfName.OVERLAYTEXT);

        if (fillCleanedArea) {
            if (roStream != null) {
                PdfArray rect = annot.getAsArray(PdfName.RECT);
                float llx = rect.getAsNumber(0).floatValue();
                float lly = rect.getAsNumber(1).floatValue();
                float urx = rect.getAsNumber(2).floatValue();
                float ury = rect.getAsNumber(3).floatValue();
                Rectangle annotRect = new Rectangle(llx, lly, urx, ury);

                insertFormXObj(overContent, page, roStream, clippingRects.get(clipIdx), annotRect);
            } else if (overlay != null && overlay.toUnicodeString().length() > 0) {
                drawOverlayText(overContent, clippingRects.get(clipIdx), overlay,
                        annot.getAsString(PdfName.DA), annot.getAsNumber(PdfName.Q),
                        annot.getAsBoolean(PdfName.REPEAT));
            }
        }

        // the next annotation slides into arrayIdx, so only clipIdx advances
        annots.remove(arrayIdx);
        ++clipIdx;
    }

    if (annots.size() == 0) {
        page.remove(PdfName.ANNOTS);
    }
}

From source file:org.sejda.impl.itext5.component.AbstractPdfCopier.java

License:Open Source License

/**
 * Imports the given page and adds it to the copy after overwriting both the
 * MediaBox and the CropBox entry of the page's dictionary in {@code reader} with
 * {@code cropBox}.
 *
 * @param reader source document
 * @param pageNumber number of the page to add
 * @param cropBox rectangle stored as the page's MediaBox and CropBox
 * @throws TaskException propagated from the delegated single-argument addPage
 */
public void addPage(PdfReader reader, int pageNumber, PdfRectangle cropBox) throws TaskException {
    PdfImportedPage importedPage = pdfCopy.getImportedPage(reader, pageNumber);

    PdfDictionary pageDict = reader.getPageN(pageNumber);
    pageDict.put(PdfName.MEDIABOX, cropBox);
    pageDict.put(PdfName.CROPBOX, cropBox);

    addPage(importedPage);
}

From source file:org.sejda.impl.itext5.component.PdfUnpacker.java

License:Open Source License

/**
 * Collects the /FS dictionaries of all file attachment annotations found on the
 * pages of the given document.
 *
 * <p>Annotation entries that do not resolve to a dictionary (for example a dangling
 * reference) are skipped instead of failing the whole scan.
 *
 * @param reader the document to scan
 * @return the collected dictionaries (presumably without nulls, given the
 *         NullSafeSet used - verify against that class)
 */
private Set<PdfDictionary> getFileAttachmentsDictionaries(PdfReader reader) {
    Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>();
    for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
        PdfArray annots = reader.getPageN(k).getAsArray(PdfName.ANNOTS);
        if (annots == null) {
            continue;
        }
        for (PdfObject current : annots) {
            PdfObject resolved = PdfReader.getPdfObject(current);
            if (!(resolved instanceof PdfDictionary)) {
                // null or not an annotation dictionary: nothing to extract
                continue;
            }
            PdfDictionary annot = (PdfDictionary) resolved;
            if (PdfName.FILEATTACHMENT.equals(annot.getAsName(PdfName.SUBTYPE))) {
                retSet.add(annot.getAsDict(PdfName.FS));
            }
        }
    }
    return retSet;
}

From source file:oscar.dms.IncomingDocUtil.java

License:Open Source License

/**
 * Rotates one page of an incoming PDF document by {@code degrees}, relative to the
 * page's current rotation, and replaces the file on disk with the rotated version.
 *
 * <p>The result is first written to a temporary file ("T" + myPdfName in the
 * directory returned by getIncomingDocumentFilePath); then the original is deleted
 * and the temporary file is renamed to the original name, keeping the original's
 * last-modified timestamp.
 *
 * @param queueId passed to the incoming-document path helpers
 * @param myPdfDir passed to the incoming-document path helpers
 * @param myPdfName file name of the PDF
 * @param MyPdfPageNumber 1-based page number as decimal string
 * @param degrees rotation to add; may be negative, the stored value is normalised
 *                to the range 0..359
 * @throws Exception if the page number is not numeric, the PDF cannot be read or
 *                   written, or the original file cannot be deleted/replaced
 */
public static void rotatePage(String queueId, String myPdfDir, String myPdfName, String MyPdfPageNumber,
        int degrees) throws Exception {
    String tempFilePathName = getIncomingDocumentFilePath(queueId, myPdfDir) + File.separator + "T"
            + myPdfName;
    String filePathName = getIncomingDocumentFilePathName(queueId, myPdfDir, myPdfName);

    File f = new File(filePathName);
    long lastModified = f.lastModified();

    int pageNumber = Integer.parseInt(MyPdfPageNumber);

    PdfReader reader = new PdfReader(filePathName);
    try {
        // Java's % keeps the sign of the dividend, so fold negative results back into 0..359
        int rotatedegrees = ((reader.getPageRotation(pageNumber) + degrees) % 360 + 360) % 360;
        reader.getPageN(pageNumber).put(PdfName.ROTATE, new PdfNumber(rotatedegrees));

        FileOutputStream out = new FileOutputStream(tempFilePathName);
        try {
            // closing the stamper writes the modified document to the temp file
            new PdfStamper(reader, out).close();
        } finally {
            // no-op if the stamper already closed it; releases the handle if it failed
            out.close();
        }
    } finally {
        reader.close();
    }

    if (!f.delete()) {
        throw new Exception("Error in deleting file:" + filePathName);
    }

    File f1 = new File(tempFilePathName);
    f1.setLastModified(lastModified);
    if (!f1.renameTo(new File(filePathName))) {
        throw new Exception("Error in renaming file from:" + tempFilePathName + " to " + filePathName);
    }
}

From source file:oscar.dms.IncomingDocUtil.java

License:Open Source License

/**
 * Rotates every page of an incoming PDF document by {@code degrees}, relative to
 * each page's current rotation, and replaces the file on disk with the rotated
 * version.
 *
 * <p>The result is first written to a temporary file ("T" + myPdfName in the
 * directory returned by getIncomingDocumentFilePath); then the original is deleted
 * and the temporary file is renamed to the original name, keeping the original's
 * last-modified timestamp.
 *
 * @param queueId passed to the incoming-document path helpers
 * @param myPdfDir passed to the incoming-document path helpers
 * @param myPdfName file name of the PDF
 * @param degrees rotation to add; may be negative, the stored values are normalised
 *                to the range 0..359
 * @throws Exception if the PDF cannot be read or written, or the original file
 *                   cannot be deleted/replaced
 */
public static void rotateAlPages(String queueId, String myPdfDir, String myPdfName, int degrees)
        throws Exception {
    String tempFilePathName = getIncomingDocumentFilePath(queueId, myPdfDir) + File.separator + "T"
            + myPdfName;
    String filePathName = getIncomingDocumentFilePathName(queueId, myPdfDir, myPdfName);

    File f = new File(filePathName);
    long lastModified = f.lastModified();

    PdfReader reader = new PdfReader(filePathName);
    try {
        int pageCount = reader.getNumberOfPages();
        for (int p = 1; p <= pageCount; ++p) {
            // Java's % keeps the sign of the dividend, so fold negative results back into 0..359
            int rotatedegrees = ((reader.getPageRotation(p) + degrees) % 360 + 360) % 360;
            reader.getPageN(p).put(PdfName.ROTATE, new PdfNumber(rotatedegrees));
        }

        FileOutputStream out = new FileOutputStream(tempFilePathName);
        try {
            // closing the stamper writes the modified document to the temp file
            new PdfStamper(reader, out).close();
        } finally {
            // no-op if the stamper already closed it; releases the handle if it failed
            out.close();
        }
    } finally {
        reader.close();
    }

    if (!f.delete()) {
        throw new Exception("Error in deleting file:" + filePathName);
    }

    File f1 = new File(tempFilePathName);
    f1.setLastModified(lastModified);
    if (!f1.renameTo(new File(filePathName))) {
        throw new Exception("Error in renaming file from:" + tempFilePathName + " to " + filePathName);
    }
}

From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java

License:Open Source License

/**
 * Reads a PDF with iText and collects its text chunks into a BxDocument.
 * A chunk corresponds to a single string operand of one of PDF's text-showing
 * operators (Tj, TJ, ' and "), so depending on the PDF a chunk may or may not be
 * an individual glyph.
 *
 * <p>When both the front and the back page limit are positive, pages lying between
 * the two limits are skipped.
 *
 * @param stream PDF's stream
 * @return BxDocument containing pages with extracted chunks stored as BxChunk lists
 * @throws AnalysisException if the PDF is invalid, cannot be read, or yields a
 *                           document without pages
 */
@Override
public BxDocument extractCharacters(InputStream stream) throws AnalysisException {
    try {
        BxDocumentCreator creator = new BxDocumentCreator();

        PdfReader pdfReader = new PdfReader(stream);
        PdfContentStreamProcessor contentProcessor = new PdfContentStreamProcessor(creator);

        final int pageCount = pdfReader.getNumberOfPages();
        final boolean limitsActive = frontPagesLimit > 0 && backPagesLimit > 0;

        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
            boolean inSkippedRange = pageNumber > frontPagesLimit
                    && pageNumber < pageCount - 1 - backPagesLimit;
            if (limitsActive && inSkippedRange) {
                continue;
            }

            creator.processNewBxPage(pdfReader.getPageSize(pageNumber));

            PdfDictionary pageResources = pdfReader.getPageN(pageNumber).getAsDict(PdfName.RESOURCES);
            processAlternativeFontNames(pageResources);
            processAlternativeColorSpace(pageResources);

            contentProcessor.reset();
            byte[] pageContent = ContentByteUtils.getContentBytesForPage(pdfReader, pageNumber);
            contentProcessor.processContent(pageContent, pageResources);
            TimeoutRegister.get().check();
        }

        BxDocument result = filterComponents(removeDuplicateChunks(creator.document));
        if (result.getFirstChild() == null) {
            throw new AnalysisException("Document contains no pages");
        }
        return result;
    } catch (InvalidPdfException ex) {
        throw new AnalysisException("Invalid PDF file", ex);
    } catch (IOException ex) {
        throw new AnalysisException("Cannot extract characters from PDF file", ex);
    }
}