Example usage for org.apache.pdfbox.pdmodel PDPage getContents

List of usage examples for org.apache.pdfbox.pdmodel PDPage getContents

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDPage.getContents().

Prototype

@Override
public InputStream getContents() throws IOException 

Source Link

Document

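Returns the content stream(s) of this page as a single input stream. Note: most of the examples below assign the result to a PDStream, so they appear to target an older PDFBox API in which getContents() returned a PDStream rather than an InputStream.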

Usage

From source file:net.padaf.preflight.contentstream.ContentStreamWrapper.java

License:Apache License

/**
 * Process the validation of a PageContent (The page is initialized by the
 * constructor)/*from  w  ww  .j a va 2s  .  co m*/
 * 
 * @return A list of validation error. This list is empty if the validation
 *         succeed.
 * @throws ValidationException.
 */
public List<ValidationError> validPageContentStream(PDPage page) throws ValidationException {
    List<ValidationError> errors = new ArrayList<ValidationError>();

    try {
        PDStream pstream = page.getContents();
        if (pstream != null) {
            processStream(page, page.findResources(), pstream.getStream());
        }
    } catch (ContentStreamException e) {
        errors.add(new ValidationError(e.getValidationError(), e.getMessage()));
    } catch (IOException e) {
        throw new ValidationException("Unable to check the ContentStream : " + e.getMessage(), e);
    }

    return errors;
}

From source file:net.timendum.pdf.Images2HTML.java

License:Open Source License

/**
 * Runs the content stream of every page of the document through
 * {@code processStream}.
 *
 * <p>Pages for which {@code getContents()} returns {@code null} are skipped.
 *
 * @param document the document whose pages are processed
 * @throws IOException if reading or processing a content stream fails
 */
public void processDocument(PDDocument document) throws IOException {
    List<?> allPages = document.getDocumentCatalog().getAllPages();
    for (Object entry : allPages) {
        PDPage page = (PDPage) entry;
        // A page without a content stream has nothing to process; guarding
        // here avoids a NullPointerException on getStream().
        if (page.getContents() != null) {
            processStream(page, page.findResources(), page.getContents().getStream());
        }
    }
}

From source file:onyx.core.parser.PDFTextStripper.java

License:Apache License

/**
 * Walks over all pages and hands every page that has a content stream to
 * {@code processPage}.
 *
 * <p>Before iterating, the start and end bookmarks (when set) are resolved
 * to page numbers through {@code getPageNumber}. The page counter is
 * advanced for every page, including pages without a content stream.
 *
 * @param pages The pages object in the document.
 *
 * @throws IOException If there is an error parsing the text.
 */
protected void processPages(List<COSObjectable> pages) throws IOException {
    if (startBookmark != null) {
        startBookmarkPageNumber = getPageNumber(startBookmark, pages);
    }
    if (endBookmark != null) {
        endBookmarkPageNumber = getPageNumber(endBookmark, pages);
    }

    boolean startUnresolved = startBookmark != null && startBookmarkPageNumber == -1;
    boolean endUnresolved = endBookmark != null && endBookmarkPageNumber == -1;
    if (startUnresolved && endUnresolved
            && startBookmark.getCOSObject() == endBookmark.getCOSObject()) {
        // Special case: start and end bookmark are the same object but point
        // to nothing, so no text is to be extracted.
        startBookmarkPageNumber = 0;
        endBookmarkPageNumber = 0;
    }

    for (COSObjectable item : pages) {
        PDPage currentPage = (PDPage) item;
        PDStream pageContents = currentPage.getContents();
        currentPageNo++;
        if (pageContents == null) {
            continue;
        }
        COSStream rawContents = pageContents.getStream();
        processPage(currentPage, rawContents);
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

/**
 * Copies the contents of the given page into a new {@code PDStream} that is
 * backed by a fresh {@code COSStream}.
 *
 * @param page the page whose contents are copied
 * @return the newly created stream holding the copied bytes
 * @throws IOException if reading the page contents or writing the copy fails
 */
private PDStream getContents(PDPage page) throws IOException {
    PDStream pdStream = new PDStream(new COSStream());
    // try-with-resources closes both streams even when the copy fails; the
    // original leaked the output stream on error and never closed the input.
    try (java.io.InputStream in = page.getContents();
            OutputStream os = pdStream.createOutputStream()) {
        IOUtils.copy(in, os);
    }
    return pdStream;
}

From source file:org.apache.padaf.preflight.RetrieveMissingStream.java

License:Apache License

/**
 * Loads the PDF named by the single command-line argument, collects the
 * object keys of all stream objects and queries the contents of every page.
 *
 * <p>Exits with status 233 when the argument count is not exactly one.
 *
 * @param args exactly one element: the path of the PDF file to inspect
 * @throws Exception if the file cannot be opened or the document cannot be read
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage : RetrieveMissingStream file");
        System.exit(233);
    }

    HashSet<COSObjectKey> listOfKeys = new HashSet<COSObjectKey>();

    PDDocument document = PDDocument.load(new FileInputStream(args[0]));
    try {
        // Remember the key (object number + generation) of every stream object.
        List<COSObject> lCosObj = document.getDocument().getObjects();
        for (COSObject cosObject : lCosObj) {

            if (cosObject.getObject() instanceof COSStream) {
                listOfKeys.add(new COSObjectKey(cosObject.getObjectNumber().intValue(),
                        cosObject.getGenerationNumber().intValue()));
            }

        }

        PDDocumentCatalog catalog = document.getDocumentCatalog();
        List<?> pages = catalog.getAllPages();
        for (int i = 0; i < pages.size(); ++i) {
            PDPage pdp = (PDPage) pages.get(i);
            // NOTE(review): neither lookup result is used afterwards; the calls
            // are kept because getContents() may fail for a missing stream,
            // which looks like the purpose of this tool - confirm.
            PDStream pdStream = pdp.getContents();

            COSBase b = pdp.getCOSDictionary().getItem(COSName.getPDFName("Contents"));
            System.out.println();
        }
    } finally {
        // Release the document even when inspecting it fails.
        document.close();
    }
}

From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java

License:Apache License

/**
 * Resets the engine, decrypts the document with an empty password when it is
 * encrypted, and passes every page that has a content stream to
 * {@code processPage}. Afterwards the collected font styles are stored on
 * the document content.
 *
 * @throws IOException propagated from reading or processing a page
 * @throws RuntimeException if the encryption check or decryption fails
 */
public void processDocument() throws IOException {
    resetEngine();

    try {
        if (doc.isEncrypted()) {
            doc.decrypt("");
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not decrypt document", e);
    }

    // Start from a clean per-document state.
    currentPageNo = 0;
    docContent = new DocumentContent();
    fonts = new Fonts();

    final List<PDPage> allPages = (List<PDPage>) doc.getDocumentCatalog().getAllPages();
    for (final PDPage page : allPages) {
        final PDStream pageContents = page.getContents();

        // The counter advances for every page, including pages without contents.
        currentPageNo++;

        if (pageContents == null) {
            continue;
        }
        processPage(page, pageContents.getStream());
    }

    docContent.setStyles(fonts.styles.values());
}

From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java

License:Open Source License

/**
 * This method is to scan for signature tag in each page and apply the signature
 * at desired location./*from  w w w  . j  ava 2s. c o  m*/
 * @param imageData
 * @param originalByteArrayOutputStream
 */
@SuppressWarnings("unchecked")
protected ByteArrayOutputStream scanAndApplyAutographInEachPage(byte[] imageData,
        ByteArrayOutputStream originalByteArrayOutputStream) throws Exception {
    ByteArrayOutputStream outputStream = originalByteArrayOutputStream;
    byte[] pdfFileData = originalByteArrayOutputStream.toByteArray();
    PDDocument originalDocument = getPdfDocument(pdfFileData); //PDDocument.load(is);
    PDDocument signatureDocument = new PDDocument();
    List<PDPage> originalDocumentPages = originalDocument.getDocumentCatalog().getAllPages();
    for (PDPage page : originalDocumentPages) {
        List<String> signatureTags = new ArrayList<String>(getSignatureTagParameter());
        PersonSignatureLocationHelper printer = new PersonSignatureLocationHelper(signatureTags);
        PDStream contents = page.getContents();
        if (contents != null) {
            printer.processStream(page, page.findResources(), page.getContents().getStream());
        }
        PDPage signaturePage = new PDPage();
        if (printer.isSignatureTagExists()) {
            PDJpeg signatureImage = new PDJpeg(signatureDocument, getBufferedImage(imageData));
            PDPageContentStream stream = new PDPageContentStream(signatureDocument, signaturePage, true, true);
            for (PersonSignaturePrintHelper signatureHelper : printer.getPersonSignatureLocations()) {
                float coordinateX = signatureHelper.getCoordinateX();
                float coordinateY = signatureHelper.getCoordinateY() - signatureImage.getHeight()
                        - ADDITIONAL_SPACE_BETWEEN_TAG_AND_IMAGE;
                stream.drawImage(signatureImage, coordinateX, coordinateY);
                stream.close();
            }
        } else {
            signaturePage = page;
        }
        signatureDocument.addPage(signaturePage);
    }

    Overlay overlay = new Overlay();
    overlay.overlay(signatureDocument, originalDocument);

    originalDocument.save(outputStream);
    originalDocument.close();
    signatureDocument.close();
    return outputStream;
}

From source file:org.opencps.util.ExtractTextLocations.java

License:Open Source License

/**
 * Loads the PDF at {@code fullPath}, processes the content stream of its
 * first page and records that page's media box dimensions.
 *
 * <p>A failed decryption attempt is logged and processing continues. Any
 * other exception raised while loading or processing is logged and not
 * rethrown. The document is closed in all cases.
 *
 * @param fullPath path of the PDF file to read
 * @throws IOException declared by the constructor; within the visible code it
 *         can escape from closing the document
 */
public ExtractTextLocations(String fullPath) throws IOException {

    PDDocument pdf = null;

    try {
        pdf = PDDocument.load(new File(fullPath));

        if (pdf.isEncrypted()) {
            try {
                pdf.decrypt(StringPool.BLANK);
            } catch (Exception e) {
                _log.error(e);
            }
        }

        List pageList = pdf.getDocumentCatalog().getAllPages();
        boolean hasPages = pageList != null && !pageList.isEmpty();
        if (hasPages) {
            // Only the first page is inspected.
            PDPage firstPage = (PDPage) pageList.get(0);

            if (firstPage.getContents() != null) {
                this.processStream(firstPage, firstPage.findResources(), firstPage.getContents().getStream());
            }

            PDRectangle mediaBox = firstPage.findMediaBox();
            if (mediaBox != null) {
                setPageWidth(mediaBox.getWidth());
                setPageHeight(mediaBox.getHeight());
                setPageLLX(mediaBox.getLowerLeftX());
                setPageURX(mediaBox.getUpperRightX());
                setPageLLY(mediaBox.getLowerLeftY());
                setPageURY(mediaBox.getUpperRightY());
            }
        }
    } catch (Exception e) {
        _log.error(e);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:pdfpicmangler.ResolutionAnalyzer.java

License:Open Source License

/**
 * Clears the previously collected resolutions and runs the content stream of
 * every page of the document through {@code processStream}.
 *
 * <p>Pages for which {@code getContents()} returns {@code null} are skipped.
 * {@code currentPage} is set to the 1-based page number before a page is
 * processed.
 *
 * @param document the document to analyze
 * @return the {@code resolutions} map held by this analyzer
 * @throws IOException if reading or processing a content stream fails
 */
public Map<String, Float> analyze(PDDocument document) throws IOException {
    resolutions.clear();

    List<?> allPages = document.getDocumentCatalog().getAllPages();
    for (int i = 0; i < allPages.size(); i++) {
        PDPage page = (PDPage) allPages.get(i);
        currentPage = i + 1;
        System.out.println("Processing page: " + i);
        // A page without a content stream has nothing to analyze; guarding
        // here avoids a NullPointerException on getStream().
        if (page.getContents() == null) {
            continue;
        }
        processStream(page, page.findResources(), page.getContents().getStream());
    }

    return resolutions;
}

From source file:pdfpositional.PdfPositional.java

/**
 * Command-line entry point: validates the PDF file argument, applies the
 * optional {@code --page=} and {@code --output=} options, processes either
 * the selected page or all pages, and terminates the JVM.
 *
 * <p>The file is taken from the last argument; all preceding arguments are
 * parsed as {@code --name=value} options. The process exits with status 0 on
 * success and with status 1 after printing a message on any handled error.
 *
 * @param args the command line arguments: options followed by the PDF file
 */
public static void main(String[] args) {
    try {
        // check file param
        if (args.length == 0) {
            throw new ParameterException("No file parameter specified");
        }

        // The file is always the last argument.
        String file = args[args.length - 1];
        // Case-insensitive check that the argument ends in ".pdf" and uses
        // only the characters allowed by the character class.
        Pattern patternFile = Pattern.compile("(?i)^[\\w,\\s-()/]+\\.pdf$");
        Matcher matcherFile = patternFile.matcher(file);

        // check file is valid format
        if (!matcherFile.find()) {
            throw new ParameterException("File parameter invalid: " + file);
        }

        // check if file exists
        File input = new File(file);
        if (!input.exists()) {
            throw new ParameterException("File does not exist: " + file);
        }

        // ensure it isnt a directory
        if (input.isDirectory()) {
            throw new ParameterException("File is a directory: " + file);
        }

        PdfPositional pdfPositional = new PdfPositional(input);
        // NOTE(review): 1.3888... equals 100/72, presumably a points-to-pixels
        // factor at 100 DPI - confirm.
        pdfPositional.setConversion(new Float(1.388888888889));

        pdfPositional.processFileArgument(args[args.length - 1]);
        // Options have the form --name=value.
        Pattern patternArgument = Pattern.compile("^-{2}([^=]+)[=]([\\s\\S]+)$");
        Matcher matcher;

        // Every argument except the last one (the file) is treated as an option;
        // unrecognised option names are ignored.
        for (int i = 0; i < args.length - 1; i++) {
            matcher = patternArgument.matcher(args[i]);
            while (matcher.find()) {
                switch (matcher.group(1)) {
                case "page":
                    pdfPositional.setPageNumber(Integer.parseInt(matcher.group(2)));
                    break;
                case "output":
                    pdfPositional.setOutputFile(matcher.group(2));
                    break;
                }
            }
        }

        PDDocument document;
        document = PDDocument.load(pdfPositional.getInputFile());

        // check for encrypted document
        if (document.isEncrypted()) {
            try {
                // Try the empty password; give up on the document if that fails.
                document.decrypt("");
            } catch (CryptographyException | IOException e) {
                document.close();
                throw new EncryptedDocumentException();
            }
        }

        List allPages = document.getDocumentCatalog().getAllPages();
        if (pdfPositional.hasPageNumber()) {
            // A single page was requested; page numbers are 1-based.
            if (document.getNumberOfPages() < pdfPositional.getPageNumber()) {
                throw new ParameterException("illegal page number");
            }
            PDPage page = (PDPage) allPages.get(pdfPositional.getPageNumber() - 1);
            PDStream contents = page.getContents();
            if (contents != null) {
                pdfPositional.processStream(page, page.findResources(), page.getContents().getStream());
                pdfPositional.addPageDataToPdfData();
                pdfPositional.writeJSONToOutputStream();
            }
        } else {
            // No page requested: process every page, setting the 1-based page
            // number before each one.
            for (int i = 0; i < allPages.size(); i++) {
                pdfPositional.setPageNumber(i + 1);
                PDPage page = (PDPage) allPages.get(i);
                PDStream contents = page.getContents();

                // Pages without a content stream are skipped.
                if (contents != null) {
                    pdfPositional.processStream(page, page.findResources(), page.getContents().getStream());
                    pdfPositional.addPageDataToPdfData();
                    pdfPositional.writeJSONToOutputStream();
                }

                // NOTE(review): presumably releases per-page data once the page
                // has been handled - confirm.
                page.clear();
            }
        }

        pdfPositional.destroyOutputStream();
        document.close();

        System.exit(0);
    } catch (ParameterException ex) {
        System.out.println("Parameter Error: " + ex.getMessage());
        System.exit(1);
    } catch (EncryptedDocumentException ex) {
        System.out.println("Encrypted Document Error");
        System.exit(1);
    } catch (IOException | NumberFormatException ex) {
        // Covers I/O failures and a non-numeric --page value.
        System.out.println("General Error");
        System.exit(1);
    }

}