List of usage examples for org.apache.pdfbox.pdmodel.PDPage#getContents()
@Override public InputStream getContents() throws IOException
From source file:net.padaf.preflight.contentstream.ContentStreamWrapper.java
License:Apache License
/** * Process the validation of a PageContent (The page is initialized by the * constructor)/*from w ww .j a va 2s . co m*/ * * @return A list of validation error. This list is empty if the validation * succeed. * @throws ValidationException. */ public List<ValidationError> validPageContentStream(PDPage page) throws ValidationException { List<ValidationError> errors = new ArrayList<ValidationError>(); try { PDStream pstream = page.getContents(); if (pstream != null) { processStream(page, page.findResources(), pstream.getStream()); } } catch (ContentStreamException e) { errors.add(new ValidationError(e.getValidationError(), e.getMessage())); } catch (IOException e) { throw new ValidationException("Unable to check the ContentStream : " + e.getMessage(), e); } return errors; }
From source file:net.timendum.pdf.Images2HTML.java
License:Open Source License
/**
 * Runs {@code processStream} over the content stream of every page in the
 * document, in page order.
 *
 * Pages for which {@code getContents()} returns {@code null} are skipped
 * instead of triggering a {@code NullPointerException}.
 *
 * @param document the document whose pages are processed
 * @throws IOException if reading or processing a page's content stream fails
 */
public void processDocument(PDDocument document) throws IOException {
    List allPages = document.getDocumentCatalog().getAllPages();
    for (int i = 0; i < allPages.size(); i++) {
        PDPage page = (PDPage) allPages.get(i);
        if (page.getContents() == null) {
            // Page has no content stream; there is nothing to process.
            continue;
        }
        processStream(page, page.findResources(), page.getContents().getStream());
    }
}
From source file:onyx.core.parser.PDFTextStripper.java
License:Apache License
/** * This will process all of the pages and the text that is in them. * * @param pages The pages object in the document. * * @throws IOException If there is an error parsing the text. *//*from ww w . j a v a 2 s .c o m*/ protected void processPages(List<COSObjectable> pages) throws IOException { if (startBookmark != null) { startBookmarkPageNumber = getPageNumber(startBookmark, pages); } if (endBookmark != null) { endBookmarkPageNumber = getPageNumber(endBookmark, pages); } if (startBookmarkPageNumber == -1 && startBookmark != null && endBookmarkPageNumber == -1 && endBookmark != null && startBookmark.getCOSObject() == endBookmark.getCOSObject()) { //this is a special case where both the start and end bookmark //are the same but point to nothing. In this case //we will not extract any text. startBookmarkPageNumber = 0; endBookmarkPageNumber = 0; } Iterator<COSObjectable> pageIter = pages.iterator(); while (pageIter.hasNext()) { PDPage nextPage = (PDPage) pageIter.next(); PDStream contentStream = nextPage.getContents(); currentPageNo++; if (contentStream != null) { COSStream contents = contentStream.getStream(); processPage(nextPage, contents); } } }
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
/**
 * Copies the data returned by {@code page.getContents()} into a newly created
 * {@code PDStream} backed by a fresh {@code COSStream}.
 *
 * Both the source input stream and the target output stream are closed when
 * the copy finishes, including when it fails.
 *
 * @param page the page whose contents are copied
 * @return a new stream holding a copy of the page contents
 * @throws IOException if reading the page contents or writing the copy fails
 */
private PDStream getContents(PDPage page) throws IOException {
    PDStream pdStream = new PDStream(new COSStream());
    OutputStream os = pdStream.createOutputStream();
    try {
        // Fully qualified so the block does not depend on an extra import.
        java.io.InputStream in = page.getContents();
        try {
            IOUtils.copy(in, os);
        } finally {
            in.close();
        }
    } finally {
        os.close();
    }
    return pdStream;
}
From source file:org.apache.padaf.preflight.RetrieveMissingStream.java
License:Apache License
/**
 * Loads the PDF named by the single command-line argument, collects the
 * object keys of all {@code COSStream} objects in the document, and then
 * visits every page, looking up its content stream and its raw
 * {@code Contents} dictionary entry.
 *
 * Exits with status 233 when the argument count is not exactly one. The
 * input file and the document are closed before the method returns.
 *
 * @param args the command line arguments; exactly one file path is expected
 * @throws Exception if loading or reading the document fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage : RetrieveMissingStream file");
        System.exit(233);
    }

    HashSet<COSObjectKey> listOfKeys = new HashSet<COSObjectKey>();
    FileInputStream input = new FileInputStream(args[0]);
    try {
        PDDocument document = PDDocument.load(input);
        try {
            List<COSObject> lCosObj = document.getDocument().getObjects();
            for (COSObject cosObject : lCosObj) {
                if (cosObject.getObject() instanceof COSStream) {
                    listOfKeys.add(new COSObjectKey(cosObject.getObjectNumber().intValue(),
                            cosObject.getGenerationNumber().intValue()));
                }
            }

            PDDocumentCatalog catalog = document.getDocumentCatalog();
            List<?> pages = catalog.getAllPages();
            for (int i = 0; i < pages.size(); ++i) {
                PDPage pdp = (PDPage) pages.get(i);
                // NOTE(review): pdStream and b are looked up but never used, and only
                // an empty line is printed per page -- presumably unfinished or kept
                // for inspection in a debugger; confirm before removing.
                PDStream pdStream = pdp.getContents();
                COSBase b = pdp.getCOSDictionary().getItem(COSName.getPDFName("Contents"));
                System.out.println();
            }
        } finally {
            document.close();
        }
    } finally {
        input.close();
    }
}
From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java
License:Apache License
public void processDocument() throws IOException { resetEngine();//w ww. jav a2s . c om try { if (doc.isEncrypted()) { doc.decrypt(""); } } catch (Exception e) { throw new RuntimeException("Could not decrypt document", e); } currentPageNo = 0; docContent = new DocumentContent(); fonts = new Fonts(); for (final PDPage nextPage : (List<PDPage>) doc.getDocumentCatalog().getAllPages()) { PDStream contentStream = nextPage.getContents(); currentPageNo++; if (contentStream != null) { COSStream contents = contentStream.getStream(); processPage(nextPage, contents); } } docContent.setStyles(fonts.styles.values()); }
From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java
License:Open Source License
/** * This method is to scan for signature tag in each page and apply the signature * at desired location./*from w w w . j ava 2s. c o m*/ * @param imageData * @param originalByteArrayOutputStream */ @SuppressWarnings("unchecked") protected ByteArrayOutputStream scanAndApplyAutographInEachPage(byte[] imageData, ByteArrayOutputStream originalByteArrayOutputStream) throws Exception { ByteArrayOutputStream outputStream = originalByteArrayOutputStream; byte[] pdfFileData = originalByteArrayOutputStream.toByteArray(); PDDocument originalDocument = getPdfDocument(pdfFileData); //PDDocument.load(is); PDDocument signatureDocument = new PDDocument(); List<PDPage> originalDocumentPages = originalDocument.getDocumentCatalog().getAllPages(); for (PDPage page : originalDocumentPages) { List<String> signatureTags = new ArrayList<String>(getSignatureTagParameter()); PersonSignatureLocationHelper printer = new PersonSignatureLocationHelper(signatureTags); PDStream contents = page.getContents(); if (contents != null) { printer.processStream(page, page.findResources(), page.getContents().getStream()); } PDPage signaturePage = new PDPage(); if (printer.isSignatureTagExists()) { PDJpeg signatureImage = new PDJpeg(signatureDocument, getBufferedImage(imageData)); PDPageContentStream stream = new PDPageContentStream(signatureDocument, signaturePage, true, true); for (PersonSignaturePrintHelper signatureHelper : printer.getPersonSignatureLocations()) { float coordinateX = signatureHelper.getCoordinateX(); float coordinateY = signatureHelper.getCoordinateY() - signatureImage.getHeight() - ADDITIONAL_SPACE_BETWEEN_TAG_AND_IMAGE; stream.drawImage(signatureImage, coordinateX, coordinateY); stream.close(); } } else { signaturePage = page; } signatureDocument.addPage(signaturePage); } Overlay overlay = new Overlay(); overlay.overlay(signatureDocument, originalDocument); originalDocument.save(outputStream); originalDocument.close(); signatureDocument.close(); return outputStream; }
From source file:org.opencps.util.ExtractTextLocations.java
License:Open Source License
/**
 * Loads the PDF at {@code fullPath}, processes the content stream of its first
 * page (if it has one) and records that page's media box dimensions.
 *
 * An encrypted document is decrypted with {@code StringPool.BLANK}. Any
 * exception raised while decrypting or processing is logged rather than
 * rethrown. The document is closed in all cases.
 *
 * @param fullPath path of the PDF file to read
 * @throws IOException if closing the document fails
 */
public ExtractTextLocations(String fullPath) throws IOException {
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(new File(fullPath));

        if (pdf.isEncrypted()) {
            try {
                pdf.decrypt(StringPool.BLANK);
            } catch (Exception decryptError) {
                _log.error(decryptError);
            }
        }

        List pageList = pdf.getDocumentCatalog().getAllPages();
        if (pageList == null || pageList.isEmpty()) {
            return;
        }

        // Only the first page is examined.
        PDPage firstPage = (PDPage) pageList.get(0);
        if (firstPage.getContents() != null) {
            this.processStream(firstPage, firstPage.findResources(), firstPage.getContents().getStream());
        }

        PDRectangle mediaBox = firstPage.findMediaBox();
        if (mediaBox != null) {
            setPageWidth(mediaBox.getWidth());
            setPageHeight(mediaBox.getHeight());
            setPageLLX(mediaBox.getLowerLeftX());
            setPageURX(mediaBox.getUpperRightX());
            setPageLLY(mediaBox.getLowerLeftY());
            setPageURY(mediaBox.getUpperRightY());
        }
    } catch (Exception error) {
        _log.error(error);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}
From source file:pdfpicmangler.ResolutionAnalyzer.java
License:Open Source License
/**
 * Clears the previously collected resolutions, then processes the content
 * stream of every page of the document.
 *
 * Pages for which {@code getContents()} returns {@code null} are skipped
 * instead of triggering a {@code NullPointerException}.
 *
 * @param document the document to analyze
 * @return the {@code resolutions} map of this analyzer
 * @throws IOException if processing a page's content stream fails
 */
public Map<String, Float> analyze(PDDocument document) throws IOException {
    resolutions.clear();

    List<?> allPages = document.getDocumentCatalog().getAllPages();
    for (int i = 0; i < allPages.size(); i++) {
        PDPage page = (PDPage) allPages.get(i);
        currentPage = i + 1;
        // The printed index is zero-based, while currentPage is one-based.
        System.out.println("Processing page: " + i);

        if (page.getContents() == null) {
            // Page has no content stream; there is nothing to process.
            continue;
        }
        processStream(page, page.findResources(), page.getContents().getStream());
    }

    return resolutions;
}
From source file:pdfpositional.PdfPositional.java
/** * @param args the command line arguments *//*from w w w.ja va 2s . c o m*/ public static void main(String[] args) { try { // check file param if (args.length == 0) { throw new ParameterException("No file parameter specified"); } String file = args[args.length - 1]; Pattern patternFile = Pattern.compile("(?i)^[\\w,\\s-()/]+\\.pdf$"); Matcher matcherFile = patternFile.matcher(file); // check file is valid format if (!matcherFile.find()) { throw new ParameterException("File parameter invalid: " + file); } // check if file exists File input = new File(file); if (!input.exists()) { throw new ParameterException("File does not exist: " + file); } // ensure it isnt a directory if (input.isDirectory()) { throw new ParameterException("File is a directory: " + file); } PdfPositional pdfPositional = new PdfPositional(input); pdfPositional.setConversion(new Float(1.388888888889)); pdfPositional.processFileArgument(args[args.length - 1]); Pattern patternArgument = Pattern.compile("^-{2}([^=]+)[=]([\\s\\S]+)$"); Matcher matcher; for (int i = 0; i < args.length - 1; i++) { matcher = patternArgument.matcher(args[i]); while (matcher.find()) { switch (matcher.group(1)) { case "page": pdfPositional.setPageNumber(Integer.parseInt(matcher.group(2))); break; case "output": pdfPositional.setOutputFile(matcher.group(2)); break; } } } PDDocument document; document = PDDocument.load(pdfPositional.getInputFile()); // check for encrypted document if (document.isEncrypted()) { try { document.decrypt(""); } catch (CryptographyException | IOException e) { document.close(); throw new EncryptedDocumentException(); } } List allPages = document.getDocumentCatalog().getAllPages(); if (pdfPositional.hasPageNumber()) { if (document.getNumberOfPages() < pdfPositional.getPageNumber()) { throw new ParameterException("illegal page number"); } PDPage page = (PDPage) allPages.get(pdfPositional.getPageNumber() - 1); PDStream contents = page.getContents(); if (contents != null) { 
pdfPositional.processStream(page, page.findResources(), page.getContents().getStream()); pdfPositional.addPageDataToPdfData(); pdfPositional.writeJSONToOutputStream(); } } else { for (int i = 0; i < allPages.size(); i++) { pdfPositional.setPageNumber(i + 1); PDPage page = (PDPage) allPages.get(i); PDStream contents = page.getContents(); if (contents != null) { pdfPositional.processStream(page, page.findResources(), page.getContents().getStream()); pdfPositional.addPageDataToPdfData(); pdfPositional.writeJSONToOutputStream(); } page.clear(); } } pdfPositional.destroyOutputStream(); document.close(); System.exit(0); } catch (ParameterException ex) { System.out.println("Parameter Error: " + ex.getMessage()); System.exit(1); } catch (EncryptedDocumentException ex) { System.out.println("Encrypted Document Error"); System.exit(1); } catch (IOException | NumberFormatException ex) { System.out.println("General Error"); System.exit(1); } }