List of usage examples for org.apache.pdfbox.pdmodel.PDPage#getResources()
@Override
public PDResources getResources()
From source file:org.freeeed.ocr.PDFImageExtractor.java
License:Apache License
/**
 * Extracts the images embedded in the PDF {@code file} and writes them into the
 * directory returned by {@code conf.getPdfImageExtractionDir()}.
 *
 * <p>At most {@code Project.getCurrentProject().getOcrMaxImagesPerPDF()} images are
 * written; extraction stops as soon as that limit is reached. An {@link IOException}
 * is reported on stderr and whatever was extracted so far is returned.
 *
 * @return the names of the written image files (base name plus "." plus image suffix)
 */
@SuppressWarnings("rawtypes")
@Override
public List<String> extractImages() {
    File extractionDir = new File(conf.getPdfImageExtractionDir());
    extractionDir.mkdirs();

    List<String> result = new ArrayList<String>();
    PDDocument document = null;
    try {
        document = PDDocument.load(file);
        List pages = document.getDocumentCatalog().getAllPages();
        int maxNumberOfImages = Project.getCurrentProject().getOcrMaxImagesPerPDF();

        int i = 1;
        Iterator iter = pages.iterator();
        while (iter.hasNext()) {
            PDPage page = (PDPage) iter.next();
            PDResources resources = page.getResources();
            if (resources == null) {
                // A page without its own resource dictionary has nothing to extract.
                continue;
            }
            Map pageImages = resources.getImages();
            if (pageImages == null) {
                continue;
            }

            Iterator imageIter = pageImages.keySet().iterator();
            while (imageIter.hasNext()) {
                if (i > maxNumberOfImages) {
                    // Limit reached; the finally block below still closes the document.
                    return result;
                }
                String key = (String) imageIter.next();
                PDXObjectImage image = (PDXObjectImage) pageImages.get(key);

                String fileName = conf.getPdfImageExtractionDir() + OCRUtil.createUniqueFileName("image");
                image.write2file(fileName);
                // NOTE(review): presumably write2file appends the suffix itself, hence the
                // recorded name is fileName + "." + suffix - confirm against PDFBox 1.x.
                result.add(fileName + "." + image.getSuffix());
                i++;
            }
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        // The original never closed the document, leaking it on every call.
        if (document != null) {
            try {
                document.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }
    return result;
}
From source file:org.nuxeo.pdf.test.PDFWatermarkingTest.java
License:Open Source License
protected void checkHasImage(Blob inBlob, int inExpectedWidth, int inExpectedHeight) throws Exception { PDDocument doc = PDDocument.load(inBlob.getStream()); utils.track(doc);//from w w w. j av a2 s.c o m List<?> allPages = doc.getDocumentCatalog().getAllPages(); int max = allPages.size(); for (int i = 1; i < max; i++) { PDPage page = (PDPage) allPages.get(i); PDResources pdResources = page.getResources(); Map<String, PDXObject> allXObjects = pdResources.getXObjects(); assertNotNull(allXObjects); boolean gotIt = false; for (Map.Entry<String, PDXObject> entry : allXObjects.entrySet()) { PDXObject xobject = entry.getValue(); if (xobject instanceof PDXObjectImage) { PDXObjectImage pdxObjectImage = (PDXObjectImage) xobject; if (inExpectedWidth == pdxObjectImage.getWidth() && inExpectedHeight == pdxObjectImage.getHeight()) { gotIt = true; break; } } } assertTrue("Page " + i + "does not have the image", gotIt); } doc.close(); utils.untrack(doc); }
From source file:org.tnc.doctrack.behaviours.docTrackBehaviours.java
License:Open Source License
private Result[] extractQRfromPDF(InputStream PDF) throws Exception { System.out.println("TNC - DocTrack - extractQRfromPDF starting...."); //Initialize variable for QR decoding. PDDocument document = null;// w w w . ja v a 2 s.c om String password = ""; String prefix = null; boolean addKey = false; Result[] QR = null; try { //read PDF document document = PDDocument.loadNonSeq(PDF, null, password); //Check permission to PDF AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { System.out.println( "TNC - DocTrack Error - extractQRfromPDF - You do not have permission to extract images from PDF."); throw new IOException( "TNC - DocTrack Error - extractQRfromPDF - You do not have permission to extract images from PDF."); } //Iterate throw the PDF pages. List<?> pages = document.getDocumentCatalog().getAllPages(); Iterator<?> iter = pages.iterator(); while (iter.hasNext()) { PDPage page = (PDPage) iter.next(); PDResources resources = page.getResources(); // extract all XObjectImages which are part of the page resources System.out.println("TNC - DocTrack - extractQRfromPDF - Try to process image and find QR code"); QR = processResources(resources, prefix, addKey); } } finally { if ((document != null)) { try { document.close(); } catch (Exception e) { } } } System.out.println("TNC - DocTrack - extractQRfromPDF finished. QR code string : " + QR); return QR; }
From source file:org.xwiki.test.misc.PDFTest.java
License:Open Source License
private Map<String, PDImageXObject> getImages(URL url) throws Exception { Map<String, PDImageXObject> results = new HashMap<>(); PDDocument document = PDDocument.load(IOUtils.toByteArray(url)); try {// ww w .ja v a 2s . c o m for (PDPage page : document.getDocumentCatalog().getPages()) { PDResources pdResources = page.getResources(); for (COSName name : pdResources.getXObjectNames()) { if (pdResources.isImageXObject(name)) { PDImageXObject pdxObjectImage = (PDImageXObject) pdResources.getXObject(name); results.put(name.getName(), pdxObjectImage); } } } } finally { if (document != null) { document.close(); } } return results; }
From source file:org.zorbaxquery.modules.readPdf.GetImages.java
License:Apache License
public void getXMPInformation(String path) { // Open PDF document PDDocument document = null;//from w w w.j av a 2 s .c o m try { document = PDDocument.load(path); } catch (IOException e) { e.printStackTrace(); } // Get all pages and loop through them List pages = document.getDocumentCatalog().getAllPages(); Iterator iter = pages.iterator(); int pageNo = 1; while (iter.hasNext()) { System.out.println("Examining page " + pageNo++ + " :"); PDPage page = (PDPage) iter.next(); PDResources resources = page.getResources(); Map images = null; // Get all Images on page try { images = resources.getImages(); } catch (IOException e) { e.printStackTrace(); } if (images != null) { // Check all images for metadata Iterator imageIter = images.keySet().iterator(); while (imageIter.hasNext()) { String key = (String) imageIter.next(); PDXObjectImage image = (PDXObjectImage) images.get(key); PDMetadata metadata = image.getMetadata(); System.out.println("Found a image: Analyzing for Metadata"); if (metadata == null) { System.out.println("No Metadata found for this image."); System.out.println( "image: " + image.getWidth() + "x" + image.getHeight() + " " + image.getSuffix()); try { System.out.println(" bitsPerComponent: " + image.getBitsPerComponent() + " colorSpace: " + image.getColorSpace().getName() + " hasImageMask: " + image.getImageMask()); } catch (IOException e) { e.printStackTrace(); } } else { InputStream xmlInputStream = null; try { xmlInputStream = metadata.createInputStream(); } catch (IOException e) { e.printStackTrace(); } try { System.out.println( "--------------------------------------------------------------------------------"); String mystring = convertStreamToString(xmlInputStream); System.out.println(mystring); } catch (IOException e) { e.printStackTrace(); } } // Export the images String name = getUniqueFileName(key, image.getSuffix()); System.out.println("Writing image:" + name); try { //image.write2file(name); File f = new File(name); OutputStream os = new 
FileOutputStream(f); image.write2OutputStream(os); } catch (IOException e) { e.printStackTrace(); } System.out.println( "--------------------------------------------------------------------------------"); } } } }
From source file:pdfbox.GetImagesFromPDF.java
public static void main(String[] args) { try {//from w w w. j a v a2 s . co m String sourceDir = "D:/PdfBox/04-Request-Headers.pdf";// Paste pdf files in PDFCopy folder to read String destinationDir = "D:/PdfBox/"; File oldFile = new File(sourceDir); if (oldFile.exists()) { PDDocument document = PDDocument.load(sourceDir); List<PDPage> list = document.getDocumentCatalog().getAllPages(); String fileName = oldFile.getName().replace(".pdf", "_cover"); int totalImages = 1; for (PDPage page : list) { PDResources pdResources = page.getResources(); Map pageImages = pdResources.getImages(); if (pageImages != null) { Iterator imageIter = pageImages.keySet().iterator(); while (imageIter.hasNext()) { String key = (String) imageIter.next(); PDXObjectImage pdxObjectImage = (PDXObjectImage) pageImages.get(key); pdxObjectImage.write2file(destinationDir + fileName + "_" + totalImages); totalImages++; } } } } else { System.err.println("File not exists"); } } catch (Exception e) { } }
From source file:pdfpicmangler.PDFPicMangler.java
License:Open Source License
/**
 * Stores the given resolutions on this instance and scans the resources of every page
 * of the document.
 *
 * <p>Before a page is scanned, {@code currentPage} is set to its 1-based position in
 * the page list. Entries of the page list that are not {@code PDPage} instances are
 * skipped.
 *
 * @param doc the document to process
 * @param resolutions resolution map, stored in the {@code resolutions} field
 * @return the same {@code doc} instance that was passed in
 * @throws IOException propagated from {@code scanResources}
 */
private PDDocument process(PDDocument doc, Map<String, Float> resolutions) throws IOException {
    this.resolutions = resolutions;

    final List<?> allPages = doc.getDocumentCatalog().getAllPages();
    int index = 0;
    while (index < allPages.size()) {
        final Object entry = allPages.get(index);
        index++;
        if (entry instanceof PDPage) {
            // index has already been advanced, so it equals the 1-based page number.
            currentPage = index;
            PDPage pdPage = (PDPage) entry;
            scanResources(pdPage.getResources(), doc);
        }
    }
    return doc;
}
From source file:Project.data.preparation.ImageExtraction.java
public void extractImages(String sourceDir, String destinationDir) throws IOException, CryptographyException, COSVisitorException { PDDocument document = null;/*from w w w .j a va 2 s.co m*/ double[] size; if (oldFile.exists()) { document = PDDocument.load(sourceDir); if (document.isEncrypted()) { document.decrypt(""); } PrintImageLocation printer; // Get image location List<PDPage> list = document.getDocumentCatalog().getAllPages(); String fileName_img = oldFile.getName().replace(".pdf", "_cover"); int pageNum = 0; int totalImages = 1; System.out.println("\n" + filename); for (PDPage page : list) { original_imgName = new ArrayList<String>(); location_xy = new ArrayList<double[]>(); size_xy_ordered = new ArrayList<double[]>(); size_xy_tmp = new ArrayList<double[]>(); PDResources pdResources = page.getResources(); Map pageImages = pdResources.getXObjects(); pageNum++; if (pageImages != null && pageImages.size() > 0) { Iterator imageIter = pageImages.keySet().iterator(); while (imageIter.hasNext()) { String key = (String) imageIter.next(); PDXObjectImage pdxObjectImage = (PDXObjectImage) pageImages.get(key); String imgName = fileName_img + "_" + totalImages; System.out.println("Page Number : " + pageNum + "\t" + imgName); pdxObjectImage.write2file(destinationDir + imgName); original_imgName.add(imgName + "." + pdxObjectImage.getSuffix()); size = new double[] { pdxObjectImage.getWidth(), pdxObjectImage.getHeight() }; size_xy_ordered.add(size); totalImages++; } //Start for detect figure name for image renaming printer = new PrintImageLocation(page); location_xy = printer.getLocation_xy(); size_xy_tmp = printer.getSize_xy(); RearrangeImageOrder(location_xy, size_xy_tmp, size_xy_ordered); //PrinttoString(); DetectFigureName detectFig = new DetectFigureName(original_imgName, filename, pageNum, page, location_ordered, size_xy_ordered); } } } else { System.err.println("File not exists"); } if (document != null) { document.close(); } }
From source file:se.streamsource.streamflow.web.application.pdf.Underlay.java
License:Apache License
private void overlayWithDarkenBlendMode(PDDocument document, PDDocument overlay) throws IOException { PDXObjectForm xobject = importAsXObject(document, (PDPage) overlay.getDocumentCatalog().getAllPages().get(0)); PDExtendedGraphicsState darken = new PDExtendedGraphicsState(); darken.getCOSDictionary().setName("BM", "Darken"); List<PDPage> pages = document.getDocumentCatalog().getAllPages(); for (PDPage page : pages) { Map<String, PDExtendedGraphicsState> states = page.getResources().getGraphicsStates(); if (states == null) states = new HashMap<>(); String darkenKey = MapUtil.getNextUniqueKey(states, "Dkn"); states.put(darkenKey, darken);// w w w . java 2 s . c om page.getResources().setGraphicsStates(states); PDPageContentStream stream = new PDPageContentStream(document, page, true, false, true); stream.appendRawCommands(String.format("/%s gs ", darkenKey)); stream.drawXObject(xobject, 0, 0, 1, 1); stream.close(); } }
From source file:uk.ac.ebi.tools.PDFExtractor.java
License:Open Source License
/** * Extracts images from a PDF file and returns them in a list. * * @param filePath//from w ww . j a va 2s.c o m * @return * @throws java.io.IOException */ public HashMap<String, String> getImages(String filePath) throws IOException { HashMap<String, String> imagePaths = new HashMap<>(); try { if (new File(filePath).exists()) { PDDocument document = PDDocument.load(filePath); List<PDPage> list = document.getDocumentCatalog().getAllPages(); for (PDPage page : list) { PDResources pdResources = page.getResources(); Map pageImages = pdResources.getImages(); if (pageImages.size() > 0) { Iterator imageIter = pageImages.keySet().iterator(); while (imageIter.hasNext()) { String key = (String) imageIter.next(); PDXObjectImage pdxObjectImage = (PDXObjectImage) pageImages.get(key); String uniqueName = PDFDoc.generateUniqueName(); StringBuilder builder = new StringBuilder(); // set the imageFormat String imageFormat = pdxObjectImage.getSuffix(); builder = builder.append(System.getProperty("user.home")).append("/") .append(uniqueName); imagePaths.put(builder.toString(), imageFormat); pdxObjectImage.write2file(builder.toString()); builder.delete(0, builder.length()); } } } document.close(); } else { System.err.println("File not exists"); } } catch (IOException ex) { PDFlogger.log(Level.SEVERE, "Error while extracting: Please check the input.", ex.getMessage()); } return imagePaths; }