Example usage for org.apache.pdfbox.pdmodel PDPage getResources

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDPage.getResources().

Prototype

@Override
public PDResources getResources()

Source Link

Document

A dictionary containing any resources required by the page.

Usage

From source file:org.freeeed.ocr.PDFImageExtractor.java

License:Apache License

/**
 * Writes the images found in the page resources of the PDF {@code file} into the
 * configured PDF image extraction directory.
 *
 * <p>Extraction stops once the project's maximum number of images per PDF has been
 * written. An {@link IOException} raised while reading or writing is printed and the
 * images collected so far are returned. The document is always closed before returning.
 *
 * @return the names of the written image files, each with the image suffix appended
 */
@SuppressWarnings("rawtypes")
@Override
public List<String> extractImages() {
    File extractionDir = new File(conf.getPdfImageExtractionDir());
    extractionDir.mkdirs();

    List<String> result = new ArrayList<String>();

    PDDocument document = null;
    try {
        document = PDDocument.load(file);

        List pages = document.getDocumentCatalog().getAllPages();
        int maxNumberOfImages = Project.getCurrentProject().getOcrMaxImagesPerPDF();
        // 1-based count of the image about to be written.
        int i = 1;

        for (Object pageObject : pages) {
            PDPage page = (PDPage) pageObject;
            PDResources resources = page.getResources();
            if (resources == null) {
                // A page without a resource dictionary has no images to extract.
                continue;
            }
            Map pageImages = resources.getImages();
            if (pageImages == null) {
                continue;
            }
            for (Object keyObject : pageImages.keySet()) {
                if (i > maxNumberOfImages) {
                    // Cap reached; the finally block below still closes the document.
                    return result;
                }

                String key = (String) keyObject;
                PDXObjectImage image = (PDXObjectImage) pageImages.get(key);

                String fileName = conf.getPdfImageExtractionDir() + OCRUtil.createUniqueFileName("image");
                image.write2file(fileName);

                result.add(fileName + "." + image.getSuffix());

                i++;
            }
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        if (document != null) {
            try {
                document.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }

    return result;
}

From source file:org.nuxeo.pdf.test.PDFWatermarkingTest.java

License:Open Source License

/**
 * Asserts that each checked page of the PDF held by {@code inBlob} has an image
 * XObject with exactly the expected width and height.
 *
 * @param inBlob blob whose stream is loaded as a PDF document
 * @param inExpectedWidth width the image must have
 * @param inExpectedHeight height the image must have
 * @throws Exception if the document cannot be loaded or closed
 */
protected void checkHasImage(Blob inBlob, int inExpectedWidth, int inExpectedHeight) throws Exception {

    PDDocument doc = PDDocument.load(inBlob.getStream());
    utils.track(doc);

    try {
        List<?> allPages = doc.getDocumentCatalog().getAllPages();
        int max = allPages.size();
        // NOTE(review): the loop starts at index 1, so the first page (index 0) is never
        // checked -- confirm whether skipping it is intentional.
        for (int i = 1; i < max; i++) {
            PDPage page = (PDPage) allPages.get(i);

            PDResources pdResources = page.getResources();
            Map<String, PDXObject> allXObjects = pdResources.getXObjects();
            assertNotNull(allXObjects);

            boolean gotIt = false;
            for (PDXObject xobject : allXObjects.values()) {
                if (xobject instanceof PDXObjectImage) {
                    PDXObjectImage pdxObjectImage = (PDXObjectImage) xobject;
                    if (inExpectedWidth == pdxObjectImage.getWidth()
                            && inExpectedHeight == pdxObjectImage.getHeight()) {
                        gotIt = true;
                        break;
                    }
                }
            }
            assertTrue("Page " + i + " does not have the image", gotIt);
        }
    } finally {
        // Release the document even when one of the assertions above fails.
        doc.close();
        utils.untrack(doc);
    }
}

From source file:org.tnc.doctrack.behaviours.docTrackBehaviours.java

License:Open Source License

/**
 * Loads a PDF from the given stream and passes the resources of every page to
 * {@code processResources} to look for QR codes.
 *
 * <p>A non-empty result found on one page is not discarded when a later page yields
 * nothing. When several pages yield results, the result of the last such page is
 * returned. When no page yields a non-empty result, the value returned for the last
 * page is returned unchanged (possibly {@code null}).
 *
 * @param PDF stream containing the PDF document
 * @return the decoded results, or {@code null} if the document has no pages
 * @throws Exception if the document cannot be read, or an {@link IOException} if the
 *         document's access permissions do not allow content extraction
 */
private Result[] extractQRfromPDF(InputStream PDF) throws Exception {
    System.out.println("TNC - DocTrack  - extractQRfromPDF starting....");

    PDDocument document = null;
    String password = "";
    String prefix = null;
    boolean addKey = false;
    Result[] QR = null;
    try {
        // Read the PDF document.
        document = PDDocument.loadNonSeq(PDF, null, password);
        // Check that content may be extracted from this PDF.
        AccessPermission ap = document.getCurrentAccessPermission();
        if (!ap.canExtractContent()) {
            String message =
                    "TNC - DocTrack  Error - extractQRfromPDF - You do not have permission to extract images from PDF.";
            System.out.println(message);
            throw new IOException(message);
        }
        // Iterate through the PDF pages.
        List<?> pages = document.getDocumentCatalog().getAllPages();
        for (Object pageObject : pages) {
            PDPage page = (PDPage) pageObject;
            PDResources resources = page.getResources();
            // Process all XObject images which are part of the page resources.
            System.out.println("TNC - DocTrack  - extractQRfromPDF - Try to process image and find QR code");
            Result[] pageResult = processResources(resources, prefix, addKey);

            boolean pageHasResult = pageResult != null && pageResult.length > 0;
            boolean nothingFoundYet = QR == null || QR.length == 0;
            // Never replace codes that were already found with an empty/null page result.
            if (pageHasResult || nothingFoundYet) {
                QR = pageResult;
            }
        }

    } finally {
        if (document != null) {
            try {
                document.close();
            } catch (Exception ignored) {
                // Best-effort close: a failure here must not mask the extraction outcome.
            }
        }
    }
    // Arrays.toString prints the elements; plain concatenation would print the array identity.
    System.out.println("TNC - DocTrack  - extractQRfromPDF finished. QR code string : "
            + java.util.Arrays.toString(QR));
    return QR;
}

From source file:org.xwiki.test.misc.PDFTest.java

License:Open Source License

/**
 * Loads the PDF found at {@code url} and gathers the image XObjects referenced by the
 * resources of its pages.
 *
 * <p>Entries are keyed by XObject name; when the same name occurs on more than one
 * page, the entry of the later page replaces the earlier one.
 *
 * @param url location of the PDF document
 * @return map from XObject name to image XObject
 * @throws Exception if the document cannot be fetched, parsed or closed
 */
private Map<String, PDImageXObject> getImages(URL url) throws Exception {
    Map<String, PDImageXObject> imagesByName = new HashMap<>();

    byte[] pdfBytes = IOUtils.toByteArray(url);
    PDDocument pdf = PDDocument.load(pdfBytes);
    try {
        for (PDPage currentPage : pdf.getDocumentCatalog().getPages()) {
            PDResources pageResources = currentPage.getResources();
            for (COSName xObjectName : pageResources.getXObjectNames()) {
                if (!pageResources.isImageXObject(xObjectName)) {
                    // Only image XObjects are of interest here.
                    continue;
                }
                PDImageXObject imageXObject = (PDImageXObject) pageResources.getXObject(xObjectName);
                imagesByName.put(xObjectName.getName(), imageXObject);
            }
        }
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }

    return imagesByName;
}

From source file:org.zorbaxquery.modules.readPdf.GetImages.java

License:Apache License

/**
 * Prints information about every image found in the page resources of the PDF at
 * {@code path} and exports each image to a file.
 *
 * <p>For an image without metadata its size, suffix, bits per component, color space
 * and image-mask flag are printed; otherwise the metadata stream is printed as text.
 * I/O errors are printed and processing continues with the next step. If the document
 * cannot be loaded at all, the method returns after printing the error.
 *
 * @param path path of the PDF document to examine
 */
public void getXMPInformation(String path) {
    // Open PDF document
    PDDocument document = null;
    try {
        document = PDDocument.load(path);
    } catch (IOException e) {
        e.printStackTrace();
    }
    if (document == null) {
        // Loading failed (already reported above); there is nothing to examine.
        return;
    }

    try {
        // Get all pages and loop through them
        List pages = document.getDocumentCatalog().getAllPages();
        Iterator iter = pages.iterator();
        int pageNo = 1;
        while (iter.hasNext()) {
            System.out.println("Examining page " + pageNo++ + " :");
            PDPage page = (PDPage) iter.next();
            PDResources resources = page.getResources();
            Map images = null;
            // Get all Images on page
            if (resources != null) {
                try {
                    images = resources.getImages();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (images == null) {
                continue;
            }
            // Check all images for metadata
            Iterator imageIter = images.keySet().iterator();
            while (imageIter.hasNext()) {
                String key = (String) imageIter.next();
                PDXObjectImage image = (PDXObjectImage) images.get(key);
                PDMetadata metadata = image.getMetadata();
                System.out.println("Found a image: Analyzing for Metadata");
                if (metadata == null) {
                    System.out.println("No Metadata found for this image.");
                    System.out.println(
                            "image: " + image.getWidth() + "x" + image.getHeight() + " " + image.getSuffix());
                    try {
                        System.out.println("       bitsPerComponent: " + image.getBitsPerComponent()
                                + "  colorSpace: " + image.getColorSpace().getName() + "  hasImageMask: "
                                + image.getImageMask());
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                } else {
                    System.out.println(
                            "--------------------------------------------------------------------------------");
                    InputStream xmlInputStream = null;
                    try {
                        // Only convert the stream when it could actually be opened.
                        xmlInputStream = metadata.createInputStream();
                        String mystring = convertStreamToString(xmlInputStream);
                        System.out.println(mystring);
                    } catch (IOException e) {
                        e.printStackTrace();
                    } finally {
                        if (xmlInputStream != null) {
                            try {
                                xmlInputStream.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                }
                // Export the images
                String name = getUniqueFileName(key, image.getSuffix());
                System.out.println("Writing image:" + name);
                OutputStream os = null;
                try {
                    os = new FileOutputStream(new File(name));
                    image.write2OutputStream(os);
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    // Close the file so the written image is flushed and the handle released.
                    if (os != null) {
                        try {
                            os.close();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                }
                System.out.println(
                        "--------------------------------------------------------------------------------");
            }
        }
    } finally {
        try {
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:pdfbox.GetImagesFromPDF.java

/**
 * Writes every image found in the page resources of a fixed sample PDF into a fixed
 * destination directory, naming the files {@code <pdf name>_cover_<n>}.
 *
 * <p>Failures are printed instead of being silently discarded, and the document is
 * closed when processing ends.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String sourceDir = "D:/PdfBox/04-Request-Headers.pdf"; // PDF file to read
    String destinationDir = "D:/PdfBox/";
    File oldFile = new File(sourceDir);
    if (!oldFile.exists()) {
        System.err.println("File not exists");
        return;
    }

    PDDocument document = null;
    try {
        document = PDDocument.load(sourceDir);

        List<?> pages = document.getDocumentCatalog().getAllPages();

        String fileName = oldFile.getName().replace(".pdf", "_cover");
        // 1-based running number appended to each written image file.
        int totalImages = 1;
        for (Object pageObject : pages) {
            PDPage page = (PDPage) pageObject;
            PDResources pdResources = page.getResources();
            if (pdResources == null) {
                continue;
            }

            Map<?, ?> pageImages = pdResources.getImages();
            if (pageImages == null) {
                continue;
            }
            for (Object imageObject : pageImages.values()) {
                PDXObjectImage pdxObjectImage = (PDXObjectImage) imageObject;
                pdxObjectImage.write2file(destinationDir + fileName + "_" + totalImages);
                totalImages++;
            }
        }
    } catch (Exception e) {
        // Report the failure; the original empty catch hid every error.
        e.printStackTrace();
    } finally {
        if (document != null) {
            try {
                document.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:pdfpicmangler.PDFPicMangler.java

License:Open Source License

/**
 * Stores the given resolutions and runs {@code scanResources} on the resources of
 * every page of the document, recording the 1-based page number in
 * {@code currentPage} before each scan.
 *
 * @param doc document whose pages are scanned
 * @param resolutions resolutions stored on this instance before scanning
 * @return the same {@code doc} instance that was passed in
 * @throws IOException if scanning a page's resources fails
 */
private PDDocument process(PDDocument doc, Map<String, Float> resolutions) throws IOException {
    this.resolutions = resolutions;

    int pageNumber = 0;
    for (Object candidate : doc.getDocumentCatalog().getAllPages()) {
        pageNumber++;
        // Entries that are not pages are skipped but still counted in the numbering.
        if (candidate instanceof PDPage) {
            currentPage = pageNumber;
            PDPage pdPage = (PDPage) candidate;
            scanResources(pdPage.getResources(), doc);
        }
    }
    return doc;
}

From source file:Project.data.preparation.ImageExtraction.java

/**
 * Writes the image XObjects of every page of the PDF at {@code sourceDir} into
 * {@code destinationDir} and, for each page that has XObjects, runs the image
 * location / figure-name detection steps.
 *
 * <p>XObjects that are not images are skipped rather than cast. The document is
 * closed even when processing fails part-way.
 *
 * @param sourceDir path of the PDF document to load
 * @param destinationDir prefix prepended to each written image file name
 * @throws IOException if the document cannot be read or an image cannot be written
 * @throws CryptographyException if an encrypted document cannot be decrypted with an empty password
 * @throws COSVisitorException declared by the original signature
 */
public void extractImages(String sourceDir, String destinationDir)
        throws IOException, CryptographyException, COSVisitorException {
    if (!oldFile.exists()) {
        System.err.println("File not exists");
        return;
    }

    PDDocument document = PDDocument.load(sourceDir);
    try {
        if (document.isEncrypted()) {
            document.decrypt("");
        }
        List<?> pages = document.getDocumentCatalog().getAllPages();

        String fileName_img = oldFile.getName().replace(".pdf", "_cover");
        int pageNum = 0;
        int totalImages = 1;
        System.out.println("\n" + filename);

        for (Object pageObject : pages) {
            PDPage page = (PDPage) pageObject;

            // Per-page state shared with the helpers called below.
            original_imgName = new ArrayList<String>();
            location_xy = new ArrayList<double[]>();
            size_xy_ordered = new ArrayList<double[]>();
            size_xy_tmp = new ArrayList<double[]>();
            PDResources pdResources = page.getResources();
            Map<?, ?> pageXObjects = pdResources == null ? null : pdResources.getXObjects();
            pageNum++;
            if (pageXObjects != null && pageXObjects.size() > 0) {

                for (Object xobject : pageXObjects.values()) {
                    if (!(xobject instanceof PDXObjectImage)) {
                        // getXObjects() also returns non-image XObjects; casting them would fail.
                        continue;
                    }
                    PDXObjectImage pdxObjectImage = (PDXObjectImage) xobject;
                    String imgName = fileName_img + "_" + totalImages;
                    System.out.println("Page Number : " + pageNum + "\t" + imgName);
                    pdxObjectImage.write2file(destinationDir + imgName);

                    original_imgName.add(imgName + "." + pdxObjectImage.getSuffix());
                    double[] size = new double[] { pdxObjectImage.getWidth(), pdxObjectImage.getHeight() };
                    size_xy_ordered.add(size);
                    totalImages++;
                }
                // Detect figure names for image renaming.
                PrintImageLocation printer = new PrintImageLocation(page);
                location_xy = printer.getLocation_xy();
                size_xy_tmp = printer.getSize_xy();
                RearrangeImageOrder(location_xy, size_xy_tmp, size_xy_ordered);
                // The instance is not used afterwards; it is constructed as the original did.
                new DetectFigureName(original_imgName, filename, pageNum, page,
                        location_ordered, size_xy_ordered);
            }
        }
    } finally {
        document.close();
    }

}

From source file:se.streamsource.streamflow.web.application.pdf.Underlay.java

License:Apache License

/**
 * Draws the first page of {@code overlay} onto every page of {@code document} using
 * an extended graphics state whose blend mode ("BM") is set to "Darken".
 *
 * <p>For each page the graphics state is registered in the page resources under a
 * fresh key with the prefix "Dkn", then the imported XObject is drawn through a
 * content stream that selects that state.
 *
 * @param document document whose pages are drawn on
 * @param overlay document whose first page is imported as the XObject to draw
 * @throws IOException if importing the page or writing a content stream fails
 */
private void overlayWithDarkenBlendMode(PDDocument document, PDDocument overlay) throws IOException {
    PDXObjectForm xobject = importAsXObject(document,
            (PDPage) overlay.getDocumentCatalog().getAllPages().get(0));
    PDExtendedGraphicsState darken = new PDExtendedGraphicsState();
    darken.getCOSDictionary().setName("BM", "Darken");

    List<PDPage> pages = document.getDocumentCatalog().getAllPages();

    for (PDPage page : pages) {
        Map<String, PDExtendedGraphicsState> states = page.getResources().getGraphicsStates();
        if (states == null) {
            states = new HashMap<>();
        }
        String darkenKey = MapUtil.getNextUniqueKey(states, "Dkn");
        states.put(darkenKey, darken);
        page.getResources().setGraphicsStates(states);

        PDPageContentStream stream = new PDPageContentStream(document, page, true, false, true);
        try {
            stream.appendRawCommands(String.format("/%s gs ", darkenKey));
            stream.drawXObject(xobject, 0, 0, 1, 1);
        } finally {
            // Close the content stream even if drawing fails.
            stream.close();
        }
    }
}

From source file:uk.ac.ebi.tools.PDFExtractor.java

License:Open Source License

/**
 * Extracts the images from the page resources of a PDF file and writes each one
 * under the user's home directory with a generated unique name.
 *
 * <p>An {@link IOException} raised while reading or writing is logged together with
 * its stack trace and the entries collected so far are returned. The document is
 * closed in every case.
 *
 * @param filePath path of the PDF file to read
 * @return map from the path handed to {@code write2file} for each image to that
 *         image's format suffix; empty if the file does not exist
 * @throws java.io.IOException declared by the signature; failures inside this method
 *         are logged rather than rethrown
 */
public HashMap<String, String> getImages(String filePath) throws IOException {

    HashMap<String, String> imagePaths = new HashMap<>();
    if (!new File(filePath).exists()) {
        System.err.println("File not exists");
        return imagePaths;
    }

    PDDocument document = null;
    try {
        document = PDDocument.load(filePath);
        List<?> pages = document.getDocumentCatalog().getAllPages();

        for (Object pageObject : pages) {
            PDPage page = (PDPage) pageObject;
            PDResources pdResources = page.getResources();
            if (pdResources == null) {
                continue;
            }

            Map<?, ?> pageImages = pdResources.getImages();
            if (pageImages == null || pageImages.isEmpty()) {
                continue;
            }
            for (Object imageObject : pageImages.values()) {
                PDXObjectImage pdxObjectImage = (PDXObjectImage) imageObject;
                String uniqueName = PDFDoc.generateUniqueName();

                // set the imageFormat
                String imageFormat = pdxObjectImage.getSuffix();

                String imagePath = System.getProperty("user.home") + "/" + uniqueName;
                imagePaths.put(imagePath, imageFormat);
                pdxObjectImage.write2file(imagePath);
            }
        }
    } catch (IOException ex) {
        // Pass the exception itself so the cause and stack trace are not lost.
        PDFlogger.log(Level.SEVERE, "Error while extracting: Please check the input.", ex);
    } finally {
        if (document != null) {
            try {
                document.close();
            } catch (IOException ex) {
                PDFlogger.log(Level.SEVERE, "Error while extracting: Please check the input.", ex);
            }
        }
    }

    return imagePaths;
}