Example usage for org.apache.pdfbox.pdmodel PDPage getResources

List of usage examples for org.apache.pdfbox.pdmodel PDPage getResources

Introduction

This page collects usage examples for the getResources method of org.apache.pdfbox.pdmodel.PDPage.

Prototype

@Override
public PDResources getResources() 

Source Link

Document

A dictionary containing any resources required by the page.

Usage

From source file:net.padaf.preflight.helpers.PagesValidationHelper.java

License:Apache License

/**
 * This method check the Shading entry of the resource dictionary if exists.
 * /*w ww.j ava 2s .co  m*/
 * @param page
 * @param handler
 * @param result
 * @return
 * @throws ValidationException
 */
protected boolean validateShadingPattern(PDPage page, DocumentHandler handler, List<ValidationError> result)
        throws ValidationException {
    PDResources resources = page.getResources();
    COSDictionary shadings = (COSDictionary) resources.getCOSDictionary()
            .getDictionaryObject(PATTERN_KEY_SHADING);
    boolean res = true;
    if (shadings != null) {
        for (Object key : shadings.keySet()) {
            COSDictionary aShading = (COSDictionary) shadings.getDictionaryObject((COSName) key);
            ShadingPattern sp = new ShadingPattern(handler, aShading);
            List<ValidationError> lErrors = sp.validate();
            if (lErrors != null && !lErrors.isEmpty()) {
                result.addAll(lErrors);
                res = false;
            }
        }
    }
    return res;
}

From source file:no.digipost.print.validate.PdfFontValidator.java

License:Apache License

/**
 * Returns the fonts registered in the resource dictionary of the given page.
 *
 * @param page the page to inspect
 * @return the values of the page's font map, or an empty set when the page
 *         has no resources
 * @throws IOException declared for failures while reading the fonts
 */
public Collection<PDFont> getPageFonts(PDPage page) throws IOException {
    final PDResources pageResources = page.getResources();
    if (pageResources == null) {
        return emptySet();
    }
    return pageResources.getFonts().values();
}

From source file:org.ala.harvester.ExtractPubfSciNamesAndImages.java

License:Apache License

/**
 * Walks a fixed window of pages, reads the text of a fixed rectangular region
 * on each page as the scientific name, writes every JPEG image of the page to
 * {@code /data/tmp/} and appends one "name,file" line per image to the writer
 * obtained from {@code getSiteMapWriter("anic")}.
 *
 * @param document the PDF document to harvest
 * @throws IOException if reading the document or writing any output fails
 */
private static void extractSciNameAndImages(PDDocument document) throws IOException {
    // Page window (indices into the page list) this harvester was written for.
    final int firstPageIndex = 37;
    final int lastPageIndex = 249;

    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
    stripper.setSortByPosition(true);
    stripper.addRegion("class1", new Rectangle(10, 60, 275, 20));

    List<?> allPages = document.getDocumentCatalog().getAllPages();
    // Never index past the end of a document shorter than the expected window.
    int lastIndex = Math.min(lastPageIndex, allPages.size() - 1);

    Writer writer = getSiteMapWriter("anic");
    try {
        writeColumnHeaders(writer);

        for (int pageNum = firstPageIndex; pageNum <= lastIndex; pageNum++) {
            PDPage page = (PDPage) allPages.get(pageNum);
            // A page may have no resource dictionary of its own.
            PDResources resources = page.getResources();
            Map<?, ?> images = resources == null ? null : resources.getImages();
            stripper.extractRegions(page);

            String sciName = stripper.getTextForRegion("class1").trim();
            System.out.println("Scientific Name: " + sciName);

            if (images == null) {
                continue;
            }
            for (Map.Entry<?, ?> entry : images.entrySet()) {
                String key = (String) entry.getKey();
                PDXObjectImage image = (PDXObjectImage) entry.getValue();
                if (!"jpg".equals(image.getSuffix())) {
                    continue;
                }

                String name = getUniqueFileName(sciName + "_" + key, image.getSuffix());
                System.out.println("Writing image:" + name);
                image.write2file("/data/tmp/" + name);

                writer.write(sciName);
                writer.write(",");
                writer.write(name + "." + image.getSuffix());
                writer.write("\n");
            }
        }
    } finally {
        // Closing also flushes, so buffered lines are not lost.
        writer.close();
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

/**
 * Creates a stream (from FOP's PDF library) from a PDF page parsed with PDFBox.
 * <p>
 * The page content is rewritten (optionally merging fonts), the page resources
 * are cloned for the target document, and {@code atdoc} is overwritten with the
 * transform computed from the page's view box, rotation and {@code pos}.
 *
 * @param sourceDoc the source PDF the given page to be copied belongs to
 * @param page the page to transform into a stream
 * @param key value used to build the unique name for the stream
 * @param atdoc transform that is reset and filled in by this method
 * @param fontinfo font information; its used fonts are registered in the cloned font dictionary
 * @param pos rectangle whose width and height (each divided by 1000) give the target size
 * @return a clip rectangle built from the media box followed by the rewritten page content
 * @throws IOException if an I/O error occurs
 */
public String createStreamFromPDFBoxPage(PDDocument sourceDoc, PDPage page, String key, AffineTransform atdoc,
        FontInfo fontinfo, Rectangle pos) throws IOException {
    handleAnnotations(sourceDoc, page, atdoc);
    // If the target page index is tracked, replace the first entry with the target page's reference.
    if (pageNumbers.containsKey(targetPage.getPageIndex())) {
        pageNumbers.get(targetPage.getPageIndex()).set(0, targetPage.makeReference());
    }
    PDResources sourcePageResources = page.getResources();
    PDStream pdStream = getContents(page);

    COSDictionary fonts = (COSDictionary) sourcePageResources.getCOSObject().getDictionaryObject(COSName.FONT);
    COSDictionary fontsBackup = null;
    UniqueName uniqueName = new UniqueName(key, sourcePageResources);
    String newStream = null;
    // Font merging path: copy the font dictionary first so it can be restored further down.
    if (fonts != null && pdfDoc.isMergeFontsEnabled()) {
        fontsBackup = new COSDictionary(fonts);
        MergeFontsPDFWriter m = new MergeFontsPDFWriter(fonts, fontinfo, uniqueName, parentFonts, currentMCID);
        newStream = m.writeText(pdStream);
        //            if (newStream != null) {
        //                for (Object f : fonts.keySet().toArray()) {
        //                    COSDictionary fontdata = (COSDictionary)fonts.getDictionaryObject((COSName)f);
        //                    if (getUniqueFontName(fontdata) != null) {
        //                        fonts.removeItem((COSName)f);
        //                    }
        //                }
        //            }
    }
    // Fallback: merging was skipped or produced nothing, so use the plain writer
    // and pick up its marked-content id counter.
    if (newStream == null) {
        PDFWriter writer = new PDFWriter(uniqueName, currentMCID);
        newStream = writer.writeText(pdStream);
        currentMCID = writer.getCurrentMCID();

    }
    // From here on pdStream holds the rewritten content, encoded as ISO-8859-1.
    pdStream = new PDStream(sourceDoc, new ByteArrayInputStream(newStream.getBytes("ISO-8859-1")));
    mergeXObj(sourcePageResources.getCOSObject(), fontinfo, uniqueName);
    PDFDictionary pageResources = (PDFDictionary) cloneForNewDocument(sourcePageResources.getCOSObject());

    // Register every used FOP PDF font in the cloned font dictionary,
    // assigning an object number to fonts that have no reference yet.
    PDFDictionary fontDict = (PDFDictionary) pageResources.get("Font");
    if (fontDict != null && pdfDoc.isMergeFontsEnabled()) {
        for (Map.Entry<String, Typeface> fontEntry : fontinfo.getUsedFonts().entrySet()) {
            Typeface font = fontEntry.getValue();
            if (font instanceof FOPPDFFont) {
                FOPPDFFont pdfFont = (FOPPDFFont) font;
                if (pdfFont.getRef() == null) {
                    pdfFont.setRef(new PDFDictionary());
                    pdfDoc.assignObjectNumber(pdfFont.getRef());
                }
                fontDict.put(fontEntry.getKey(), pdfFont.getRef());
            }
        }
    }
    updateXObj(sourcePageResources.getCOSObject(), pageResources);
    // Put the font dictionary copy taken before merging back on the source page resources.
    if (fontsBackup != null) {
        sourcePageResources.getCOSObject().setItem(COSName.FONT, fontsBackup);
    }

    COSStream originalPageContents = pdStream.getCOSObject();

    bindOptionalContent(sourceDoc);

    PDFStream pageStream;
    Set filter;
    //        if (originalPageContents instanceof COSStreamArray) {
    //            COSStreamArray array = (COSStreamArray)originalPageContents;
    //            pageStream = new PDFStream();
    //            InputStream in = array.getUnfilteredStream();
    //            OutputStream out = pageStream.getBufferOutputStream();
    //            IOUtils.copyLarge(in, out);
    //            filter = FILTER_FILTER;
    //        } else {
    pageStream = (PDFStream) cloneForNewDocument(originalPageContents);
    filter = Collections.EMPTY_SET;
    //        }
    if (pageStream == null) {
        pageStream = new PDFStream();
    }
    if (originalPageContents != null) {
        transferDict(originalPageContents, pageStream, filter);
    }

    transferPageDict(fonts, uniqueName, sourcePageResources);

    // The crop box, when present, takes precedence over the media box as the view box.
    PDRectangle mediaBox = page.getMediaBox();
    PDRectangle cropBox = page.getCropBox();
    PDRectangle viewBox = cropBox != null ? cropBox : mediaBox;

    //Handle the /Rotation entry on the page dict
    int rotation = PDFUtil.getNormalizedRotation(page);

    //Transform to FOP's user space
    float w = (float) pos.getWidth() / 1000f;
    float h = (float) pos.getHeight() / 1000f;
    // Width and height swap roles for quarter-turn rotations.
    if (rotation == 90 || rotation == 270) {
        float tmp = w;
        w = h;
        h = tmp;
    }
    // Replace the caller's transform: scale the view box to the target size,
    // then translate, rotate, mirror and shift by the view box origin.
    atdoc.setTransform(AffineTransform.getScaleInstance(w / viewBox.getWidth(), h / viewBox.getHeight()));
    atdoc.translate(0, viewBox.getHeight());
    atdoc.rotate(-Math.PI);
    atdoc.scale(-1, 1);
    atdoc.translate(-viewBox.getLowerLeftX(), -viewBox.getLowerLeftY());

    rotate(rotation, viewBox, atdoc);

    // Prefix the content with "x y w h re W n", a rectangle taken from the media box.
    StringBuilder boxStr = new StringBuilder();
    boxStr.append(PDFNumber.doubleOut(mediaBox.getLowerLeftX())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getLowerLeftY())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getWidth())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getHeight())).append(" re W n\n");
    return boxStr.toString() + IOUtils.toString(pdStream.createInputStream(null), "ISO-8859-1");
}

From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java

License:Apache License

/**
 * Looks up a font dictionary by its internal name in the Font resources of
 * the first page of the given document.
 *
 * @param doc the document whose first page is inspected
 * @param internalname key of the font inside the page's Font dictionary
 * @return the dictionary stored under {@code internalname}
 * @throws IOException declared for failures while accessing the document
 */
private COSDictionary getFont(PDDocument doc, String internalname) throws IOException {
    PDPage firstPage = (PDPage) doc.getDocumentCatalog().getPages().get(0);
    COSDictionary fontDictionary = (COSDictionary) firstPage.getResources().getCOSObject()
            .getDictionaryObject(COSName.FONT);
    return (COSDictionary) fontDictionary.getDictionaryObject(internalname);
}

From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java

License:Apache License

/**
 * Finishes the output for one page: ends the current paragraph, extracts the
 * page's images, hands file attachment annotations to the embedded document
 * extractor, optionally emits link and markup annotations as {@code div}
 * elements, and closes the page's {@code div}.
 *
 * @param page the page that has just been processed
 * @throws IOException if extraction fails or a SAX/Tika error has to be reported
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        writeParagraphEnd();

        extractImages(page.getResources());

        final EmbeddedDocumentExtractor embeddedExtractor = getEmbeddedDocumentExtractor();
        for (PDAnnotation pageAnnotation : page.getAnnotations()) {

            if (pageAnnotation instanceof PDAnnotationFileAttachment) {
                PDComplexFileSpecification attachmentSpec = (PDComplexFileSpecification)
                        ((PDAnnotationFileAttachment) pageAnnotation).getFile();
                try {
                    extractMultiOSPDEmbeddedFiles("", attachmentSpec, embeddedExtractor);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                }
            }

            // TODO: remove once PDFBOX-1143 is fixed:
            if (!config.getExtractAnnotationText()) {
                continue;
            }

            if (pageAnnotation instanceof PDAnnotationLink) {
                // instanceof is false for null, so a missing action is skipped as well.
                PDAction linkAction = ((PDAnnotationLink) pageAnnotation).getAction();
                if (linkAction instanceof PDActionURI) {
                    String target = ((PDActionURI) linkAction).getURI();
                    if (target != null) {
                        handler.startElement("div", "class", "annotation");
                        handler.startElement("a", "href", target);
                        handler.endElement("a");
                        handler.endElement("div");
                    }
                }
            }

            if (pageAnnotation instanceof PDAnnotationMarkup) {
                PDAnnotationMarkup markup = (PDAnnotationMarkup) pageAnnotation;
                // CSS class / text pairs, in the order they are emitted.
                // TODO: maybe also markup.getRichContents()?
                String[][] parts = {
                        { "annotationTitle", markup.getTitlePopup() },
                        { "annotationSubject", markup.getSubject() },
                        { "annotationContents", markup.getContents() },
                };

                boolean anyPresent = false;
                for (String[] part : parts) {
                    if (part[1] != null) {
                        anyPresent = true;
                    }
                }

                if (anyPresent) {
                    handler.startElement("div", "class", "annotation");
                    for (String[] part : parts) {
                        if (part[1] != null) {
                            handler.startElement("div", "class", part[0]);
                            handler.characters(part[1]);
                            handler.endElement("div");
                        }
                    }
                    handler.endElement("div");
                }
            }
        }

        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
}

From source file:org.apache.tika.parser.pdf.PDF2XHTML.java

License:Apache License

/**
 * Finishes the output for one page: ends the current paragraph, extracts the
 * page's images, hands file attachment annotations to the embedded document
 * extractor, optionally emits link and markup annotations as {@code div}
 * elements, closes the page's {@code div} and finally clears the page.
 *
 * @param page the page that has just been processed
 * @throws IOException if extraction fails or a SAX/Tika error has to be reported
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        writeParagraphEnd();

        extractImages(page.getResources(), new HashSet<COSBase>());

        EmbeddedDocumentExtractor docExtractor = getEmbeddedDocumentExtractor();
        for (PDAnnotation current : page.getAnnotations()) {

            if (current instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment attachment = (PDAnnotationFileAttachment) current;
                PDComplexFileSpecification spec = (PDComplexFileSpecification) attachment.getFile();
                try {
                    extractMultiOSPDEmbeddedFiles("", spec, docExtractor);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                }
            }

            // TODO: remove once PDFBOX-1143 is fixed:
            if (!config.getExtractAnnotationText()) {
                continue;
            }

            if (current instanceof PDAnnotationLink) {
                PDAction linkAction = ((PDAnnotationLink) current).getAction();
                // A null action fails the instanceof test and is skipped.
                String href = linkAction instanceof PDActionURI ? ((PDActionURI) linkAction).getURI() : null;
                if (href != null) {
                    handler.startElement("div", "class", "annotation");
                    handler.startElement("a", "href", href);
                    handler.endElement("a");
                    handler.endElement("div");
                }
            }

            if (!(current instanceof PDAnnotationMarkup)) {
                continue;
            }

            PDAnnotationMarkup markup = (PDAnnotationMarkup) current;
            String popupTitle = markup.getTitlePopup();
            String markupSubject = markup.getSubject();
            String markupContents = markup.getContents();
            // TODO: maybe also markup.getRichContents()?
            if (popupTitle == null && markupSubject == null && markupContents == null) {
                continue;
            }

            handler.startElement("div", "class", "annotation");

            if (popupTitle != null) {
                handler.startElement("div", "class", "annotationTitle");
                handler.characters(popupTitle);
                handler.endElement("div");
            }

            if (markupSubject != null) {
                handler.startElement("div", "class", "annotationSubject");
                handler.characters(markupSubject);
                handler.endElement("div");
            }

            if (markupContents != null) {
                handler.startElement("div", "class", "annotationContents");
                handler.characters(markupContents);
                handler.endElement("div");
            }

            handler.endElement("div");
        }

        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
    page.clear();
}

From source file:org.apache.tika.parser.pdf.PDF2XHTMLPureJava.java

License:Apache License

/**
 * Ends the current paragraph, extracts the page's images and then delegates
 * to the superclass implementation.
 * <p>
 * An {@link IOException} from image extraction is passed to
 * {@code handleCatchableIOE}; any other {@link IOException} raised inside the
 * outer {@code try} is recorded in {@code exceptions} instead of propagating.
 *
 * @param page the page that has just been processed
 * @throws IOException wrapping a {@code SAXException} raised while ending the page
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        writeParagraphEnd();
        try {
            extractImages(page.getResources(), new HashSet<COSBase>());
        } catch (IOException imageFailure) {
            handleCatchableIOE(imageFailure);
        }
        super.endPage(page);
    } catch (IOException pageFailure) {
        // Recorded rather than rethrown.
        exceptions.add(pageFailure);
    } catch (SAXException saxFailure) {
        throw new IOException("Unable to end a page", saxFailure);
    }
}

From source file:org.argrr.extractor.gdrive.downloader.ChartsDownloader.java

License:Open Source License

/**
 * Writes every image found in the page resources of
 * {@code path/fileName.pdf} to
 * {@code ChartsDownloader.rootOutputPathCharts/fileName-N}, where N is a
 * counter starting at 1 that runs across all pages of the document.
 *
 * @param path directory containing the PDF
 * @param fileName name of the PDF without the {@code .pdf} extension
 * @throws IOException if the PDF cannot be loaded or an image cannot be written
 */
public static void extractPictures(String path, String fileName) throws IOException {
    // Let a load failure propagate: continuing with a null document only ends in an NPE.
    PDDocument document = PDDocument.load(path + "/" + fileName + ".pdf");
    try {
        int imageIndex = 1;
        for (Object pageObject : document.getDocumentCatalog().getAllPages()) {
            PDResources resources = ((PDPage) pageObject).getResources();
            if (resources == null) {
                // Page without its own resource dictionary: nothing to extract.
                continue;
            }
            Map<?, ?> pageImages = resources.getImages();
            if (pageImages == null) {
                continue;
            }
            for (Object imageObject : pageImages.values()) {
                PDXObjectImage image = (PDXObjectImage) imageObject;
                image.write2file(ChartsDownloader.rootOutputPathCharts + "/" + fileName + "-" + imageIndex);
                imageIndex++;
            }
        }
    } finally {
        // Release the document's resources even when extraction fails.
        document.close();
    }
}

From source file:org.example.extractimagesfrompdfpages.ExtractImagesFromPDFPagesMain.java

/**
 * Extracts every image XObject of the PDF named by {@code args[0]} and writes
 * it as a PNG to {@code <pdf directory>/temp_images/<page number>/<image number>.png}.
 * The per-page folder is only created once the page is known to contain an image.
 *
 * @param args command line arguments; {@code args[0]} is the path of the PDF file
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 *         the PDF or writing an image
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.println("Usage: ExtractImagesFromPDFPagesMain <pdf-file>");
        return;
    }
    try {
        // Resolve to an absolute file first: getParentFile() returns null for a bare relative name.
        File thePDFFile = new File(args[0]).getAbsoluteFile();
        File thePDFFileDirectory = thePDFFile.getParentFile();
        PDDocument document = PDDocument.load(thePDFFile);
        try {
            int i = 1;
            for (PDPage page : document.getPages()) {
                boolean alreadyCreatedFolderForThisPage = false;
                File thePDFPageFolder = new File(
                        thePDFFileDirectory.getAbsolutePath() + "/temp_images" + "/" + i);
                PDResources pdResources = page.getResources();
                // A page may have no resource dictionary at all.
                if (pdResources != null) {
                    int j = 1;
                    for (COSName c : pdResources.getXObjectNames()) {
                        PDXObject o = pdResources.getXObject(c);
                        if (o instanceof org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) {
                            if (!alreadyCreatedFolderForThisPage) {
                                thePDFPageFolder.mkdirs();
                                alreadyCreatedFolderForThisPage = true;
                            }

                            File file = new File(thePDFPageFolder.getAbsolutePath() + "/" + j + ".png");
                            ImageIO.write(
                                    ((org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) o).getImage(),
                                    "png", file);
                            System.out.println(thePDFPageFolder.getAbsolutePath() + "/" + j + ".png");

                            j++;
                        }
                    }
                }
                i++;
            }
        } finally {
            // Always release the document, even if an image fails to decode or write.
            document.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(ExtractImagesFromPDFPagesMain.class.getName()).log(Level.SEVERE, null, ex);
        throw new RuntimeException(ex);
    }

}