Example usage for org.apache.pdfbox.pdmodel PDPage getAnnotations

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDPage.getAnnotations().

Prototype

public List<PDAnnotation> getAnnotations() throws IOException

Source Link

Document

This will return a list of the annotations for this page.

Usage

From source file:org.pdfmetamodifier.IOHelper.java

License:Apache License

/**
 * Save all Attached (embedded) files to some directory.
 * /*from   ww w  .ja v  a2 s  .  c om*/
 * @param pdfFile
 *            Source PDF file.
 * @param outputDir
 *            Target directory.
 * @throws IOException
 */
/*
 * See:
 *      https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java?view=markup
 */
public static void saveAttachments(final File pdfFile, final File outputDir) throws IOException {
    PDDocument document = null;
    try {
        // Read PDF file.
        document = PDDocument.load(pdfFile);
        if (document.isEncrypted()) {
            throw new IOException("Document is encrypted.");
        }

        // Extract Embedded (attached) files.
        final PDDocumentNameDictionary documentNameDictionary = new PDDocumentNameDictionary(
                document.getDocumentCatalog());
        final PDEmbeddedFilesNameTreeNode embeddedFilesNameTree = documentNameDictionary.getEmbeddedFiles();
        if (embeddedFilesNameTree != null) {
            extractFiles(outputDir, embeddedFilesNameTree.getNames());

            final List<PDNameTreeNode<PDComplexFileSpecification>> kids = embeddedFilesNameTree.getKids();
            if (kids != null) {
                for (PDNameTreeNode<PDComplexFileSpecification> nameTreeNode : kids) {
                    extractFiles(outputDir, nameTreeNode.getNames());
                }
            }
        }

        // Extract Embedded (attached) from annotations.
        for (PDPage page : document.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (annotation instanceof PDAnnotationFileAttachment) {
                    final PDAnnotationFileAttachment fileAttach = (PDAnnotationFileAttachment) annotation;

                    final PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fileAttach
                            .getFile();
                    extractFile(outputDir, fileSpec);
                }
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:org.xwiki.test.misc.PDFTest.java

License:Open Source License

/**
 * Maps the text covered by each link annotation of a page to the action of that link.
 * <p>
 * Code adapted from http://www.docjar.com/html/api/org/apache/pdfbox/examples/pdmodel/PrintURLs.java.html
 *
 * @param page the page whose link annotations are inspected
 * @return a map from the trimmed text extracted for a link's region to that link's action; when
 *         two links yield the same text, the later one replaces the earlier one
 * @throws Exception if reading the annotations or extracting the region text fails
 */
private Map<String, PDAction> extractLinks(PDPage page) throws Exception {
    PDFTextStripperByArea regionStripper = new PDFTextStripperByArea();
    List<PDAnnotation> pageAnnotations = page.getAnnotations();

    // Pass 1: register one extraction region per link, named after the annotation's list index.
    int regionIndex = 0;
    for (PDAnnotation candidate : pageAnnotations) {
        String regionName = String.valueOf(regionIndex++);
        if (!(candidate instanceof PDAnnotationLink)) {
            continue;
        }
        PDRectangle linkBox = ((PDAnnotationLink) candidate).getRectangle();

        // The link rectangle has to be repositioned to match text space; this is only done for
        // unrotated pages, any other rotation leaves the coordinate untouched.
        float top = linkBox.getUpperRightY();
        if (page.getRotation() == 0) {
            top = page.getMediaBox().getHeight() - top;
        }

        regionStripper.addRegion(regionName,
                new Rectangle2D.Float(linkBox.getLowerLeftX(), top, linkBox.getWidth(), linkBox.getHeight()));
    }

    regionStripper.extractRegions(page);

    // Pass 2: pair the text extracted for each region with the action of its link.
    Map<String, PDAction> actionsByLabel = new HashMap<String, PDAction>();
    regionIndex = 0;
    for (PDAnnotation candidate : pageAnnotations) {
        String regionName = String.valueOf(regionIndex++);
        if (candidate instanceof PDAnnotationLink) {
            String label = regionStripper.getTextForRegion(regionName).trim();
            actionsByLabel.put(label, ((PDAnnotationLink) candidate).getAction());
        }
    }

    return actionsByLabel;
}

From source file:uk.ac.leeds.ccg.andyt.rdl.web.RDL_ParsePDF.java

/**
 * https://svn.apache.org/viewvc/pdfbox/trunk/examples/ Based on
 * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintURLs.java?view=markup&pathrev=1703066
 *
 * @param f/*from  w  w  w.j  ava  2s . c  o  m*/
 * @param filter
 * @param fis
 * @return
 * @throws IOException
 * @throws TikaException
 * @throws SAXException
 */
public static ArrayList<String[]> parseForLinks(File f, String filter, FileInputStream fis)
        throws IOException, TikaException, SAXException {
    ArrayList<String[]> result;
    result = new ArrayList<String[]>();
    PDDocument doc = PDDocument.load(f);
    int pageNum = 0;
    for (PDPage page : doc.getPages()) {
        pageNum++;

        //            if (pageNum == 11) { //Degug test hack
        System.out.println("Parsing page " + pageNum);
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        List<PDAnnotation> annotations = page.getAnnotations();
        //first setup text extraction regions
        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDRectangle rect = link.getRectangle();
                //need to reposition link rectangle to match text space
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();
                int rotation = page.getRotation();
                if (rotation == 0) {
                    PDRectangle pageSize = page.getMediaBox();
                    y = pageSize.getHeight() - y;
                } else if (rotation == 90) {
                    //do nothing
                }

                //Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                // Rounding here could be a problem!
                Rectangle2D.Double awtRect = new Rectangle2D.Double(x, y, width, height);
                stripper.addRegion("" + j, awtRect);
            }
        }

        stripper.extractRegions(page);

        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDAction action = link.getAction();
                if (action == null) {
                    System.out.println(link.getContents());
                    System.out.println(annot.getClass().getName());
                    System.out.println(annot.getAnnotationName());
                    //System.out.println(annot.getNormalAppearanceStream().toString());
                    System.out.println(annot.getContents());
                    System.out.println(annot.getSubtype());
                } else {
                    String urlText = stripper.getTextForRegion("" + j);
                    if (action instanceof PDActionURI) {
                        PDActionURI uri = (PDActionURI) action;
                        String url;
                        url = uri.getURI();
                        if (url.contains(filter)) {
                            String[] partResult;
                            partResult = new String[3];
                            partResult[0] = "Page " + pageNum;
                            partResult[1] = "urlText " + urlText;
                            partResult[2] = "URL " + uri.getURI();
                            System.out.println(partResult[0]);
                            System.out.println(partResult[1]);
                            System.out.println(partResult[2]);
                            System.out.println("URL " + uri.getURI());
                            result.add(partResult);
                        } else {
                            System.out.println("URL " + uri.getURI());
                        }
                    } else {
                        System.out.println(action.getType());
                    }
                }
            } else {
                System.out.println(annot.getClass().getName());
                System.out.println(annot.getAnnotationName());
                System.out.println(annot.getContents());
                System.out.println(annot.getSubtype());
            }
        }

        //}
    }
    //       PDDocument doc = PDDocument.load(f);
    //        int pageNum = 0;
    //        for (PDPage page : doc.getPages()) {
    //            pageNum++;
    //            List<PDAnnotation> annotations = page.getAnnotations();
    //
    //            for (PDAnnotation annotation : annotations) {
    //                PDAnnotation annot = annotation;
    //                if (annot instanceof PDAnnotationLink) {
    //                    PDAnnotationLink link = (PDAnnotationLink) annot;
    //                    PDAction action = link.getAction();
    //                    if (action instanceof PDActionURI) {
    //                        PDActionURI uri = (PDActionURI) action;
    //                        String oldURI = uri.getURI();
    //                        String name = annot.getAnnotationName();
    //                        String contents = annot.getContents();
    //                        PDAppearanceStream a = annot.getNormalAppearanceStream();
    //                        //String newURI = "http://pdfbox.apache.org";
    //                        System.out.println(oldURI + " " + name + " " + contents);
    //                        //uri.setURI(newURI);
    //                    }
    //                }
    //            }
    //        }

    //        result = parseWithTika(fis);
    //XMPSchema schema;
    //schema = new XMPSchema();
    //List<String> XMPBagOrSeqList;
    //XMPBagOrSeqList = getXMPBagOrSeqList(XMPSchema schema, String name) {

    //        PDDocument tPDDocument;
    //        tPDDocument = PDDocument.load(f);
    //        COSDocument tCOSDocument;
    //        tCOSDocument = tPDDocument.getDocument();

    //        String header;
    //        header = tCOSDocument.getHeaderString();
    //        System.out.println(header);

    //        PDDocumentCatalog tPDDocumentCatalog;
    //        tPDDocumentCatalog = tPDDocument.getDocumentCatalog();
    //        PDDocumentNameDictionary tPDDocumentNameDictionary;
    //        tPDDocumentNameDictionary = tPDDocumentCatalog.getNames();

    //        COSDictionary tCOSDictionary;
    //        tCOSDictionary = tPDDocumentNameDictionary.getCOSDictionary();
    //tCOSDictionary.
    //        PDPageNode tPDPageNode;
    //        tPDPageNode = tPDDocumentCatalog.getPages();

    //        List<COSObject> tCOSObjects;
    //        tCOSObjects = tCOSDocument.getObjects();
    //        int n;
    //        n = tCOSObjects.size();
    //        System.out.println(n);
    //        COSObject aCOSObject;
    //        String s;
    //        for (int i = 0; i < n; i++) {
    //            aCOSObject = tCOSObjects.get(i);
    //            s = aCOSObject.toString();
    //            System.out.println(s);
    //        }

    //        XMPMetadata tXMPMetadata;
    //        tXMPMetadata = getXMPMetadata(tPDDocument);

    //        Document XMPDocument;
    //        XMPDocument = tXMPMetadata.getXMPDocument();
    //        Node n;
    //        n = XMPDocument.getFirstChild();
    //        parseNode(n);
    return result;
}

From source file:uk.bl.wa.tika.parser.pdf.pdfbox.PDF2XHTML.java

License:Apache License

/**
 * Finishes the output for a page.
 * <p>
 * Ends the current paragraph, then, when {@code extractAnnotationText} is set, writes the title,
 * subject and contents of every free-text annotation on the page as nested {@code div} elements
 * (classes {@code annotation}, {@code annotationTitle}, {@code annotationSubject} and
 * {@code annotationContents}), and finally ends the enclosing {@code div} element (presumably the
 * one opened for the page).
 *
 * @param page the page that has just been processed
 * @throws IOException if reading the annotations fails, or wrapping any {@link SAXException}
 *         raised while writing the output
 */
@Override
protected void endPage(PDPage page) throws IOException {

    try {
        writeParagraphEnd();
        // TODO: remove once PDFBOX-1143 is fixed:
        if (extractAnnotationText) {
            for (Object o : page.getAnnotations()) {
                // Check for the markup type itself before casting, so an annotation of another
                // class can never cause a ClassCastException.
                if (!(o instanceof PDAnnotationMarkup)) {
                    continue;
                }
                PDAnnotationMarkup annot = (PDAnnotationMarkup) o;
                if (!PDAnnotationMarkup.SUB_TYPE_FREETEXT.equals(annot.getSubtype())) {
                    continue;
                }

                // It's a text annotation:
                String title = annot.getTitlePopup();
                // The subject has its own accessor; reading the title popup here again would
                // emit the title twice and never the subject.
                String subject = annot.getSubject();
                String contents = annot.getContents();
                // TODO: maybe also annot.getRichContents()?
                if (title != null || subject != null || contents != null) {
                    handler.startElement("div", "class", "annotation");

                    if (title != null) {
                        handler.startElement("div", "class", "annotationTitle");
                        handler.characters(title);
                        handler.endElement("div");
                    }

                    if (subject != null) {
                        handler.startElement("div", "class", "annotationSubject");
                        handler.characters(subject);
                        handler.endElement("div");
                    }

                    if (contents != null) {
                        handler.startElement("div", "class", "annotationContents");
                        handler.characters(contents);
                        handler.endElement("div");
                    }

                    handler.endElement("div");
                }
            }
        }
        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
}

From source file:vortext.TextHighlight.java

License:Apache License

/**
 * Creates a text-markup annotation for every match of a pattern on the pages from
 * {@code getStartPage()} to {@code getEndPage()}.
 * <p>
 * Each new annotation is appended to the annotation list of the page it belongs to and is also
 * collected in the returned list. Matches without any text bounding box are skipped.
 *
 * @param pattern the pattern handed to the text aggregate to find matches on each page
 * @param subType the subtype passed to every created {@code PDAnnotationTextMarkup}
 * @return the annotations created by this call, in creation order
 * @throws IOException if reading a page's annotations fails
 * @throws IllegalArgumentException if {@code textAggregate} or {@code document} is {@code null}
 */
@SuppressWarnings("unchecked")
public List<PDAnnotationTextMarkup> highlight(final Pattern pattern, final String subType) throws IOException {
    if (textAggregate == null || document == null) {
        throw new IllegalArgumentException("TextAggregate was not initilized");
    }

    final List<PDPage> allPages = document.getDocumentCatalog().getAllPages();
    final ArrayList<PDAnnotationTextMarkup> created = new ArrayList<PDAnnotationTextMarkup>();

    // Page numbers are 1-based, list indices 0-based.
    final int firstIndex = getStartPage() - 1;
    for (int idx = firstIndex; idx < getEndPage() && idx < allPages.size(); idx++) {
        final List<PDAnnotation> pageAnnotations = allPages.get(idx).getAnnotations();

        for (final Match hit : textAggregate.match(idx + 1, pattern)) {
            final List<PDRectangle> boxes = getTextBoundingBoxes(hit.positions);
            if (boxes.isEmpty()) {
                continue;
            }

            final PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(subType);
            // The annotation rectangle is the first bounding box; the quad points cover all boxes.
            markup.setRectangle(boxes.get(0));
            markup.setQuadPoints(this.getQuads(boxes));
            markup.setContents(hit.str);

            pageAnnotations.add(markup);
            created.add(markup);
        }
    }
    return created;
}