Example usage for org.apache.pdfbox.pdmodel PDPage getAnnotations

Introduction

This page collects example usages of org.apache.pdfbox.pdmodel.PDPage.getAnnotations() taken from open-source projects.

Prototype

public List<PDAnnotation> getAnnotations() throws IOException

Source Link

Document

This will return a list of the annotations for this page.

Usage

From source file:net.yacy.document.parser.pdfParser.java

License:Open Source License

/**
 * Extracts the clickable URI links from a PDF, grouped by page.
 * <p>
 * Extraction is best effort: if the annotations of a page cannot be read, that page
 * contributes an empty collection, and a single link that cannot be turned into an
 * {@code AnchorURL} is skipped without discarding the other links on the same page.
 *
 * @param pdf the document to parse
 * @return one collection of detected links per page, in page order; no entry is {@code null}
 */
private Collection<AnchorURL>[] extractPdfLinks(final PDDocument pdf) {
    @SuppressWarnings("unchecked")
    List<PDPage> allPages = pdf.getDocumentCatalog().getAllPages();
    @SuppressWarnings("unchecked")
    Collection<AnchorURL>[] linkCollections = (Collection<AnchorURL>[]) new Collection<?>[allPages.size()];
    int pagecount = 0;
    for (PDPage page : allPages) {
        final Collection<AnchorURL> pdflinks = new ArrayList<AnchorURL>();
        try {
            List<PDAnnotation> annotations = page.getAnnotations();
            if (annotations != null) {
                for (PDAnnotation pdfannotation : annotations) {
                    if (!(pdfannotation instanceof PDAnnotationLink)) {
                        continue;
                    }
                    PDAction link = ((PDAnnotationLink) pdfannotation).getAction();
                    // instanceof is false for null, so no separate null check is required
                    if (!(link instanceof PDActionURI)) {
                        continue;
                    }
                    String uristr = ((PDActionURI) link).getURI();
                    if (uristr == null) {
                        // URI action without a target: nothing to record
                        continue;
                    }
                    try {
                        pdflinks.add(new AnchorURL(uristr));
                    } catch (IOException ignored) {
                        // NOTE(review): assumes AnchorURL(String) signals an unusable URI with an
                        // IOException subtype (e.g. MalformedURLException) - confirm.
                        // Skip only this link so the remaining links of the page are kept.
                    }
                }
            }
        } catch (IOException ignored) {
            // Best effort: the annotations of this page are unreadable, so the page simply
            // reports whatever was collected (nothing) instead of failing the whole parse.
        }
        linkCollections[pagecount++] = pdflinks;
    }
    return linkCollections;
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

/**
 * Transfers the annotations of a source page, and the AcroForm fields that go with them,
 * into the target document ({@code pdfDoc}).
 * <p>
 * Steps, in order: reposition the page annotations, register placeholder objects for all
 * sibling pages of the page's parent node, copy the annotations, make sure the target
 * catalog has an AcroForm dictionary, and append clones of the collected fields to its
 * field array.
 *
 * @param sourceDoc the document whose catalog and AcroForm are read
 * @param page the source page whose annotations are processed
 * @param at transform handed to {@code moveAnnotations} together with the page annotations
 * @throws IOException if the page is not found among the kids of its parent page node,
 *         or if one of the called operations fails
 */
private void handleAnnotations(PDDocument sourceDoc, PDPage page, AffineTransform at) throws IOException {
    PDDocumentCatalog srcCatalog = sourceDoc.getDocumentCatalog();
    PDAcroForm srcAcroForm = srcCatalog.getAcroForm();
    List pageAnnotations = page.getAnnotations();
    if (srcAcroForm == null && pageAnnotations.isEmpty()) {
        // Neither a form nor any annotation: nothing to transfer.
        return;
    }

    moveAnnotations(page, pageAnnotations, at);

    //Pseudo-cache the target page in place of the original source page.
    //This essentially replaces the original page reference with the target page.
    COSObject cosPage = null;
    COSDictionary parentDic = (COSDictionary) page.getCOSObject().getDictionaryObject(COSName.PARENT,
            COSName.P);
    COSArray kids = (COSArray) parentDic.getDictionaryObject(COSName.KIDS);
    for (int i = 0; i < kids.size(); i++) {
        //Hopefully safe to cast, as kids need to be indirect objects
        COSObject kid = (COSObject) kids.get(i);
        // Lazily create one placeholder array per kid index; it gets an object number and is
        // added to the target document's trailer objects.
        if (!pageNumbers.containsKey(i)) {
            PDFArray a = new PDFArray();
            a.add(null);
            pdfDoc.assignObjectNumber(a);
            pdfDoc.addTrailerObject(a);
            pageNumbers.put(i, a);
        }
        // Register the placeholder as the clone of this kid (presumably so later cloning
        // resolves references to the kid to the placeholder - confirm in cacheClonedObject).
        cacheClonedObject(kid, pageNumbers.get(i));
        // Identity comparison: remember the kid entry that refers to the page being handled.
        if (kid.getObject() == page.getCOSObject()) {
            cosPage = kid;
        }
    }
    if (cosPage == null) {
        throw new IOException("Illegal PDF. Page not part of parent page node.");
    }

    // The returned objects are cloned into the AcroForm field array below.
    Set<COSObject> fields = copyAnnotations(page);

    // A cached clone of the source AcroForm means it has been copied before.
    boolean formAlreadyCopied = getCachedClone(srcAcroForm) != null;
    PDFRoot catalog = this.pdfDoc.getRoot();
    PDFDictionary destAcroForm = (PDFDictionary) catalog.get(COSName.ACRO_FORM.getName());
    if (formAlreadyCopied) {
        //skip, already copied
    } else if (destAcroForm == null) {
        if (srcAcroForm != null) {
            //With this, only the first PDF's AcroForm is copied over. If later AcroForms have
            //different properties besides the actual fields, these get lost. Only fields
            //get merged.
            Collection exclude = Collections.singletonList(COSName.FIELDS);
            destAcroForm = (PDFDictionary) cloneForNewDocument(srcAcroForm, srcAcroForm, exclude);
        } else {
            //Work-around for incorrectly split PDFs which lack an AcroForm but have widgets
            //on pages. This doesn't handle the case where field dicts have "C" entries
            //(for the "CO" entry), so this may produce problems, but we have almost no chance
            //to guess the calculation order.
            destAcroForm = new PDFDictionary(pdfDoc.getRoot());
        }
        pdfDoc.registerObject(destAcroForm);
        catalog.put(COSName.ACRO_FORM.getName(), destAcroForm);
    }
    // Fetch the target field array, creating it on first use.
    PDFArray clonedFields = (PDFArray) destAcroForm.get(COSName.FIELDS.getName());
    if (clonedFields == null) {
        clonedFields = new PDFArray();
        destAcroForm.put(COSName.FIELDS.getName(), clonedFields);
    }
    // Append a clone of every collected field; the KIDS entry is passed as the exclusion list.
    for (COSObject field : fields) {
        PDFDictionary clone = (PDFDictionary) cloneForNewDocument(field, field, Arrays.asList(COSName.KIDS));
        clonedFields.add(clone);
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

/**
 * Finishes the current page.
 * <p>
 * Walks the page annotations (embedded file attachments and widgets always; link and markup
 * annotations only when annotation-text extraction is enabled), runs OCR when the configured
 * strategy is {@code OCR_AND_TEXT_EXTRACTION}, handles the page's close/open actions and
 * closes a {@code div} element. The page index is advanced in every case.
 *
 * @param page the page that has just been processed
 * @throws IOException wrapping a SAX or Tika failure; a plain {@link IOException} raised
 *         while ending the page is recorded in {@code exceptions} instead of being rethrown
 */
@Override
protected void endPage(PDPage page) throws IOException {

    try {
        for (PDAnnotation annot : page.getAnnotations()) {

            if (annot instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment attachment = (PDAnnotationFileAttachment) annot;
                PDComplexFileSpecification spec = (PDComplexFileSpecification) attachment.getFile();
                try {
                    AttributesImpl attrs = new AttributesImpl();
                    attrs.addAttribute("", "source", "source", "CDATA", "annotation");
                    extractMultiOSPDEmbeddedFiles(attachment.getAttachmentName(), spec, attrs);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                } catch (IOException e) {
                    handleCatchableIOE(e);
                }
            } else if (annot instanceof PDAnnotationWidget) {
                handleWidget((PDAnnotationWidget) annot);
            }

            // TODO: remove once PDFBOX-1143 is fixed:
            if (!config.getExtractAnnotationText()) {
                continue;
            }

            if (annot instanceof PDAnnotationLink) {
                PDAction action = ((PDAnnotationLink) annot).getAction();
                if (action instanceof PDActionURI) {
                    // The link cannot currently be associated with its text, so the
                    // URI is written out as if it were the visible text.
                    String href = ((PDActionURI) action).getURI();
                    if (href != null && !href.trim().isEmpty()) {
                        xhtml.startElement("div", "class", "annotation");
                        xhtml.startElement("a", "href", href);
                        xhtml.characters(href);
                        xhtml.endElement("a");
                        xhtml.endElement("div");
                    }
                }
            }

            if (!(annot instanceof PDAnnotationMarkup)) {
                continue;
            }
            PDAnnotationMarkup markup = (PDAnnotationMarkup) annot;
            // TODO: maybe also markup.getRichContents()?
            String[] cssClasses = {"annotationTitle", "annotationSubject", "annotationContents"};
            String[] texts = {markup.getTitlePopup(), markup.getSubject(), markup.getContents()};
            if (texts[0] == null && texts[1] == null && texts[2] == null) {
                continue;
            }

            xhtml.startElement("div", "class", "annotation");
            for (int i = 0; i < texts.length; i++) {
                if (texts[i] != null) {
                    xhtml.startElement("div", "class", cssClasses[i]);
                    xhtml.characters(texts[i]);
                    xhtml.endElement("div");
                }
            }
            xhtml.endElement("div");
        }

        if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) {
            doOCROnCurrentPage();
        }

        PDPageAdditionalActions additionalActions = page.getActions();
        if (additionalActions != null) {
            handleDestinationOrAction(additionalActions.getC(), ActionTrigger.PAGE_CLOSE);
            handleDestinationOrAction(additionalActions.getO(), ActionTrigger.PAGE_OPEN);
        }
        xhtml.endElement("div");
    } catch (SAXException | TikaException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    } catch (IOException e) {
        exceptions.add(e);
    } finally {
        pageIndex++;
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTMLPureJava.java

License:Apache License

/**
 * Finishes the current page.
 * <p>
 * Walks the page annotations (embedded file attachments and widgets always; link and markup
 * annotations only when annotation-text extraction is enabled), handles the page's
 * close/open actions and closes a {@code div} element. The page index is advanced in
 * every case.
 *
 * @param page the page that has just been processed
 * @throws IOException wrapping a SAX or Tika failure; a plain {@link IOException} raised
 *         while ending the page is recorded in {@code exceptions} instead of being rethrown
 */
@Override
protected void endPage(PDPage page) throws IOException {

    try {
        for (PDAnnotation annot : page.getAnnotations()) {

            if (annot instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment attachment = (PDAnnotationFileAttachment) annot;
                PDComplexFileSpecification spec = (PDComplexFileSpecification) attachment.getFile();
                try {
                    AttributesImpl attrs = new AttributesImpl();
                    attrs.addAttribute("", "source", "source", "CDATA", "annotation");
                    extractMultiOSPDEmbeddedFiles(attachment.getAttachmentName(), spec, attrs);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                } catch (IOException e) {
                    handleCatchableIOE(e);
                }
            } else if (annot instanceof PDAnnotationWidget) {
                handleWidget((PDAnnotationWidget) annot);
            }

            // TODO: remove once PDFBOX-1143 is fixed:
            if (!config.getExtractAnnotationText()) {
                continue;
            }

            if (annot instanceof PDAnnotationLink) {
                PDAction action = ((PDAnnotationLink) annot).getAction();
                if (action instanceof PDActionURI) {
                    // The link cannot currently be associated with its text, so the
                    // URI is written out as if it were the visible text.
                    String href = ((PDActionURI) action).getURI();
                    if (href != null && !href.trim().isEmpty()) {
                        xhtml.startElement("div", "class", "annotation");
                        xhtml.startElement("a", "href", href);
                        xhtml.characters(href);
                        xhtml.endElement("a");
                        xhtml.endElement("div");
                    }
                }
            }

            if (!(annot instanceof PDAnnotationMarkup)) {
                continue;
            }
            PDAnnotationMarkup markup = (PDAnnotationMarkup) annot;
            // TODO: maybe also markup.getRichContents()?
            String[] cssClasses = {"annotationTitle", "annotationSubject", "annotationContents"};
            String[] texts = {markup.getTitlePopup(), markup.getSubject(), markup.getContents()};
            if (texts[0] == null && texts[1] == null && texts[2] == null) {
                continue;
            }

            xhtml.startElement("div", "class", "annotation");
            for (int i = 0; i < texts.length; i++) {
                if (texts[i] != null) {
                    xhtml.startElement("div", "class", cssClasses[i]);
                    xhtml.characters(texts[i]);
                    xhtml.endElement("div");
                }
            }
            xhtml.endElement("div");
        }

        PDPageAdditionalActions additionalActions = page.getActions();
        if (additionalActions != null) {
            handleDestinationOrAction(additionalActions.getC(), ActionTrigger.PAGE_CLOSE);
            handleDestinationOrAction(additionalActions.getO(), ActionTrigger.PAGE_OPEN);
        }
        xhtml.endElement("div");
    } catch (SAXException | TikaException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    } catch (IOException e) {
        exceptions.add(e);
    } finally {
        pageIndex++;
    }
}

From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java

License:Apache License

/**
 * Finishes the current page.
 * <p>
 * Ends the open paragraph, extracts the images of the page resources, processes the page
 * annotations (embedded file attachments always; link and markup annotations only when
 * annotation-text extraction is enabled) and closes a {@code div} element.
 *
 * @param page the page that has just been processed
 * @throws IOException if reading the page fails, or wrapping a SAX or Tika failure
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        writeParagraphEnd();

        extractImages(page.getResources());

        EmbeddedDocumentExtractor embeddedExtractor = getEmbeddedDocumentExtractor();
        for (PDAnnotation annot : page.getAnnotations()) {

            if (annot instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment attachment = (PDAnnotationFileAttachment) annot;
                PDComplexFileSpecification spec = (PDComplexFileSpecification) attachment.getFile();
                try {
                    extractMultiOSPDEmbeddedFiles("", spec, embeddedExtractor);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                }
            }

            // TODO: remove once PDFBOX-1143 is fixed:
            if (!config.getExtractAnnotationText()) {
                continue;
            }

            if (annot instanceof PDAnnotationLink) {
                PDAction action = ((PDAnnotationLink) annot).getAction();
                if (action instanceof PDActionURI) {
                    String href = ((PDActionURI) action).getURI();
                    if (href != null) {
                        // Only the anchor element is written; it carries no text content.
                        handler.startElement("div", "class", "annotation");
                        handler.startElement("a", "href", href);
                        handler.endElement("a");
                        handler.endElement("div");
                    }
                }
            }

            if (!(annot instanceof PDAnnotationMarkup)) {
                continue;
            }
            PDAnnotationMarkup markup = (PDAnnotationMarkup) annot;
            // TODO: maybe also markup.getRichContents()?
            String[] cssClasses = {"annotationTitle", "annotationSubject", "annotationContents"};
            String[] texts = {markup.getTitlePopup(), markup.getSubject(), markup.getContents()};
            if (texts[0] == null && texts[1] == null && texts[2] == null) {
                continue;
            }

            handler.startElement("div", "class", "annotation");
            for (int i = 0; i < texts.length; i++) {
                if (texts[i] != null) {
                    handler.startElement("div", "class", cssClasses[i]);
                    handler.characters(texts[i]);
                    handler.endElement("div");
                }
            }
            handler.endElement("div");
        }

        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
}

From source file:org.apache.tika.parser.pdf.PDF2XHTML.java

License:Apache License

/**
 * Finishes the current page.
 * <p>
 * Ends the open paragraph, extracts the images of the page resources, processes the page
 * annotations (embedded file attachments always; link and markup annotations only when
 * annotation-text extraction is enabled) and closes a {@code div} element. When all of
 * that succeeds, {@code page.clear()} is called.
 *
 * @param page the page that has just been processed
 * @throws IOException if reading the page fails, or wrapping a SAX or Tika failure
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        writeParagraphEnd();

        extractImages(page.getResources(), new HashSet<COSBase>());

        EmbeddedDocumentExtractor embeddedExtractor = getEmbeddedDocumentExtractor();
        for (PDAnnotation annot : page.getAnnotations()) {

            if (annot instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment attachment = (PDAnnotationFileAttachment) annot;
                PDComplexFileSpecification spec = (PDComplexFileSpecification) attachment.getFile();
                try {
                    extractMultiOSPDEmbeddedFiles("", spec, embeddedExtractor);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                }
            }

            // TODO: remove once PDFBOX-1143 is fixed:
            if (!config.getExtractAnnotationText()) {
                continue;
            }

            if (annot instanceof PDAnnotationLink) {
                PDAction action = ((PDAnnotationLink) annot).getAction();
                if (action instanceof PDActionURI) {
                    String href = ((PDActionURI) action).getURI();
                    if (href != null) {
                        // Only the anchor element is written; it carries no text content.
                        handler.startElement("div", "class", "annotation");
                        handler.startElement("a", "href", href);
                        handler.endElement("a");
                        handler.endElement("div");
                    }
                }
            }

            if (!(annot instanceof PDAnnotationMarkup)) {
                continue;
            }
            PDAnnotationMarkup markup = (PDAnnotationMarkup) annot;
            // TODO: maybe also markup.getRichContents()?
            String[] cssClasses = {"annotationTitle", "annotationSubject", "annotationContents"};
            String[] texts = {markup.getTitlePopup(), markup.getSubject(), markup.getContents()};
            if (texts[0] == null && texts[1] == null && texts[2] == null) {
                continue;
            }

            handler.startElement("div", "class", "annotation");
            for (int i = 0; i < texts.length; i++) {
                if (texts[i] != null) {
                    handler.startElement("div", "class", cssClasses[i]);
                    handler.characters(texts[i]);
                    handler.endElement("div");
                }
            }
            handler.endElement("div");
        }

        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
    // Reached only when no exception was thrown above.
    page.clear();
}

From source file:org.data2semantics.annotate.D2S_SampleAnnotation.java

License:Apache License

/**
 * This will create a doucument showing various annotations.
 * /*from   w ww  .  j av  a  2  s .  c o  m*/
 * @param args
 *            The command line arguments.
 * 
 * @throws Exception
 *             If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {

    PDDocument document = new PDDocument();

    try {
        PDPage page = new PDPage();
        document.addPage(page);
        List annotations = page.getAnnotations();

        // Setup some basic reusable objects/constants
        // Annotations themselves can only be used once!

        float inch = 72;
        PDGamma colourRed = new PDGamma();
        colourRed.setR(1);
        PDGamma colourBlue = new PDGamma();
        colourBlue.setB(1);
        PDGamma colourBlack = new PDGamma();

        PDBorderStyleDictionary borderThick = new PDBorderStyleDictionary();
        borderThick.setWidth(inch / 12); // 12th inch
        PDBorderStyleDictionary borderThin = new PDBorderStyleDictionary();
        borderThin.setWidth(inch / 72); // 1 point
        PDBorderStyleDictionary borderULine = new PDBorderStyleDictionary();
        borderULine.setStyle(PDBorderStyleDictionary.STYLE_UNDERLINE);
        borderULine.setWidth(inch / 72); // 1 point

        float pw = page.getMediaBox().getUpperRightX();
        float ph = page.getMediaBox().getUpperRightY();

        // First add some text, two lines we'll add some annotations to this
        // later

        PDFont font = PDType1Font.HELVETICA_BOLD;

        PDPageContentStream contentStream = new PDPageContentStream(document, page);
        contentStream.beginText();
        contentStream.setFont(font, 18);
        contentStream.moveTextPositionByAmount(inch, ph - inch - 18);
        contentStream.drawString("PDFBox");
        contentStream.moveTextPositionByAmount(0, -(inch / 2));
        contentStream.drawString("Click Here");
        contentStream.endText();

        contentStream.close();

        // Now add the markup annotation, a highlight to PDFBox text
        PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
        txtMark.setColour(colourBlue);
        txtMark.setConstantOpacity((float) 0.2); // Make the highlight 20%
        // transparent

        // Set the rectangle containing the markup

        float textWidth = (font.getStringWidth("PDFBox") / 1000) * 18;
        PDRectangle position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - inch - 18);
        position.setUpperRightX(72 + textWidth);
        position.setUpperRightY(ph - inch);
        txtMark.setRectangle(position);

        // work out the points forming the four corners of the annotations
        // set out in anti clockwise form (Completely wraps the text)
        // OK, the below doesn't match that description.
        // It's what acrobat 7 does and displays properly!
        float[] quads = new float[8];

        quads[0] = position.getLowerLeftX(); // x1
        quads[1] = position.getUpperRightY() - 2; // y1
        quads[2] = position.getUpperRightX(); // x2
        quads[3] = quads[1]; // y2
        quads[4] = quads[0]; // x3
        quads[5] = position.getLowerLeftY() - 2; // y3
        quads[6] = quads[2]; // x4
        quads[7] = quads[5]; // y5

        txtMark.setQuadPoints(quads);
        txtMark.setContents("Highlighted since it's important");

        annotations.add(txtMark);

        // Now add the link annotation, so the clickme works
        PDAnnotationLink txtLink = new PDAnnotationLink();
        txtLink.setBorderStyle(borderULine);

        // Set the rectangle containing the link

        textWidth = (font.getStringWidth("Click Here") / 1000) * 18;
        position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - (float) (1.5 * inch) - 20); // down a
        // couple of
        // points
        position.setUpperRightX(72 + textWidth);
        position.setUpperRightY(ph - (float) (1.5 * inch));
        txtLink.setRectangle(position);

        // add an action
        PDActionURI action = new PDActionURI();
        action.setURI("http://www.pdfbox.org");
        txtLink.setAction(action);

        annotations.add(txtLink);

        // Now draw a few more annotations

        PDAnnotationSquareCircle aCircle = new PDAnnotationSquareCircle(
                PDAnnotationSquareCircle.SUB_TYPE_CIRCLE);
        aCircle.setContents("Circle Annotation");
        aCircle.setInteriorColour(colourRed); // Fill in circle in red
        aCircle.setColour(colourBlue); // The border itself will be blue
        aCircle.setBorderStyle(borderThin);

        // Place the annotation on the page, we'll make this 1" round
        // 3" down, 1" in on the page

        position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - (3 * inch) - inch); // 1" height, 3"
        // down
        position.setUpperRightX(2 * inch); // 1" in, 1" width
        position.setUpperRightY(ph - (3 * inch)); // 3" down
        aCircle.setRectangle(position);

        // add to the annotations on the page
        annotations.add(aCircle);

        // Now a square annotation

        PDAnnotationSquareCircle aSquare = new PDAnnotationSquareCircle(
                PDAnnotationSquareCircle.SUB_TYPE_SQUARE);
        aSquare.setContents("Square Annotation");
        aSquare.setColour(colourRed); // Outline in red, not setting a fill
        aSquare.setBorderStyle(borderThick);

        // Place the annotation on the page, we'll make this 1" (72points)
        // square
        // 3.5" down, 1" in from the right on the page

        position = new PDRectangle(); // Reuse the variable, but note it's a
        // new object!
        position.setLowerLeftX(pw - (2 * inch)); // 1" in from right, 1"
        // wide
        position.setLowerLeftY(ph - (float) (3.5 * inch) - inch); // 1" height, 3.5"
        // down
        position.setUpperRightX(pw - inch); // 1" in from right
        position.setUpperRightY(ph - (float) (3.5 * inch)); // 3.5" down
        aSquare.setRectangle(position);

        // add to the annotations on the page
        annotations.add(aSquare);

        // Now we want to draw a line between the two, one end with an open
        // arrow

        PDAnnotationLine aLine = new PDAnnotationLine();

        aLine.setEndPointEndingStyle(PDAnnotationLine.LE_OPEN_ARROW);
        aLine.setContents("Circle->Square");
        aLine.setCaption(true); // Make the contents a caption on the line

        // Set the rectangle containing the line

        position = new PDRectangle(); // Reuse the variable, but note it's a
        // new object!
        position.setLowerLeftX(2 * inch); // 1" in + width of circle
        position.setLowerLeftY(ph - (float) (3.5 * inch) - inch); // 1" height, 3.5"
        // down
        position.setUpperRightX(pw - inch - inch); // 1" in from right, and
        // width of square
        position.setUpperRightY(ph - (3 * inch)); // 3" down (top of circle)
        aLine.setRectangle(position);

        // Now set the line position itself
        float[] linepos = new float[4];
        linepos[0] = 2 * inch; // x1 = rhs of circle
        linepos[1] = ph - (float) (3.5 * inch); // y1 halfway down circle
        linepos[2] = pw - (2 * inch); // x2 = lhs of square
        linepos[3] = ph - (4 * inch); // y2 halfway down square
        aLine.setLine(linepos);

        aLine.setBorderStyle(borderThick);
        aLine.setColour(colourBlack);

        // add to the annotations on the page
        annotations.add(aLine);

        // Finally all done

        document.save("testAnnotation.pdf");
    } finally {
        document.close();
    }
}

From source file:org.nuxeo.pdf.PDFLinks.java

License:Apache License

/**
 * Loads the PDF on first use and registers one text-extraction region per link annotation.
 * <p>
 * Does nothing when {@code pdfDoc} is already set. Otherwise the document is loaded from
 * {@code pdfBlob}, a new {@code PDFTextStripperByArea} is created, and for every link
 * annotation on every page a region named after the annotation's index on its page is added.
 *
 * @throws NuxeoException if an {@link IOException} occurs while preparing the regions
 */
protected void loadAndPreflightPdf() throws NuxeoException {

    if (pdfDoc != null) {
        return;
    }
    pdfDoc = PDFUtils.load(pdfBlob, password);

    @SuppressWarnings("unchecked")
    List<PDPage> pages = pdfDoc.getDocumentCatalog().getAllPages();
    try {
        stripper = new PDFTextStripperByArea();
        for (PDPage page : pages) {
            List<PDAnnotation> annotations = page.getAnnotations();
            for (int j = 0; j < annotations.size(); j++) {
                PDAnnotation annot = annotations.get(j);
                if (!(annot instanceof PDAnnotationLink)) {
                    continue;
                }
                PDRectangle linkBox = ((PDAnnotationLink) annot).getRectangle();
                // The link rectangle has to be repositioned to match text space.
                float x = linkBox.getLowerLeftX();
                float y = linkBox.getUpperRightY();
                float width = linkBox.getWidth();
                float height = linkBox.getHeight();
                // Only unrotated pages get their y coordinate flipped against the media box
                // height; every other rotation leaves the coordinates untouched.
                if (page.findRotation() == 0) {
                    y = page.findMediaBox().getHeight() - y;
                }

                // NOTE(review): the region name is just the per-page annotation index, so
                // annotations with the same index on different pages share a name - confirm
                // how the stripper treats duplicate region names.
                stripper.addRegion(String.valueOf(j), new Rectangle2D.Float(x, y, width, height));
            }
        }
    } catch (IOException e) {
        throw new NuxeoException("Cannot prefilght and prepare regions", e);
    }
}

From source file:org.nuxeo.pdf.PDFLinks.java

License:Apache License

/**
 * Collects the link annotations whose action sub type equals
 * {@code inSubType}.
 * <p>
 * Walks every page of {@code pdfDoc}, asks {@code stripper} to extract its
 * regions for that page, and for each matching link annotation reads the text
 * of the region named after the annotation's index ({@code "" + j}) together
 * with the link target. Targets are resolved for the remote-goto, launch and
 * URI sub types only; any other sub type yields no entries. Links without an
 * action, without a sub type, without a file specification, or with a blank
 * target are skipped.
 *
 * @param inSubType the action sub type to look for, compared against
 *            {@code PDAction.getSubType()}
 * @return the matching links with their 1-based page number; never
 *         {@code null}, possibly empty
 * @throws IOException if PDFBox fails while reading the page, its annotations
 *             or an action's file specification
 */
@SuppressWarnings("unchecked")
protected ArrayList<LinkInfo> parseForLinks(String inSubType) throws IOException {

    ArrayList<LinkInfo> li = new ArrayList<LinkInfo>();

    List<PDPage> allPages = pdfDoc.getDocumentCatalog().getAllPages();
    int pageNum = 0;
    for (PDPage page : allPages) {
        pageNum += 1;

        stripper.extractRegions(page);

        List<PDAnnotation> annotations = page.getAnnotations();
        // Indexed loop on purpose: the index is the region name.
        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (!(annot instanceof PDAnnotationLink)) {
                continue;
            }

            // A link may carry no action at all, so guard before
            // dereferencing the action or its sub type.
            PDAction action = ((PDAnnotationLink) annot).getAction();
            if (action == null) {
                continue;
            }
            String subType = action.getSubType();
            if (subType == null || !subType.equals(inSubType)) {
                continue;
            }

            String urlText = stripper.getTextForRegion("" + j);
            String urlValue = null;
            PDFileSpecification fspec;
            switch (inSubType) {
            case PDActionRemoteGoTo.SUB_TYPE:
                fspec = ((PDActionRemoteGoTo) action).getFile();
                urlValue = (fspec == null) ? null : fspec.getFile();
                break;

            case PDActionLaunch.SUB_TYPE:
                fspec = ((PDActionLaunch) action).getFile();
                urlValue = (fspec == null) ? null : fspec.getFile();
                break;

            case PDActionURI.SUB_TYPE:
                urlValue = ((PDActionURI) action).getURI();
                break;

            default:
                // Other sub types are not resolved to a target.
                break;
            }

            if (StringUtils.isNotBlank(urlValue)) {
                li.add(new LinkInfo(pageNum, inSubType, urlText, urlValue));
            }
        }
    }

    return li;
}

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * Extracts the URIs embedded in the link annotations of a PDF document and
 * registers each one on {@code parserDoc}.
 * <p>
 * For every page, a text-extraction region is set up per link annotation
 * (named after the annotation's index in the page's annotation list), the
 * regions are extracted, and each link whose action is a {@link PDActionURI}
 * is added as a reference together with the text found in its region. Pages
 * without annotations are skipped; processing continues with the next page.
 *
 * @param parserDoc the document that receives the references
 * @param pddDoc the PDF document to scan
 * @throws IOException if PDFBox fails while reading annotations or extracting
 *             the region text
 */
protected void extractURLs(IParserDocument parserDoc, PDDocument pddDoc) throws IOException {
    final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog();
    if (pddDocCatalog == null) {
        return;
    }

    @SuppressWarnings("unchecked")
    final List<PDPage> allPages = pddDocCatalog.getAllPages();
    if (allPages == null || allPages.isEmpty()) {
        return;
    }

    for (final PDPage page : allPages) {
        @SuppressWarnings("unchecked")
        final List<PDAnnotation> annotations = page.getAnnotations();
        if (annotations == null || annotations.isEmpty()) {
            // Nothing on this page; later pages may still contain links.
            continue;
        }

        // A fresh stripper per page keeps region names (annotation indices)
        // from one page separate from those of another.
        final PDFTextStripperByArea stripper = new PDFTextStripperByArea();

        //first setup text extraction regions
        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = annotations.get(j);
            if (!(annot instanceof PDAnnotationLink)) {
                continue;
            }
            final PDRectangle rect = ((PDAnnotationLink) annot).getRectangle();

            //need to reposition link rectangle to match text space
            final float x = rect.getLowerLeftX();
            float y = rect.getUpperRightY();
            if (page.findRotation() == 0) {
                // Flip the vertical coordinate against the media box height.
                // Other rotations (e.g. 90) are deliberately left untouched.
                y = page.findMediaBox().getHeight() - y;
            }

            final Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, rect.getWidth(), rect.getHeight());
            stripper.addRegion("" + j, awtRect);
        }

        stripper.extractRegions(page);

        // then pair each URI action with the text of its region
        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = annotations.get(j);
            if (!(annot instanceof PDAnnotationLink)) {
                continue;
            }
            final PDAction action = ((PDAnnotationLink) annot).getAction();
            final String urlText = stripper.getTextForRegion("" + j);

            if (action instanceof PDActionURI) {
                final String uriString = ((PDActionURI) action).getURI();
                if (uriString == null) {
                    // URI action without a URI entry: nothing to register.
                    continue;
                }
                final URI temp = URI.create(uriString);

                parserDoc.addReference(temp, urlText, Constants.SERVICE_PID + ":" + PID);
            }
        }
    }
}