Example usage for org.apache.pdfbox.pdmodel PDPage getAnnotations

List of usage examples for org.apache.pdfbox.pdmodel PDPage getAnnotations

Introduction

In this page you can find the example usage for org.apache.pdfbox.pdmodel PDPage getAnnotations.

Prototype

public List<PDAnnotation> getAnnotations() throws IOException 

Source Link

Document

This will return a list of the annotations for this page.

Usage

From source file:net.yacy.document.parser.pdfParser.java

License:Open Source License

/**
 * extract clickable links from pdf//from w w w.  ja  va  2  s  .co  m
 * @param pdf the document to parse
 * @return all detected links
 */
private Collection<AnchorURL>[] extractPdfLinks(final PDDocument pdf) {
    @SuppressWarnings("unchecked")
    List<PDPage> allPages = pdf.getDocumentCatalog().getAllPages();
    @SuppressWarnings("unchecked")
    Collection<AnchorURL>[] linkCollections = (Collection<AnchorURL>[]) new Collection<?>[allPages.size()];
    int pagecount = 0;
    for (PDPage page : allPages) {
        final Collection<AnchorURL> pdflinks = new ArrayList<AnchorURL>();
        try {
            List<PDAnnotation> annotations = page.getAnnotations();
            if (annotations != null) {
                for (PDAnnotation pdfannotation : annotations) {
                    if (pdfannotation instanceof PDAnnotationLink) {
                        PDAction link = ((PDAnnotationLink) pdfannotation).getAction();
                        if (link != null && link instanceof PDActionURI) {
                            PDActionURI pdflinkuri = (PDActionURI) link;
                            String uristr = pdflinkuri.getURI();
                            AnchorURL url = new AnchorURL(uristr);
                            pdflinks.add(url);
                        }
                    }
                }
            }
        } catch (IOException ex) {
        }
        linkCollections[pagecount++] = pdflinks;
    }
    return linkCollections;
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

private void handleAnnotations(PDDocument sourceDoc, PDPage page, AffineTransform at) throws IOException {
    PDDocumentCatalog srcCatalog = sourceDoc.getDocumentCatalog();
    PDAcroForm srcAcroForm = srcCatalog.getAcroForm();
    List pageAnnotations = page.getAnnotations();
    if (srcAcroForm == null && pageAnnotations.isEmpty()) {
        return;/*from   w w w .  ja  v a2  s .  co  m*/
    }

    moveAnnotations(page, pageAnnotations, at);

    //Pseudo-cache the target page in place of the original source page.
    //This essentially replaces the original page reference with the target page.
    COSObject cosPage = null;
    COSDictionary parentDic = (COSDictionary) page.getCOSObject().getDictionaryObject(COSName.PARENT,
            COSName.P);
    COSArray kids = (COSArray) parentDic.getDictionaryObject(COSName.KIDS);
    for (int i = 0; i < kids.size(); i++) {
        //Hopefully safe to cast, as kids need to be indirect objects
        COSObject kid = (COSObject) kids.get(i);
        if (!pageNumbers.containsKey(i)) {
            PDFArray a = new PDFArray();
            a.add(null);
            pdfDoc.assignObjectNumber(a);
            pdfDoc.addTrailerObject(a);
            pageNumbers.put(i, a);
        }
        cacheClonedObject(kid, pageNumbers.get(i));
        if (kid.getObject() == page.getCOSObject()) {
            cosPage = kid;
        }
    }
    if (cosPage == null) {
        throw new IOException("Illegal PDF. Page not part of parent page node.");
    }

    Set<COSObject> fields = copyAnnotations(page);

    boolean formAlreadyCopied = getCachedClone(srcAcroForm) != null;
    PDFRoot catalog = this.pdfDoc.getRoot();
    PDFDictionary destAcroForm = (PDFDictionary) catalog.get(COSName.ACRO_FORM.getName());
    if (formAlreadyCopied) {
        //skip, already copied
    } else if (destAcroForm == null) {
        if (srcAcroForm != null) {
            //With this, only the first PDF's AcroForm is copied over. If later AcroForms have
            //different properties besides the actual fields, these get lost. Only fields
            //get merged.
            Collection exclude = Collections.singletonList(COSName.FIELDS);
            destAcroForm = (PDFDictionary) cloneForNewDocument(srcAcroForm, srcAcroForm, exclude);
        } else {
            //Work-around for incorrectly split PDFs which lack an AcroForm but have widgets
            //on pages. This doesn't handle the case where field dicts have "C" entries
            //(for the "CO" entry), so this may produce problems, but we have almost no chance
            //to guess the calculation order.
            destAcroForm = new PDFDictionary(pdfDoc.getRoot());
        }
        pdfDoc.registerObject(destAcroForm);
        catalog.put(COSName.ACRO_FORM.getName(), destAcroForm);
    }
    PDFArray clonedFields = (PDFArray) destAcroForm.get(COSName.FIELDS.getName());
    if (clonedFields == null) {
        clonedFields = new PDFArray();
        destAcroForm.put(COSName.FIELDS.getName(), clonedFields);
    }
    for (COSObject field : fields) {
        PDFDictionary clone = (PDFDictionary) cloneForNewDocument(field, field, Arrays.asList(COSName.KIDS));
        clonedFields.add(clone);
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

@Override
protected void endPage(PDPage page) throws IOException {

    try {//  ww w.ja va 2  s.  c  om
        for (PDAnnotation annotation : page.getAnnotations()) {

            if (annotation instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation;
                PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fann.getFile();
                try {
                    AttributesImpl attributes = new AttributesImpl();
                    attributes.addAttribute("", "source", "source", "CDATA", "annotation");
                    extractMultiOSPDEmbeddedFiles(fann.getAttachmentName(), fileSpec, attributes);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                } catch (IOException e) {
                    handleCatchableIOE(e);
                }
            } else if (annotation instanceof PDAnnotationWidget) {
                handleWidget((PDAnnotationWidget) annotation);
            }
            // TODO: remove once PDFBOX-1143 is fixed:
            if (config.getExtractAnnotationText()) {
                if (annotation instanceof PDAnnotationLink) {
                    PDAnnotationLink annotationlink = (PDAnnotationLink) annotation;
                    if (annotationlink.getAction() != null) {
                        PDAction action = annotationlink.getAction();
                        if (action instanceof PDActionURI) {
                            //can't currently associate link to text.
                            //for now, extract link and repeat the link as if it
                            //were the visible text
                            PDActionURI uri = (PDActionURI) action;
                            String link = uri.getURI();
                            if (link != null && link.trim().length() > 0) {
                                xhtml.startElement("div", "class", "annotation");
                                xhtml.startElement("a", "href", link);
                                xhtml.characters(link);
                                xhtml.endElement("a");
                                xhtml.endElement("div");
                            }
                        }
                    }
                }

                if (annotation instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation;
                    String title = annotationMarkup.getTitlePopup();
                    String subject = annotationMarkup.getSubject();
                    String contents = annotationMarkup.getContents();
                    // TODO: maybe also annotationMarkup.getRichContents()?
                    if (title != null || subject != null || contents != null) {
                        xhtml.startElement("div", "class", "annotation");

                        if (title != null) {
                            xhtml.startElement("div", "class", "annotationTitle");
                            xhtml.characters(title);
                            xhtml.endElement("div");
                        }

                        if (subject != null) {
                            xhtml.startElement("div", "class", "annotationSubject");
                            xhtml.characters(subject);
                            xhtml.endElement("div");
                        }

                        if (contents != null) {
                            xhtml.startElement("div", "class", "annotationContents");
                            xhtml.characters(contents);
                            xhtml.endElement("div");
                        }

                        xhtml.endElement("div");
                    }
                }
            }
        }
        if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) {
            doOCROnCurrentPage();
        }

        PDPageAdditionalActions pageActions = page.getActions();
        if (pageActions != null) {
            handleDestinationOrAction(pageActions.getC(), ActionTrigger.PAGE_CLOSE);
            handleDestinationOrAction(pageActions.getO(), ActionTrigger.PAGE_OPEN);
        }
        xhtml.endElement("div");
    } catch (SAXException | TikaException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    } catch (IOException e) {
        exceptions.add(e);
    } finally {
        pageIndex++;
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTMLPureJava.java

License:Apache License

@Override
protected void endPage(PDPage page) throws IOException {

    try {// w  w  w  .  ja v  a2  s . c o  m
        for (PDAnnotation annotation : page.getAnnotations()) {

            if (annotation instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation;
                PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fann.getFile();
                try {
                    AttributesImpl attributes = new AttributesImpl();
                    attributes.addAttribute("", "source", "source", "CDATA", "annotation");
                    extractMultiOSPDEmbeddedFiles(fann.getAttachmentName(), fileSpec, attributes);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                } catch (IOException e) {
                    handleCatchableIOE(e);
                }
            } else if (annotation instanceof PDAnnotationWidget) {
                handleWidget((PDAnnotationWidget) annotation);
            }
            // TODO: remove once PDFBOX-1143 is fixed:
            if (config.getExtractAnnotationText()) {
                if (annotation instanceof PDAnnotationLink) {
                    PDAnnotationLink annotationlink = (PDAnnotationLink) annotation;
                    if (annotationlink.getAction() != null) {
                        PDAction action = annotationlink.getAction();
                        if (action instanceof PDActionURI) {
                            //can't currently associate link to text.
                            //for now, extract link and repeat the link as if it
                            //were the visible text
                            PDActionURI uri = (PDActionURI) action;
                            String link = uri.getURI();
                            if (link != null && link.trim().length() > 0) {
                                xhtml.startElement("div", "class", "annotation");
                                xhtml.startElement("a", "href", link);
                                xhtml.characters(link);
                                xhtml.endElement("a");
                                xhtml.endElement("div");
                            }
                        }
                    }
                }

                if (annotation instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation;
                    String title = annotationMarkup.getTitlePopup();
                    String subject = annotationMarkup.getSubject();
                    String contents = annotationMarkup.getContents();
                    // TODO: maybe also annotationMarkup.getRichContents()?
                    if (title != null || subject != null || contents != null) {
                        xhtml.startElement("div", "class", "annotation");

                        if (title != null) {
                            xhtml.startElement("div", "class", "annotationTitle");
                            xhtml.characters(title);
                            xhtml.endElement("div");
                        }

                        if (subject != null) {
                            xhtml.startElement("div", "class", "annotationSubject");
                            xhtml.characters(subject);
                            xhtml.endElement("div");
                        }

                        if (contents != null) {
                            xhtml.startElement("div", "class", "annotationContents");
                            xhtml.characters(contents);
                            xhtml.endElement("div");
                        }

                        xhtml.endElement("div");
                    }
                }
            }
        }

        PDPageAdditionalActions pageActions = page.getActions();
        if (pageActions != null) {
            handleDestinationOrAction(pageActions.getC(), ActionTrigger.PAGE_CLOSE);
            handleDestinationOrAction(pageActions.getO(), ActionTrigger.PAGE_OPEN);
        }
        xhtml.endElement("div");
    } catch (SAXException | TikaException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    } catch (IOException e) {
        exceptions.add(e);
    } finally {
        pageIndex++;
    }
}

From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java

License:Apache License

@Override
protected void endPage(PDPage page) throws IOException {
    try {//from w  w  w .jav  a2s.c o m
        writeParagraphEnd();

        extractImages(page.getResources());

        EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor();
        for (PDAnnotation annotation : page.getAnnotations()) {

            if (annotation instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation;
                PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fann.getFile();
                try {
                    extractMultiOSPDEmbeddedFiles("", fileSpec, extractor);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                }
            }
            // TODO: remove once PDFBOX-1143 is fixed:
            if (config.getExtractAnnotationText()) {
                if (annotation instanceof PDAnnotationLink) {
                    PDAnnotationLink annotationlink = (PDAnnotationLink) annotation;
                    if (annotationlink.getAction() != null) {
                        PDAction action = annotationlink.getAction();
                        if (action instanceof PDActionURI) {
                            PDActionURI uri = (PDActionURI) action;
                            String link = uri.getURI();
                            if (link != null) {
                                handler.startElement("div", "class", "annotation");
                                handler.startElement("a", "href", link);
                                handler.endElement("a");
                                handler.endElement("div");
                            }
                        }
                    }
                }

                if (annotation instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation;
                    String title = annotationMarkup.getTitlePopup();
                    String subject = annotationMarkup.getSubject();
                    String contents = annotationMarkup.getContents();
                    // TODO: maybe also annotationMarkup.getRichContents()?
                    if (title != null || subject != null || contents != null) {
                        handler.startElement("div", "class", "annotation");

                        if (title != null) {
                            handler.startElement("div", "class", "annotationTitle");
                            handler.characters(title);
                            handler.endElement("div");
                        }

                        if (subject != null) {
                            handler.startElement("div", "class", "annotationSubject");
                            handler.characters(subject);
                            handler.endElement("div");
                        }

                        if (contents != null) {
                            handler.startElement("div", "class", "annotationContents");
                            handler.characters(contents);
                            handler.endElement("div");
                        }

                        handler.endElement("div");
                    }
                }
            }
        }

        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
}

From source file:org.apache.tika.parser.pdf.PDF2XHTML.java

License:Apache License

@Override
protected void endPage(PDPage page) throws IOException {
    try {//from   w  ww .j av  a  2s.  c o  m
        writeParagraphEnd();

        extractImages(page.getResources(), new HashSet<COSBase>());

        EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor();
        for (PDAnnotation annotation : page.getAnnotations()) {

            if (annotation instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation;
                PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fann.getFile();
                try {
                    extractMultiOSPDEmbeddedFiles("", fileSpec, extractor);
                } catch (SAXException e) {
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                }
            }
            // TODO: remove once PDFBOX-1143 is fixed:
            if (config.getExtractAnnotationText()) {
                if (annotation instanceof PDAnnotationLink) {
                    PDAnnotationLink annotationlink = (PDAnnotationLink) annotation;
                    if (annotationlink.getAction() != null) {
                        PDAction action = annotationlink.getAction();
                        if (action instanceof PDActionURI) {
                            PDActionURI uri = (PDActionURI) action;
                            String link = uri.getURI();
                            if (link != null) {
                                handler.startElement("div", "class", "annotation");
                                handler.startElement("a", "href", link);
                                handler.endElement("a");
                                handler.endElement("div");
                            }
                        }
                    }
                }

                if (annotation instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation;
                    String title = annotationMarkup.getTitlePopup();
                    String subject = annotationMarkup.getSubject();
                    String contents = annotationMarkup.getContents();
                    // TODO: maybe also annotationMarkup.getRichContents()?
                    if (title != null || subject != null || contents != null) {
                        handler.startElement("div", "class", "annotation");

                        if (title != null) {
                            handler.startElement("div", "class", "annotationTitle");
                            handler.characters(title);
                            handler.endElement("div");
                        }

                        if (subject != null) {
                            handler.startElement("div", "class", "annotationSubject");
                            handler.characters(subject);
                            handler.endElement("div");
                        }

                        if (contents != null) {
                            handler.startElement("div", "class", "annotationContents");
                            handler.characters(contents);
                            handler.endElement("div");
                        }

                        handler.endElement("div");
                    }
                }
            }
        }

        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
    page.clear();
}

From source file:org.data2semantics.annotate.D2S_SampleAnnotation.java

License:Apache License

/**
 * This will create a doucument showing various annotations.
 * /*from   w ww  .  j av  a  2  s .  c o  m*/
 * @param args
 *            The command line arguments.
 * 
 * @throws Exception
 *             If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {

    PDDocument document = new PDDocument();

    try {
        PDPage page = new PDPage();
        document.addPage(page);
        List annotations = page.getAnnotations();

        // Setup some basic reusable objects/constants
        // Annotations themselves can only be used once!

        float inch = 72;
        PDGamma colourRed = new PDGamma();
        colourRed.setR(1);
        PDGamma colourBlue = new PDGamma();
        colourBlue.setB(1);
        PDGamma colourBlack = new PDGamma();

        PDBorderStyleDictionary borderThick = new PDBorderStyleDictionary();
        borderThick.setWidth(inch / 12); // 12th inch
        PDBorderStyleDictionary borderThin = new PDBorderStyleDictionary();
        borderThin.setWidth(inch / 72); // 1 point
        PDBorderStyleDictionary borderULine = new PDBorderStyleDictionary();
        borderULine.setStyle(PDBorderStyleDictionary.STYLE_UNDERLINE);
        borderULine.setWidth(inch / 72); // 1 point

        float pw = page.getMediaBox().getUpperRightX();
        float ph = page.getMediaBox().getUpperRightY();

        // First add some text, two lines we'll add some annotations to this
        // later

        PDFont font = PDType1Font.HELVETICA_BOLD;

        PDPageContentStream contentStream = new PDPageContentStream(document, page);
        contentStream.beginText();
        contentStream.setFont(font, 18);
        contentStream.moveTextPositionByAmount(inch, ph - inch - 18);
        contentStream.drawString("PDFBox");
        contentStream.moveTextPositionByAmount(0, -(inch / 2));
        contentStream.drawString("Click Here");
        contentStream.endText();

        contentStream.close();

        // Now add the markup annotation, a highlight to PDFBox text
        PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
        txtMark.setColour(colourBlue);
        txtMark.setConstantOpacity((float) 0.2); // Make the highlight 20%
        // transparent

        // Set the rectangle containing the markup

        float textWidth = (font.getStringWidth("PDFBox") / 1000) * 18;
        PDRectangle position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - inch - 18);
        position.setUpperRightX(72 + textWidth);
        position.setUpperRightY(ph - inch);
        txtMark.setRectangle(position);

        // work out the points forming the four corners of the annotations
        // set out in anti clockwise form (Completely wraps the text)
        // OK, the below doesn't match that description.
        // It's what acrobat 7 does and displays properly!
        float[] quads = new float[8];

        quads[0] = position.getLowerLeftX(); // x1
        quads[1] = position.getUpperRightY() - 2; // y1
        quads[2] = position.getUpperRightX(); // x2
        quads[3] = quads[1]; // y2
        quads[4] = quads[0]; // x3
        quads[5] = position.getLowerLeftY() - 2; // y3
        quads[6] = quads[2]; // x4
        quads[7] = quads[5]; // y5

        txtMark.setQuadPoints(quads);
        txtMark.setContents("Highlighted since it's important");

        annotations.add(txtMark);

        // Now add the link annotation, so the clickme works
        PDAnnotationLink txtLink = new PDAnnotationLink();
        txtLink.setBorderStyle(borderULine);

        // Set the rectangle containing the link

        textWidth = (font.getStringWidth("Click Here") / 1000) * 18;
        position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - (float) (1.5 * inch) - 20); // down a
        // couple of
        // points
        position.setUpperRightX(72 + textWidth);
        position.setUpperRightY(ph - (float) (1.5 * inch));
        txtLink.setRectangle(position);

        // add an action
        PDActionURI action = new PDActionURI();
        action.setURI("http://www.pdfbox.org");
        txtLink.setAction(action);

        annotations.add(txtLink);

        // Now draw a few more annotations

        PDAnnotationSquareCircle aCircle = new PDAnnotationSquareCircle(
                PDAnnotationSquareCircle.SUB_TYPE_CIRCLE);
        aCircle.setContents("Circle Annotation");
        aCircle.setInteriorColour(colourRed); // Fill in circle in red
        aCircle.setColour(colourBlue); // The border itself will be blue
        aCircle.setBorderStyle(borderThin);

        // Place the annotation on the page, we'll make this 1" round
        // 3" down, 1" in on the page

        position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - (3 * inch) - inch); // 1" height, 3"
        // down
        position.setUpperRightX(2 * inch); // 1" in, 1" width
        position.setUpperRightY(ph - (3 * inch)); // 3" down
        aCircle.setRectangle(position);

        // add to the annotations on the page
        annotations.add(aCircle);

        // Now a square annotation

        PDAnnotationSquareCircle aSquare = new PDAnnotationSquareCircle(
                PDAnnotationSquareCircle.SUB_TYPE_SQUARE);
        aSquare.setContents("Square Annotation");
        aSquare.setColour(colourRed); // Outline in red, not setting a fill
        aSquare.setBorderStyle(borderThick);

        // Place the annotation on the page, we'll make this 1" (72points)
        // square
        // 3.5" down, 1" in from the right on the page

        position = new PDRectangle(); // Reuse the variable, but note it's a
        // new object!
        position.setLowerLeftX(pw - (2 * inch)); // 1" in from right, 1"
        // wide
        position.setLowerLeftY(ph - (float) (3.5 * inch) - inch); // 1" height, 3.5"
        // down
        position.setUpperRightX(pw - inch); // 1" in from right
        position.setUpperRightY(ph - (float) (3.5 * inch)); // 3.5" down
        aSquare.setRectangle(position);

        // add to the annotations on the page
        annotations.add(aSquare);

        // Now we want to draw a line between the two, one end with an open
        // arrow

        PDAnnotationLine aLine = new PDAnnotationLine();

        aLine.setEndPointEndingStyle(PDAnnotationLine.LE_OPEN_ARROW);
        aLine.setContents("Circle->Square");
        aLine.setCaption(true); // Make the contents a caption on the line

        // Set the rectangle containing the line

        position = new PDRectangle(); // Reuse the variable, but note it's a
        // new object!
        position.setLowerLeftX(2 * inch); // 1" in + width of circle
        position.setLowerLeftY(ph - (float) (3.5 * inch) - inch); // 1" height, 3.5"
        // down
        position.setUpperRightX(pw - inch - inch); // 1" in from right, and
        // width of square
        position.setUpperRightY(ph - (3 * inch)); // 3" down (top of circle)
        aLine.setRectangle(position);

        // Now set the line position itself
        float[] linepos = new float[4];
        linepos[0] = 2 * inch; // x1 = rhs of circle
        linepos[1] = ph - (float) (3.5 * inch); // y1 halfway down circle
        linepos[2] = pw - (2 * inch); // x2 = lhs of square
        linepos[3] = ph - (4 * inch); // y2 halfway down square
        aLine.setLine(linepos);

        aLine.setBorderStyle(borderThick);
        aLine.setColour(colourBlack);

        // add to the annotations on the page
        annotations.add(aLine);

        // Finally all done

        document.save("testAnnotation.pdf");
    } finally {
        document.close();
    }
}

From source file:org.nuxeo.pdf.PDFLinks.java

License:Apache License

protected void loadAndPreflightPdf() throws NuxeoException {

    if (pdfDoc == null) {
        pdfDoc = PDFUtils.load(pdfBlob, password);

        @SuppressWarnings("unchecked")
        List<PDPage> allPages = pdfDoc.getDocumentCatalog().getAllPages();
        try {/*from  w ww  .j a v  a  2 s . c  o m*/
            stripper = new PDFTextStripperByArea();
            for (PDPage page : allPages) {
                List<PDAnnotation> annotations = page.getAnnotations();
                for (int j = 0; j < annotations.size(); j++) {
                    PDAnnotation annot = (PDAnnotation) annotations.get(j);
                    if (annot instanceof PDAnnotationLink) {
                        PDAnnotationLink link = (PDAnnotationLink) annot;
                        PDRectangle rect = link.getRectangle();
                        // need to reposition link rectangle to match text space
                        float x = rect.getLowerLeftX();
                        float y = rect.getUpperRightY();
                        float width = rect.getWidth();
                        float height = rect.getHeight();
                        int rotation = page.findRotation();
                        if (rotation == 0) {
                            PDRectangle pageSize = page.findMediaBox();
                            y = pageSize.getHeight() - y;
                        } else if (rotation == 90) {
                            // do nothing
                        }

                        Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                        stripper.addRegion("" + j, awtRect);
                    }
                }
            }
        } catch (IOException e) {
            throw new NuxeoException("Cannot prefilght and prepare regions", e);
        }
    }
}

From source file:org.nuxeo.pdf.PDFLinks.java

License:Apache License

@SuppressWarnings("unchecked")
protected ArrayList<LinkInfo> parseForLinks(String inSubType) throws IOException {

    PDActionRemoteGoTo goTo;/* ww  w.ja v a 2 s.  c o m*/
    PDActionLaunch launch;
    PDActionURI uri;
    PDFileSpecification fspec;

    ArrayList<LinkInfo> li = new ArrayList<LinkInfo>();

    List<PDPage> allPages;
    allPages = pdfDoc.getDocumentCatalog().getAllPages();
    int pageNum = 0;
    for (PDPage page : allPages) {
        pageNum += 1;

        stripper.extractRegions(page);

        List<PDAnnotation> annotations = page.getAnnotations();
        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (annot instanceof PDAnnotationLink) {

                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDAction action = link.getAction();
                if (action.getSubType().equals(inSubType)) {
                    String urlText = stripper.getTextForRegion("" + j);
                    String urlValue = null;
                    switch (inSubType) {
                    case PDActionRemoteGoTo.SUB_TYPE:
                        goTo = (PDActionRemoteGoTo) action;
                        fspec = goTo.getFile();
                        urlValue = fspec.getFile();
                        break;

                    case PDActionLaunch.SUB_TYPE:
                        launch = (PDActionLaunch) action;
                        fspec = launch.getFile();
                        urlValue = fspec.getFile();
                        break;

                    case PDActionURI.SUB_TYPE:
                        uri = (PDActionURI) action;
                        urlValue = uri.getURI();
                        break;

                    // . . . Others . . .
                    }

                    if (StringUtils.isNotBlank(urlValue)) {
                        li.add(new LinkInfo(pageNum, inSubType, urlText, urlValue));
                    }
                }
            }
        }

    }

    return li;
}

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * A function to extract embedded URIs from the PDF-document.
 * //from  ww  w.  ja  va2  s . com
 */
protected void extractURLs(IParserDocument parserDoc, PDDocument pddDoc) throws IOException {
    final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog();
    if (pddDocCatalog == null)
        return;

    @SuppressWarnings("unchecked")
    final List<PDPage> allPages = pddDocCatalog.getAllPages();
    if (allPages == null || allPages.isEmpty())
        return;

    for (int i = 0; i < allPages.size(); i++) {
        final PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        final PDPage page = (PDPage) allPages.get(i);

        @SuppressWarnings("unchecked")
        final List<PDAnnotation> annotations = page.getAnnotations();
        if (annotations == null || annotations.isEmpty())
            return;

        //first setup text extraction regions
        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = (PDAnnotation) annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                final PDAnnotationLink link = (PDAnnotationLink) annot;
                final PDRectangle rect = link.getRectangle();

                //need to reposition link rectangle to match text space
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();
                int rotation = page.findRotation();
                if (rotation == 0) {
                    PDRectangle pageSize = page.findMediaBox();
                    y = pageSize.getHeight() - y;
                } else if (rotation == 90) {
                    //do nothing
                }

                Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                stripper.addRegion("" + j, awtRect);
            }
        }

        stripper.extractRegions(page);

        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = (PDAnnotation) annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                final PDAnnotationLink link = (PDAnnotationLink) annot;
                final PDAction action = link.getAction();
                final String urlText = stripper.getTextForRegion("" + j);

                if (action instanceof PDActionURI) {
                    final PDActionURI embeddedUri = (PDActionURI) action;
                    final URI temp = URI.create(embeddedUri.getURI());

                    parserDoc.addReference(temp, urlText, Constants.SERVICE_PID + ":" + PID);
                }
            }
        }
    }
}