List of usage examples for org.apache.pdfbox.pdmodel PDPage getCropBox
public PDRectangle getCropBox()
From source file:de.redsix.pdfcompare.CompareResult.java
License:Apache License
/**
 * Appends a new page to {@code document} that shows the diff, actual and expected
 * images side by side, separated by thin vertical rules.
 *
 * <p>The page is sized after {@code diffImage}. Each image is drawn one third of the
 * crop box width wide and the full crop box height high.
 *
 * @param document      the document the page and the image XObjects are added to
 * @param diffImage     the difference image (left column); its dimensions define the page size
 * @param actualImage   the actual rendering (middle column)
 * @param expectedImage the expected rendering (right column)
 * @throws IOException if an image cannot be encoded or the content stream cannot be written
 */
protected void addPageToDocument(final PDDocument document, final ImageWithDimension diffImage,
        final ImageWithDimension actualImage, final ImageWithDimension expectedImage) throws IOException {
    final PDPage page = new PDPage(new PDRectangle(diffImage.width, diffImage.height));
    document.addPage(page);

    final PDImageXObject diffXImage = LosslessFactory.createFromImage(document, diffImage.bufferedImage);
    final PDImageXObject actualXObject = LosslessFactory.createFromImage(document, actualImage.bufferedImage);
    final PDImageXObject expectedXObject = LosslessFactory.createFromImage(document,
            expectedImage.bufferedImage);

    try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
        final PDRectangle cropBox = page.getCropBox();
        final int x = (int) cropBox.getWidth() / 3; // width of one column
        final int y = (int) cropBox.getHeight();

        // Paint all images first: an image must not be painted while a path is
        // still under construction, so the separators are built afterwards.
        contentStream.drawImage(diffXImage, 0, 0, x, y);
        contentStream.drawImage(actualXObject, x + 2, 0, x, y);
        contentStream.drawImage(expectedXObject, x + x + 2, 0, x, y);

        // Separator rules between the columns; both end at the top of the page.
        contentStream.setLineWidth(0.5F);
        contentStream.moveTo(x, 2);
        contentStream.lineTo(x, y);
        contentStream.moveTo(x + x, 2);
        contentStream.lineTo(x + x, y);
        contentStream.stroke();
    }
}
From source file:model.objects.Project.java
License:Apache License
/** * //from w w w .j av a 2 s .com * @param _doc * @param _bi * @param _pageindex index of page to which the BufferedImage is * inserted. * If it is equal to -1, new page is created. * */ public void attatchToPDF(final PDDocument _doc, final BufferedImage _bi, final int _pageindex) { PDPage page = null; try { if (_pageindex == -1) { page = new PDPage(new PDRectangle(State.getImageSize().width, State.getImageSize().height)); _doc.addPage(page); } else { page = _doc.getPage(_pageindex); // page.setCropBox(new PDRectangle(State.getImageSize().width , // State.getImageSize().height )); } int width = (int) page.getCropBox().getWidth(); int height = (int) page.getCropBox().getHeight(); PDImageXObject ximage = LosslessFactory.createFromImage(_doc, // _bi); Utils.resizeImage(width, height, _bi)); PDPageContentStream content = new PDPageContentStream(_doc, page, true, true); // contentStream.drawImage(ximage, 20, 20 ); // better method inspired by http://stackoverflow.com/a/22318681/535646 // reduce this value if the image is too large float scale = 1f; content.drawImage(ximage, 20, 20, ximage.getWidth() * scale, ximage.getHeight() * scale); content.close(); // LosslessFactory.createFromImage(doc, bim) // content.drawImage(ximage, 0, 0); // content.close(); } catch (IOException ie) { //handle exception } }
From source file:net.bookinaction.ExtractAnnotations.java
License:Apache License
public void doJob(String job, Float[] pA) throws IOException { PDDocument document = null;//www . jav a2 s. c o m Stamper s = new Stamper(); // utility class final String job_file = job + ".pdf"; final String dic_file = job + "-dict.txt"; final String new_job = job + "-new.pdf"; PrintWriter writer = new PrintWriter(dic_file); ImageLocationListener imageLocationsListener = new ImageLocationListener(); AnnotationMaker annotMaker = new AnnotationMaker(); try { document = PDDocument.load(new File(job_file)); int pageNum = 0; for (PDPage page : document.getPages()) { pageNum++; PDRectangle cropBox = page.getCropBox(); List<PDAnnotation> annotations = page.getAnnotations(); // extract image locations List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>(); imageLocationsListener.setImageRects(imageRects); imageLocationsListener.processPage(page); int im = 0; for (Rectangle2D pdImageRect : imageRects) { s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect); annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect, "[im" + im + "]")); im++; } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); int j = 0; List<PDAnnotation> viableAnnots = new ArrayList(); for (PDAnnotation annot : annotations) { if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) { stripper.addRegion(Integer.toString(j++), s.getAwtRect( s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox)); viableAnnots.add(annot); } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) { viableAnnots.add(annot); } } stripper.extractRegions(page); List<PDRectangle> rects = new ArrayList<PDRectangle>(); List<String> comments = new ArrayList<String>(); List<String> highlightTexts = new ArrayList<String>(); j = 0; for (PDAnnotation viableAnnot : viableAnnots) { if (viableAnnot instanceof PDAnnotationTextMarkup) { String highlightText = 
stripper.getTextForRegion(Integer.toString(j++)); String withoutCR = highlightText.replace((char) 0x0A, '^'); String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]); rects.add(aRect); comments.add(comment); highlightTexts.add(highlightText); s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString); } else if (viableAnnot instanceof PDAnnotationText) { String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); for (Rectangle2D pdImageRect : imageRects) { if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(), viableAnnot.getRectangle().getLowerLeftY())) { s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect, colorString); annotations.add(annotMaker.squareAnnotation(Color.GREEN, (Rectangle2D.Float) pdImageRect, comment)); } ; } } } PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true); int i = 0; for (PDRectangle pdRect : rects) { String comment = comments.get(i); String highlightText = highlightTexts.get(i); //annotations.add(linkAnnotation(pdRect, comment, highlightText)); //annotations.add(annotationSquareCircle(pdRect, BLUE)); s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(), pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE); i++; } canvas.close(); } writer.close(); document.save(new_job); } finally { if (document != null) { document.close(); } } }
From source file:net.bookinaction.TextInfoExtractor.java
License:Apache License
public static void getTextPositionFromPage(PDDocument document, StripperParam stripperParam, int pageNum, PrintWriter writer, boolean testMode) throws IOException { //System.out.println(String.format("getPage: %d", pageNum)); PDPage page = document.getPage(pageNum - 1); // pdfbox uses the 0-base index PDRectangle cropBox = page.getCropBox(); // extract image locations ImageLocationListener imageLocationsListener = new ImageLocationListener(); List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>(); imageLocationsListener.setImageRects(imageRects); imageLocationsListener.processPage(page); // extract Text locations StripString stripString = new StripString(); TextLocationListener stripper = new TextLocationListener(stripperParam, stripString); stripper.setSortByPosition(true);/*from w w w. ja v a 2s . com*/ List<StripLine> stripLines = new ArrayList<StripLine>(); stripper.setStartPage(pageNum); stripper.setEndPage(pageNum); try { stripper.writeText(document, new OutputStreamWriter(new ByteArrayOutputStream())); } catch (IOException e) { return; } if (page.getContents() != null) stripper.processPage(page); // declare canvas and keep this position PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true); Stamper s = new Stamper(); // utility class if (testMode) { // draw the bounding box of each character for (int i = 0; i < stripString.size(); i++) { // original Rectangle s.showBox(canvas, stripString.boundingRect(i), cropBox, Color.GRAY80); } } s.recordPageSize(writer, pageNum, cropBox); // splits into lines int lineNum = 1; int lineStart = 0, lineEnd = 0; String[] splits = stripString.toString().split("\r"); SimpleTokenizer simpleTokenizer = new SimpleTokenizer(); for (String lineText : splits) { if (lineText.length() < 1) continue; lineEnd = lineStart + lineText.length(); Rectangle2D mergedRect = stripString.boundingRect(lineStart, lineEnd - 1); String sub = stripString.substring(lineStart, lineEnd); stripLines.add(new 
StripLine(pageNum, lineNum, lineStart, lineEnd, mergedRect)); //System.out.println(String.format("%d-%d: %s - [%.0f %.0f %.0f %.0f]", pageNum, lineNum, sub, // mergedRect.getX(), mergedRect.getY(), mergedRect.getWidth(), mergedRect.getHeight())); if (testMode) { s.showBox(canvas, mergedRect, cropBox, Color.GREEN); } s.recordTextPosition(writer, sub, pageNum, mergedRect, "LINE"); /******* get words in the line *********/ List<Token> tokens = simpleTokenizer.getTokens(sub); for (String pattern : circles_patterns) { List<Token> symbolTokens = PatternAnalyzer.getTokensByPattern(sub, pattern); tokens.addAll(symbolTokens); } for (Token t : tokens) { mergedRect = stripString.boundingRect(lineStart + t.getStart(), lineStart + t.getEnd() - 1); //System.out.println(String.format("%d-%d: %s - [%.0f %.0f %.0f %.0f]", pageNum, lineNum, t.getStem(), mergedRect.getX(), mergedRect.getY(), mergedRect.getWidth(), mergedRect.getHeight())); s.recordTextPosition(writer, t.getStem(), pageNum, mergedRect, "TEXT"); if (testMode) { s.showBox(canvas, mergedRect, cropBox, Color.RED); } } lineStart += lineText.length() + 1; lineNum++; } // ------------------- // markup textMark annotation to the image int imageNum = 1; for (Rectangle2D imRect : imageRects) { //page.getAnnotations().add(annotationMaker.textMarkupAnnotation(Color.YELLOW, (Rectangle2D.Float) imRect, "image"+imageNum)); if (testMode) { s.showBox(canvas, imRect, cropBox, Color.YELLOW); } s.recordTextPosition(writer, "[image" + imageNum + "]", pageNum, imRect, "IMAGE"); imageNum++; } canvas.close(); }
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
/**
 * Creates a stream (from FOP's PDF library) from a PDF page parsed with PDFBox.
 *
 * <p>The page content is rewritten (through {@code MergeFontsPDFWriter} when the page
 * has fonts and {@code pdfDoc.isMergeFontsEnabled()}, otherwise through
 * {@code PDFWriter}), the page resources are cloned for the new document, and
 * {@code atdoc} is overwritten with a transform derived from the page's view box
 * (the crop box when non-null, otherwise the media box), {@code pos} and the page
 * rotation. The font dictionary of the source page is backed up before merging and
 * restored afterwards.
 *
 * @param sourceDoc the source PDF the given page to be copied belongs to
 * @param page the page to transform into a stream
 * @param key value to use as key for the stream
 * @param atdoc adjustment for stream; reset via {@code setTransform} and then
 *              translated, rotated and scaled by this method
 * @param fontinfo fonts
 * @param pos rectangle; its width and height are divided by 1000 to obtain the target
 *            size (NOTE(review): presumably millipoints - confirm against callers)
 * @return a clip operator for the page's media box ("x y w h re W n") followed by the
 *         rewritten content stream decoded as ISO-8859-1
 * @throws IOException if an I/O error occurs
 */
public String createStreamFromPDFBoxPage(PDDocument sourceDoc, PDPage page, String key, AffineTransform atdoc, FontInfo fontinfo, Rectangle pos) throws IOException { handleAnnotations(sourceDoc, page, atdoc); if (pageNumbers.containsKey(targetPage.getPageIndex())) { pageNumbers.get(targetPage.getPageIndex()).set(0, targetPage.makeReference()); } PDResources sourcePageResources = page.getResources(); PDStream pdStream = getContents(page); COSDictionary fonts = (COSDictionary) sourcePageResources.getCOSObject().getDictionaryObject(COSName.FONT); COSDictionary fontsBackup = null; UniqueName uniqueName = new UniqueName(key, sourcePageResources); String newStream = null; if (fonts != null && pdfDoc.isMergeFontsEnabled()) { fontsBackup = new COSDictionary(fonts); MergeFontsPDFWriter m = new MergeFontsPDFWriter(fonts, fontinfo, uniqueName, parentFonts, currentMCID); newStream = m.writeText(pdStream); // if (newStream != null) { // for (Object f : fonts.keySet().toArray()) { // COSDictionary fontdata = (COSDictionary)fonts.getDictionaryObject((COSName)f); // if (getUniqueFontName(fontdata) != null) { // fonts.removeItem((COSName)f); // } // } // } } if (newStream == null) { PDFWriter writer = new PDFWriter(uniqueName, currentMCID); newStream = writer.writeText(pdStream); currentMCID = writer.getCurrentMCID(); } pdStream = new PDStream(sourceDoc, new ByteArrayInputStream(newStream.getBytes("ISO-8859-1"))); mergeXObj(sourcePageResources.getCOSObject(), fontinfo, uniqueName); PDFDictionary pageResources = (PDFDictionary) 
cloneForNewDocument(sourcePageResources.getCOSObject()); PDFDictionary fontDict = (PDFDictionary) pageResources.get("Font"); if (fontDict != null && pdfDoc.isMergeFontsEnabled()) { for (Map.Entry<String, Typeface> fontEntry : fontinfo.getUsedFonts().entrySet()) { Typeface font = fontEntry.getValue(); if (font instanceof FOPPDFFont) { FOPPDFFont pdfFont = (FOPPDFFont) font; if (pdfFont.getRef() == null) { pdfFont.setRef(new PDFDictionary()); pdfDoc.assignObjectNumber(pdfFont.getRef()); } fontDict.put(fontEntry.getKey(), pdfFont.getRef()); } } } updateXObj(sourcePageResources.getCOSObject(), pageResources); if (fontsBackup != null) { sourcePageResources.getCOSObject().setItem(COSName.FONT, fontsBackup); } COSStream originalPageContents = pdStream.getCOSObject(); bindOptionalContent(sourceDoc); PDFStream pageStream; Set filter; // if (originalPageContents instanceof COSStreamArray) { // COSStreamArray array = (COSStreamArray)originalPageContents; // pageStream = new PDFStream(); // InputStream in = array.getUnfilteredStream(); // OutputStream out = pageStream.getBufferOutputStream(); // IOUtils.copyLarge(in, out); // filter = FILTER_FILTER; // } else { pageStream = (PDFStream) cloneForNewDocument(originalPageContents); filter = Collections.EMPTY_SET; // } if (pageStream == null) { pageStream = new PDFStream(); } if (originalPageContents != null) { transferDict(originalPageContents, pageStream, filter); } transferPageDict(fonts, uniqueName, sourcePageResources); PDRectangle mediaBox = page.getMediaBox(); PDRectangle cropBox = page.getCropBox(); PDRectangle viewBox = cropBox != null ? 
cropBox : mediaBox; //Handle the /Rotation entry on the page dict int rotation = PDFUtil.getNormalizedRotation(page); //Transform to FOP's user space float w = (float) pos.getWidth() / 1000f; float h = (float) pos.getHeight() / 1000f; if (rotation == 90 || rotation == 270) { float tmp = w; w = h; h = tmp; } atdoc.setTransform(AffineTransform.getScaleInstance(w / viewBox.getWidth(), h / viewBox.getHeight())); atdoc.translate(0, viewBox.getHeight()); atdoc.rotate(-Math.PI); atdoc.scale(-1, 1); atdoc.translate(-viewBox.getLowerLeftX(), -viewBox.getLowerLeftY()); rotate(rotation, viewBox, atdoc); StringBuilder boxStr = new StringBuilder(); boxStr.append(PDFNumber.doubleOut(mediaBox.getLowerLeftX())).append(' ') .append(PDFNumber.doubleOut(mediaBox.getLowerLeftY())).append(' ') .append(PDFNumber.doubleOut(mediaBox.getWidth())).append(' ') .append(PDFNumber.doubleOut(mediaBox.getHeight())).append(" re W n\n"); return boxStr.toString() + IOUtils.toString(pdStream.createInputStream(null), "ISO-8859-1"); }
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
private void moveAnnotations(PDPage page, List pageAnnotations, AffineTransform at) { PDRectangle mediaBox = page.getMediaBox(); PDRectangle cropBox = page.getCropBox(); PDRectangle viewBox = cropBox != null ? cropBox : mediaBox; for (Object obj : pageAnnotations) { PDAnnotation annot = (PDAnnotation) obj; PDRectangle rect = annot.getRectangle(); float translateX = (float) (at.getTranslateX() - viewBox.getLowerLeftX()); float translateY = (float) (at.getTranslateY() - viewBox.getLowerLeftY()); if (rect != null) { rect.setUpperRightX(rect.getUpperRightX() + translateX); rect.setLowerLeftX(rect.getLowerLeftX() + translateX); rect.setUpperRightY(rect.getUpperRightY() + translateY); rect.setLowerLeftY(rect.getLowerLeftY() + translateY); annot.setRectangle(rect);//from ww w . j av a 2 s . c om } // COSArray vertices = (COSArray) annot.getCOSObject().getDictionaryObject("Vertices"); // if (vertices != null) { // Iterator iter = vertices.iterator(); // while (iter.hasNext()) { // COSFloat x = (COSFloat) iter.next(); // COSFloat y = (COSFloat) iter.next(); // x.setValue(x.floatValue() + translateX); // y.setValue(y.floatValue() + translateY); // } // } } }
From source file:org.apache.fop.render.pdf.pdfbox.PreloaderPDF.java
License:Apache License
private ImageInfo loadPDF(String uri, Source src, ImageContext context) throws IOException, ImageException { int selectedPage = ImageUtil.needPageIndexFromURI(uri); URI docURI = deriveDocumentURI(src.getSystemId()); PDDocument pddoc = getDocument(context, docURI, src); pddoc = Interceptors.getInstance().interceptOnLoad(pddoc, docURI); //Disable the warning about a missing close since we rely on the GC to decide when //the cached PDF shall be disposed off. pddoc.getDocument().setWarnMissingClose(false); int pageCount = pddoc.getNumberOfPages(); if (selectedPage < 0 || selectedPage >= pageCount) { throw new ImageException("Selected page (index: " + selectedPage + ") does not exist in the PDF file. The document has " + pddoc.getNumberOfPages() + " pages."); }/*ww w . j a v a2 s . com*/ PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage); PDRectangle mediaBox = page.getMediaBox(); PDRectangle cropBox = page.getCropBox(); PDRectangle viewBox = cropBox != null ? cropBox : mediaBox; int w = Math.round(viewBox.getWidth() * 1000); int h = Math.round(viewBox.getHeight() * 1000); //Handle the /Rotation entry on the page dict int rotation = PDFUtil.getNormalizedRotation(page); if (rotation == 90 || rotation == 270) { //Swap width and height int exch = w; w = h; h = exch; } ImageSize size = new ImageSize(); size.setSizeInMillipoints(w, h); size.setResolution(context.getSourceResolution()); size.calcPixelsFromSize(); ImageInfo info = new ImageInfo(uri, ImagePDF.MIME_PDF); info.setSize(size); info.getCustomObjects().put(ImageInfo.ORIGINAL_IMAGE, new ImagePDF(info, pddoc)); int lastPageIndex = pddoc.getNumberOfPages() - 1; if (selectedPage < lastPageIndex) { info.getCustomObjects().put(ImageInfo.HAS_MORE_IMAGES, Boolean.TRUE); } return info; }
From source file:org.haplo.component.pdfbox.PDF.java
License:Mozilla Public License
/** * Open a PDF and read it's data. close() must be called to clean up nicely. *//* ww w .j a va2s. c o m*/ public PDF(String filename) throws IOException { if (!Operation.isThreadMarkedAsWorker()) { throw new RuntimeException("PDF manipulation can only be performed in a worker process"); } // Not valid by default isValid = false; // Try to load the page try { // Open the PDF for reading this.pdf = PDDocument.load(new File(filename)); this.numberOfPages = this.pdf.getNumberOfPages(); PDPage page = this.pdf.getPage(0); // Width and height PDRectangle cropBox = page.getCropBox(); width = (int) cropBox.getWidth(); height = (int) cropBox.getHeight(); isValid = true; } catch (Exception e) { // Ignore exception, but do clean up nicely close(); } }
From source file:org.haplo.component.pdfbox.PDF.java
License:Mozilla Public License
/** * Render the PDF as an image// w ww.jav a 2s . co m */ public void render(String outFilename, String outFormat, int page, int outWidth, int outHeight) throws IOException { BufferedImage img = null; try { PDPage pdfPage = this.pdf.getPage(page - 1); PDRectangle cropBox = pdfPage.getCropBox(); int pageWidth = (int) cropBox.getWidth(); int pageHeight = (int) cropBox.getHeight(); if (pageHeight <= 0) { pageHeight = 1; } int resolution = (96 * outHeight) / pageHeight; if (resolution < 4) { resolution = 4; } if (resolution > 1000) { resolution = 1000; } if (outHeight < 100 || outWidth < 100) { resolution *= 2; } PDFRenderer pdfRenderer = new PDFRenderer(this.pdf); img = pdfRenderer.renderImageWithDPI(page - 1, resolution, outFormat.equals("png") ? ImageType.ARGB : ImageType.RGB); } catch (Exception e) { Logger.getLogger("org.haplo.app").error("Error rendering PDF: " + e.toString()); throw new RuntimeException("Couldn't render PDF page", e); } // Did it convert? (most likely cause of null return is requested a page which didn't exist) if (img == null) { throw new RuntimeException("Failed to render PDF - did the requested page exist?"); } // Scale the image to the right size BufferedImage original = null; if (img.getWidth() != outWidth || img.getHeight() != outHeight) { original = img; Image scaled = img.getScaledInstance(outWidth, outHeight, Image.SCALE_SMOOTH); img = new BufferedImage(outWidth, outHeight, original.getType()); Graphics2D graphics = img.createGraphics(); graphics.setBackground(Color.WHITE); graphics.clearRect(0, 0, outWidth, outHeight); graphics.drawImage(scaled, 0, 0, null); graphics.dispose(); scaled.flush(); } // Write the image to a file ImageIO.write(img, outFormat, new File(outFilename)); // Free resources img.flush(); if (original != null) { original.flush(); } }