Usage examples for the renderImage method of org.apache.pdfbox.rendering.PDFRenderer
public BufferedImage renderImage(int pageIndex, float scale) throws IOException
From source file:com.apache.pdfbox.ocr.tesseract.BadScan.java
License:Apache License
@Test public void textBadScan() { try {//from w w w .ja va2 s . com PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf")); PDFRenderer renderer = new PDFRenderer(document); BufferedImage image = renderer.renderImage(0, 3); TessBaseAPI api = new TessBaseAPI(); boolean init = api.init("eng"); api.setBufferedImage(image); String text = api.getUTF8Text(); System.out.println(text); api.end(); assertEquals(init, true); document.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
/**
 * Renders page 0 of {@code scansmpl.pdf} at scale 3 and feeds the image to Tesseract.
 *
 * <p>The test verifies that {@code api.init("eng")} returns {@code true}; the recognized
 * text is printed to standard output but not checked.
 */
@Test
public void textBadScan() {
    // try-with-resources closes the document even when rendering or OCR throws.
    try (PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf"))) {
        PDFRenderer renderer = new PDFRenderer(document);
        BufferedImage image = renderer.renderImage(0, 3);

        TessBaseAPI api = new TessBaseAPI();
        try {
            boolean init = api.init("eng");
            // Check the actual init result; comparing true with true could never fail.
            assertEquals(true, init);

            api.setBufferedImage(image);
            String text = api.getUTF8Text();
            System.out.println(text);
        } finally {
            // Release the engine on every path, including assertion failures.
            api.end();
        }
    } catch (IOException e) {
        // An unreadable sample must fail the test instead of being printed and ignored.
        throw new AssertionError("Could not load or render src/test/resources/samples/scansmpl.pdf", e);
    }
}
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
@Test public void testBufferedImage() { try {//from w w w . java 2 s . co m PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf1.pdf")); PDFRenderer renderer = new PDFRenderer(document); BufferedImage image = renderer.renderImage(0, 3); TessBaseAPI api = new TessBaseAPI(); boolean init = api.init("eng"); api.setBufferedImage(image); String text = api.getUTF8Text(); System.out.println(text); api.end(); assertEquals(init, true); document.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
/**
 * Renders page 0 of {@code pdf2.pdf} at scale 3, converts the image to a raw byte array
 * with {@code getByteStream}, and passes it to Tesseract through {@code setImage}.
 *
 * <p>The only verified condition is that {@code api.init("eng")} returns {@code true};
 * the recognized text is printed to standard output but not checked.
 */
@Test
public void testByteStream() {
    // try-with-resources closes the document even when rendering or OCR throws.
    try (PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf2.pdf"))) {
        PDFRenderer renderer = new PDFRenderer(document);
        BufferedImage image = renderer.renderImage(0, 3);

        int width = image.getWidth();
        int height = image.getHeight();
        // NOTE(review): bpp/bpl presumably mean bytes per pixel / bytes per line, and the
        // value 3 assumes a 3-byte-per-pixel layout from getByteStream - confirm.
        int bpp = 3;
        // Derived from bpp so the two values cannot drift apart.
        int bpl = width * bpp;

        TessBaseAPI api = new TessBaseAPI();
        try {
            byte[] data = api.getByteStream(image);
            boolean init = api.init("eng");
            // Fail fast: there is no point running OCR on an engine that did not initialize.
            assertEquals(true, init);

            api.setImage(data, width, height, bpp, bpl);
            String text = api.getUTF8Text();
            System.out.println(text);
        } finally {
            // Release the engine on every path, including assertion failures.
            api.end();
        }
    } catch (IOException e) {
        // An unreadable sample must fail the test instead of being printed and ignored.
        throw new AssertionError("Could not load or render src/test/resources/samples/pdf2.pdf", e);
    }
}
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
/**
 * Renders page 0 of {@code pdf3.pdf} at scale 3, runs Tesseract on it, and walks the
 * result iterator, printing each word followed by its bounding box.
 *
 * <p>The only verified condition is that {@code api.init("eng")} returns {@code true};
 * the printed words and boxes are not checked.
 */
@Test
public void testIterator() {
    // try-with-resources closes the document even when rendering or OCR throws.
    try (PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf3.pdf"))) {
        PDFRenderer renderer = new PDFRenderer(document);
        BufferedImage image = renderer.renderImage(0, 3);

        TessBaseAPI api = new TessBaseAPI();
        try {
            boolean init = api.init("eng");
            // Fail fast: there is no point running OCR on an engine that did not initialize.
            assertEquals(true, init);

            api.setBufferedImage(image);
            api.getResultIterator();
            if (api.isResultIteratorAvailable()) {
                // do/while: the iterator already points at the first result when available.
                do {
                    System.out.println(api.getWord().trim());
                    String result = api.getBoundingBox();
                    System.out.println(result);
                } while (api.resultIteratorNext());
            }
        } finally {
            // Release the engine on every path, including assertion failures.
            api.end();
        }
    } catch (IOException e) {
        // An unreadable sample must fail the test instead of being printed and ignored.
        throw new AssertionError("Could not load or render src/test/resources/samples/pdf3.pdf", e);
    }
}
From source file:com.formkiq.core.service.conversion.PdfToPngFormatConverter.java
License:Apache License
/**
 * Renders every page of the given PDF to an image, merges the images into one
 * {@link ConversionResult}, and attaches the fields found by {@code findSigningButtons}.
 *
 * @param data the document to convert; cast to {@link PDDocument} without a type check
 * @param inputType not read by this method
 * @param outputType not read by this method
 * @return the merged result with its fields set
 * @throws IOException if rendering a page fails (and possibly from the merge/field
 *         helpers, which are not visible here)
 */
@Override
public ConversionResult convert(final Object data, final WorkflowOutputDocumentType inputType,
        final WorkflowOutputDocumentType outputType) throws IOException {

    final PDDocument pdf = (PDDocument) data;
    final PDFRenderer renderer = new PDFRenderer(pdf);

    // One rendered image per page, stored in page order.
    final int pageCount = pdf.getNumberOfPages();
    final BufferedImage[] rendered = new BufferedImage[pageCount];
    int index = 0;
    while (index < pageCount) {
        rendered[index] = renderer.renderImage(index, SCALE);
        index++;
    }

    final ConversionResult merged = merge(rendered);
    merged.setFields(findSigningButtons(pdf, merged));
    return merged;
}
From source file:com.yiyihealth.tools.test.DrawPrintTextLocations.java
License:Apache License
private void stripPage(int page) throws IOException { PDFRenderer pdfRenderer = new PDFRenderer(document); image = pdfRenderer.renderImage(page, SCALE); PDPage pdPage = document.getPage(page); PDRectangle cropBox = pdPage.getCropBox(); // flip y-axis flipAT = new AffineTransform(); flipAT.translate(0, pdPage.getBBox().getHeight()); flipAT.scale(1, -1);//from www .j a va 2 s . co m // page may be rotated rotateAT = new AffineTransform(); int rotation = pdPage.getRotation(); if (rotation != 0) { PDRectangle mediaBox = pdPage.getMediaBox(); switch (rotation) { case 90: rotateAT.translate(mediaBox.getHeight(), 0); break; case 270: rotateAT.translate(0, mediaBox.getWidth()); break; case 180: rotateAT.translate(mediaBox.getWidth(), mediaBox.getHeight()); break; default: break; } rotateAT.rotate(Math.toRadians(rotation)); } g2d = image.createGraphics(); g2d.setStroke(new BasicStroke(0.1f)); g2d.scale(SCALE, SCALE); setStartPage(page + 1); setEndPage(page + 1); Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream()); writeText(document, dummy); // beads in green g2d.setStroke(new BasicStroke(0.4f)); List<PDThreadBead> pageArticles = pdPage.getThreadBeads(); for (PDThreadBead bead : pageArticles) { PDRectangle r = bead.getRectangle(); GeneralPath p = r .transform(Matrix.getTranslateInstance(-cropBox.getLowerLeftX(), cropBox.getLowerLeftY())); Shape s = flipAT.createTransformedShape(p); s = rotateAT.createTransformedShape(s); g2d.setColor(Color.green); g2d.draw(s); } g2d.dispose(); String imageFilename = filename; int pt = imageFilename.lastIndexOf('.'); imageFilename = imageFilename.substring(0, pt) + "-marked-" + (page + 1) + ".png"; ImageIO.write(image, "png", new File(imageFilename)); }