Example usage for org.apache.pdfbox.rendering PDFRenderer renderImage

List of usage examples for org.apache.pdfbox.rendering PDFRenderer renderImage

Introduction

On this page you can find example usages of the renderImage method of org.apache.pdfbox.rendering.PDFRenderer.

Prototype

public BufferedImage renderImage(int pageIndex, float scale, ImageType imageType) throws IOException 

Source Link

Document

Returns the given page as an RGB or ARGB image at the given scale.

Usage

From source file: org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

/**
 * Renders the current page to an image, stores it in a temporary file and runs
 * Tesseract OCR over that file, emitting the result into {@code xhtml}.
 *
 * <p>Returns immediately when the configured OCR strategy is {@code NO_OCR}.
 * The page is rendered at a fixed scale of 2.0 using the image type from
 * {@code config.getOcrImageType()}. Temporary resources are always disposed,
 * whether or not OCR succeeds.
 *
 * @throws IOException if a {@link SAXException} is raised while parsing (it is
 *         rethrown wrapped in an {@code IOExceptionWithCause}); other I/O
 *         failures inside the render/write/parse section are handed to
 *         {@code handleCatchableIOE}, which decides whether to propagate them
 * @throws TikaException if Tesseract is not available for the resolved
 *         {@code TesseractOCRConfig}
 * @throws SAXException declared in the signature; SAX failures raised by the
 *         inline parse are wrapped as described above
 */
void doOCROnCurrentPage() throws IOException, TikaException, SAXException {
    if (config.getOcrStrategy().equals(NO_OCR)) {
        return;
    }
    TesseractOCRConfig tesseractConfig = context.get(TesseractOCRConfig.class, DEFAULT_TESSERACT_CONFIG);

    TesseractOCRParser tesseractOCRParser = new TesseractOCRParser();
    if (!tesseractOCRParser.hasTesseract(tesseractConfig)) {
        throw new TikaException("Tesseract is not available. "
                + "Please set the OCR_STRATEGY to NO_OCR or configure Tesseract correctly");
    }

    PDFRenderer renderer = new PDFRenderer(pdDocument);
    TemporaryResources tmp = new TemporaryResources();
    try {
        BufferedImage image = renderer.renderImage(pageIndex, 2.0f, config.getOcrImageType());
        Path tmpFile = tmp.createTempFile();
        // assumes getOcrImageFormatName() yields an ImageIO format name such as "png" -- TODO confirm
        String formatName = config.getOcrImageFormatName();
        try (OutputStream os = Files.newOutputStream(tmpFile)) {
            //TODO: get output format from TesseractConfig
            // The rendered page must actually be written out; otherwise Tesseract
            // is handed an empty file. ImageIO.write reports a missing writer by
            // returning false rather than throwing, so check the result.
            // NOTE(review): this does not embed DPI metadata; if PDFBox's ImageIOUtil
            // is available, ImageIOUtil.writeImage(image, formatName, os,
            // config.getOcrDPI()) would additionally record the configured DPI.
            if (!javax.imageio.ImageIO.write(image, formatName, os)) {
                throw new IOException("No image writer available for OCR image format: " + formatName);
            }
        }
        try (InputStream is = TikaInputStream.get(tmpFile)) {
            tesseractOCRParser.parseInline(is, xhtml, tesseractConfig);
        }
    } catch (IOException e) {
        handleCatchableIOE(e);
    } catch (SAXException e) {
        throw new IOExceptionWithCause("error writing OCR content from PDF", e);
    } finally {
        tmp.dispose();
    }
}

From source file: ve.zoonosis.utils.PDFCreator.java

License:Apache License

/**
 * Renders a single page of {@code document} as an RGB image at scale 1.
 *
 * <p>The current {@code contentStream} is closed before rendering.
 *
 * @param index page index passed straight to {@code PDFRenderer.renderImage}
 * @return the rendered page
 * @throws IOException if closing the content stream or rendering the page fails
 */
public BufferedImage getImagePage(int index) throws IOException {
    // Close the content stream first, then render from the document.
    contentStream.close();
    final float scale = 1f;
    return new PDFRenderer(document).renderImage(index, scale, ImageType.RGB);
}