Example usage for com.itextpdf.text Image getDpiX

Introduction

In this page you can find the example usage for com.itextpdf.text Image getDpiX.

Prototype

public int getDpiX()

Source Link

Document

Gets the dots-per-inch in the X direction.

Usage

From source file:com.ephesoft.dcma.imagemagick.MultiPageExecutor.java

License:Open Source License

/**
 * The <code>addImageToPdf</code> method is used to add image to pdf and make it searchable by adding image text in invisible mode
 * w.r.t parameter 'isPdfSearchable' passed.
 * /*from  w  ww  . j  a va2 s .c  o m*/
 * @param pdfWriter {@link PdfWriter} writer of pdf in which image has to be added
 * @param htmlUrl {@link HocrPage} corresponding html file for fetching text and coordinates
 * @param imageUrl {@link String} url of image to be added in pdf
 * @param isPdfSearchable true for searchable pdf else otherwise
 * @param widthOfLine
 */
private void addImageToPdf(PdfWriter pdfWriter, HocrPage hocrPage, String imageUrl, boolean isPdfSearchable,
        final int widthOfLine) {
    if (null != pdfWriter && null != imageUrl && imageUrl.length() > 0) {
        try {
            LOGGER.info("Adding image" + imageUrl + " to pdf using iText");
            Image pageImage = Image.getInstance(imageUrl);
            float dotsPerPointX = pageImage.getDpiX() / PDF_RESOLUTION;
            float dotsPerPointY = pageImage.getDpiY() / PDF_RESOLUTION;
            PdfContentByte pdfContentByte = pdfWriter.getDirectContent();

            pageImage.scaleToFit(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY);

            pageImage.setAbsolutePosition(0, 0);

            // Add image to pdf
            pdfWriter.getDirectContentUnder().addImage(pageImage);
            pdfWriter.getDirectContentUnder().add(pdfContentByte);

            // If pdf is to be made searchable
            if (isPdfSearchable) {
                LOGGER.info("Adding invisible text for image: " + imageUrl);
                float pageImagePixelHeight = pageImage.getHeight();
                Font defaultFont = FontFactory.getFont(FontFactory.HELVETICA, 8, Font.BOLD, CMYKColor.BLACK);

                // Fetch text and coordinates for image to be added
                Map<String, int[]> textCoordinatesMap = getTextWithCoordinatesMap(hocrPage, widthOfLine);
                Set<String> ketSet = textCoordinatesMap.keySet();

                // Add text at specific location
                for (String key : ketSet) {
                    int[] coordinates = textCoordinatesMap.get(key);
                    float bboxWidthPt = (coordinates[2] - coordinates[0]) / dotsPerPointX;
                    float bboxHeightPt = (coordinates[3] - coordinates[1]) / dotsPerPointY;
                    pdfContentByte.beginText();

                    // To make text added as invisible
                    pdfContentByte.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_INVISIBLE);
                    pdfContentByte.setLineWidth(Math.round(bboxWidthPt));

                    // Ceil is used so that minimum font of any text is 1
                    // For exception of unbalanced beginText() and endText()
                    if (bboxHeightPt > 0.0) {
                        pdfContentByte.setFontAndSize(defaultFont.getBaseFont(),
                                (float) Math.ceil(bboxHeightPt));
                    } else {
                        pdfContentByte.setFontAndSize(defaultFont.getBaseFont(), 1);
                    }
                    float xCoordinate = (float) (coordinates[0] / dotsPerPointX);
                    float yCoordinate = (float) ((pageImagePixelHeight - coordinates[3]) / dotsPerPointY);
                    pdfContentByte.moveText(xCoordinate, yCoordinate);
                    pdfContentByte.showText(key);
                    pdfContentByte.endText();
                }
            }
            pdfContentByte.closePath();
        } catch (BadElementException badElementException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + badElementException.toString());
        } catch (DocumentException documentException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + documentException.toString());
        } catch (MalformedURLException malformedURLException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + malformedURLException.toString());
        } catch (IOException ioException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + ioException.toString());
        }
    }
}

From source file:pdfextract.ExtractInfo.java

public void extractImagesInfo() {
    try {//from   w ww . j  a  v  a 2 s. c  o  m
        PdfReader chartReader = new PdfReader("vv.pdf");
        for (int i = 0; i < chartReader.getXrefSize(); i++) {
            PdfObject pdfobj = chartReader.getPdfObject(i);
            if (pdfobj != null && pdfobj.isStream()) {
                PdfStream stream = (PdfStream) pdfobj;
                PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE);
                //System.out.println("Stream subType: " + pdfsubtype); 
                if (pdfsubtype != null && pdfsubtype.toString().equals(PdfName.IMAGE.toString())) {
                    byte[] image = PdfReader.getStreamBytesRaw((PRStream) stream);
                    Image imageObject = Image.getInstance(image);
                    System.out.println("Resolution" + imageObject.getDpiX());
                    System.out.println("Height" + imageObject.getHeight());
                    System.out.println("Width" + imageObject.getWidth());

                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:pl.marcinmilkowski.hocrtopdf.Main.java

License:Open Source License

/**
 * @param args//from w  w  w .  j a va 2  s .c om
 */
public static void main(String[] args) {
    try {
        if (args.length < 1 || args[0] == "--help" || args[0] == "-h") {
            System.out.print("Usage: java pl.marcinmilkowski.hocrtopdf.Main INPUTURL.html OUTPUTURL.pdf\n"
                    + "\n" + "Converts hOCR files into PDF\n" + "\n"
                    + "Example: java pl.marcinmilkowski.hocrtopdf.Main hocr.html output.pdf\n");
            if (args.length < 1)
                System.exit(-1);
            else
                System.exit(0);
        }
        URL inputHOCRFile = null;
        FileOutputStream outputPDFStream = null;
        try {
            File file = new File(args[0]);
            inputHOCRFile = file.toURI().toURL();
        } catch (MalformedURLException e) {
            System.out.println("The first parameter has to be a valid file.");
            System.out.println("We got an error: " + e.getMessage());
            System.exit(-1);
        }
        try {
            outputPDFStream = new FileOutputStream(args[1]);
        } catch (FileNotFoundException e) {
            System.out.println("The second parameter has to be a valid URL");
            System.exit(-1);
        }

        // The resolution of a PDF file (using iText) is 72pt per inch
        float pointsPerInch = 72.0f;

        // Using the jericho library to parse the HTML file
        Source source = new Source(inputHOCRFile);

        int pageCounter = 1;

        Document pdfDocument = null;
        PdfWriter pdfWriter = null;
        PdfContentByte cb = null;
        RandomAccessFileOrArray ra = null;

        // Find the tag of class ocr_page in order to load the scanned image
        StartTag pageTag = source.getNextStartTag(0, "class", OCRPAGE);
        while (pageTag != null) {
            int prevPos = pageTag.getEnd();
            Pattern imagePattern = Pattern.compile("image\\s+([^;]+)");
            Matcher imageMatcher = imagePattern.matcher(pageTag.getElement().getAttributeValue("title"));
            if (!imageMatcher.find()) {
                System.out.println("Could not find a tag of class \"ocr_page\", aborting.");
                System.exit(-1);
            }
            // Load the image
            Image pageImage = null;
            try {
                File file = new File(imageMatcher.group(1));
                pageImage = Image.getInstance(file.toURI().toURL());
            } catch (MalformedURLException e) {
                System.out.println("Could not load the scanned image from: " + "file://" + imageMatcher.group(1)
                        + ", aborting.");
                System.exit(-1);
            }
            if (pageImage.getOriginalType() == Image.ORIGINAL_TIFF) { // this might
                                                                      // be
                                                                      // multipage
                                                                      // tiff!
                File file = new File(imageMatcher.group(1));
                if (pageCounter == 1 || ra == null) {
                    ra = new RandomAccessFileOrArray(file.toURI().toURL());
                }
                int nPages = TiffImage.getNumberOfPages(ra);
                if (nPages > 0 && pageCounter <= nPages) {
                    pageImage = TiffImage.getTiffImage(ra, pageCounter);
                }
            }
            int dpiX = pageImage.getDpiX();
            if (dpiX == 0) { // for images that don't set the resolution we assume
                             // 300 dpi
                dpiX = 300;
            }
            int dpiY = pageImage.getDpiY();
            if (dpiY == 0) { // as above for dpiX
                dpiY = 300;
            }
            float dotsPerPointX = dpiX / pointsPerInch;
            float dotsPerPointY = dpiY / pointsPerInch;
            float pageImagePixelHeight = pageImage.getHeight();
            if (pdfDocument == null) {
                pdfDocument = new Document(new Rectangle(pageImage.getWidth() / dotsPerPointX,
                        pageImage.getHeight() / dotsPerPointY));
                pdfWriter = PdfWriter.getInstance(pdfDocument, outputPDFStream);
                pdfDocument.open();
                // Put the text behind the picture (reverse for debugging)
                // cb = pdfWriter.getDirectContentUnder();
                cb = pdfWriter.getDirectContent();
            } else {
                pdfDocument.setPageSize(new Rectangle(pageImage.getWidth() / dotsPerPointX,
                        pageImage.getHeight() / dotsPerPointY));
                pdfDocument.newPage();
            }
            // first define a standard font for our text
            BaseFont base = BaseFont.createFont(BaseFont.HELVETICA, BaseFont.CP1250, BaseFont.EMBEDDED);
            Font defaultFont = new Font(base, 8);
            // FontFactory.getFont(FontFactory.HELVETICA, 8, Font.BOLD,
            // CMYKColor.BLACK);

            cb.setHorizontalScaling(1.0f);

            pageImage.scaleToFit(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY);
            pageImage.setAbsolutePosition(0, 0);
            // Put the image in front of the text (reverse for debugging)
            // pdfWriter.getDirectContent().addImage(pageImage);
            pdfWriter.getDirectContentUnder().addImage(pageImage);

            // In order to place text behind the recognised text snippets we are
            // interested in the bbox property
            Pattern bboxPattern = Pattern.compile("bbox(\\s+\\d+){4}");
            // This pattern separates the coordinates of the bbox property
            Pattern bboxCoordinatePattern = Pattern.compile("(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)");
            // Only tags of the ocr_line class are interesting
            StartTag ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE);
            while (ocrTag != null) {
                prevPos = ocrTag.getEnd();
                if ("ocrx_word".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) {
                    net.htmlparser.jericho.Element lineElement = ocrTag.getElement();
                    Matcher bboxMatcher = bboxPattern.matcher(lineElement.getAttributeValue("title"));
                    if (bboxMatcher.find()) {
                        // We found a tag of the ocr_line class containing a bbox property
                        Matcher bboxCoordinateMatcher = bboxCoordinatePattern.matcher(bboxMatcher.group());
                        bboxCoordinateMatcher.find();
                        int[] coordinates = { Integer.parseInt((bboxCoordinateMatcher.group(1))),
                                Integer.parseInt((bboxCoordinateMatcher.group(2))),
                                Integer.parseInt((bboxCoordinateMatcher.group(3))),
                                Integer.parseInt((bboxCoordinateMatcher.group(4))) };
                        String line = lineElement.getContent().getTextExtractor().toString();
                        float bboxWidthPt = (coordinates[2] - coordinates[0]) / dotsPerPointX;
                        float bboxHeightPt = (coordinates[3] - coordinates[1]) / dotsPerPointY;

                        // Put the text into the PDF
                        cb.beginText();
                        // Comment the next line to debug the PDF output (visible Text)
                        cb.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_INVISIBLE);
                        // height
                        cb.setFontAndSize(defaultFont.getBaseFont(), Math.max(Math.round(bboxHeightPt), 1));
                        // width
                        cb.setHorizontalScaling(bboxWidthPt / cb.getEffectiveStringWidth(line, false));
                        cb.moveText((coordinates[0] / dotsPerPointX),
                                ((pageImagePixelHeight - coordinates[3]) / dotsPerPointY));
                        cb.showText(line);
                        cb.endText();
                        cb.setHorizontalScaling(1.0f);
                    }
                } else {
                    if ("ocr_page".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) {
                        pageCounter++;
                        pageTag = ocrTag;
                        break;
                    }
                }
                ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE);
            }
            if (ocrTag == null) {
                pdfDocument.close();
                break;
            }
        }
    } catch (DocumentException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}