List of usage examples for com.itextpdf.text Image getDpiX
public int getDpiX()
From source file:com.ephesoft.dcma.imagemagick.MultiPageExecutor.java
License:Open Source License
/** * The <code>addImageToPdf</code> method is used to add image to pdf and make it searchable by adding image text in invisible mode * w.r.t parameter 'isPdfSearchable' passed. * /*from w ww . j a va2 s .c o m*/ * @param pdfWriter {@link PdfWriter} writer of pdf in which image has to be added * @param htmlUrl {@link HocrPage} corresponding html file for fetching text and coordinates * @param imageUrl {@link String} url of image to be added in pdf * @param isPdfSearchable true for searchable pdf else otherwise * @param widthOfLine */ private void addImageToPdf(PdfWriter pdfWriter, HocrPage hocrPage, String imageUrl, boolean isPdfSearchable, final int widthOfLine) { if (null != pdfWriter && null != imageUrl && imageUrl.length() > 0) { try { LOGGER.info("Adding image" + imageUrl + " to pdf using iText"); Image pageImage = Image.getInstance(imageUrl); float dotsPerPointX = pageImage.getDpiX() / PDF_RESOLUTION; float dotsPerPointY = pageImage.getDpiY() / PDF_RESOLUTION; PdfContentByte pdfContentByte = pdfWriter.getDirectContent(); pageImage.scaleToFit(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY); pageImage.setAbsolutePosition(0, 0); // Add image to pdf pdfWriter.getDirectContentUnder().addImage(pageImage); pdfWriter.getDirectContentUnder().add(pdfContentByte); // If pdf is to be made searchable if (isPdfSearchable) { LOGGER.info("Adding invisible text for image: " + imageUrl); float pageImagePixelHeight = pageImage.getHeight(); Font defaultFont = FontFactory.getFont(FontFactory.HELVETICA, 8, Font.BOLD, CMYKColor.BLACK); // Fetch text and coordinates for image to be added Map<String, int[]> textCoordinatesMap = getTextWithCoordinatesMap(hocrPage, widthOfLine); Set<String> ketSet = textCoordinatesMap.keySet(); // Add text at specific location for (String key : ketSet) { int[] coordinates = textCoordinatesMap.get(key); float bboxWidthPt = (coordinates[2] - coordinates[0]) / dotsPerPointX; float bboxHeightPt = (coordinates[3] - coordinates[1]) / dotsPerPointY; pdfContentByte.beginText(); // To make text added as invisible pdfContentByte.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_INVISIBLE); pdfContentByte.setLineWidth(Math.round(bboxWidthPt)); // Ceil is used so that minimum font of any text is 1 // For exception of unbalanced beginText() and endText() if (bboxHeightPt > 0.0) { pdfContentByte.setFontAndSize(defaultFont.getBaseFont(), (float) Math.ceil(bboxHeightPt)); } else { pdfContentByte.setFontAndSize(defaultFont.getBaseFont(), 1); } float xCoordinate = (float) (coordinates[0] / dotsPerPointX); float yCoordinate = (float) ((pageImagePixelHeight - coordinates[3]) / dotsPerPointY); pdfContentByte.moveText(xCoordinate, yCoordinate); pdfContentByte.showText(key); pdfContentByte.endText(); } } pdfContentByte.closePath(); } catch (BadElementException badElementException) { LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: " + badElementException.toString()); } catch (DocumentException documentException) { LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: " + documentException.toString()); } catch (MalformedURLException malformedURLException) { LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: " + malformedURLException.toString()); } catch (IOException ioException) { LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: " + ioException.toString()); } } }
From source file:pdfextract.ExtractInfo.java
public void extractImagesInfo() { try {//from w ww . j a v a 2 s. c o m PdfReader chartReader = new PdfReader("vv.pdf"); for (int i = 0; i < chartReader.getXrefSize(); i++) { PdfObject pdfobj = chartReader.getPdfObject(i); if (pdfobj != null && pdfobj.isStream()) { PdfStream stream = (PdfStream) pdfobj; PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE); //System.out.println("Stream subType: " + pdfsubtype); if (pdfsubtype != null && pdfsubtype.toString().equals(PdfName.IMAGE.toString())) { byte[] image = PdfReader.getStreamBytesRaw((PRStream) stream); Image imageObject = Image.getInstance(image); System.out.println("Resolution" + imageObject.getDpiX()); System.out.println("Height" + imageObject.getHeight()); System.out.println("Width" + imageObject.getWidth()); } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:pl.marcinmilkowski.hocrtopdf.Main.java
License:Open Source License
/** * @param args//from w w w . j a va 2 s .c om */ public static void main(String[] args) { try { if (args.length < 1 || args[0] == "--help" || args[0] == "-h") { System.out.print("Usage: java pl.marcinmilkowski.hocrtopdf.Main INPUTURL.html OUTPUTURL.pdf\n" + "\n" + "Converts hOCR files into PDF\n" + "\n" + "Example: java pl.marcinmilkowski.hocrtopdf.Main hocr.html output.pdf\n"); if (args.length < 1) System.exit(-1); else System.exit(0); } URL inputHOCRFile = null; FileOutputStream outputPDFStream = null; try { File file = new File(args[0]); inputHOCRFile = file.toURI().toURL(); } catch (MalformedURLException e) { System.out.println("The first parameter has to be a valid file."); System.out.println("We got an error: " + e.getMessage()); System.exit(-1); } try { outputPDFStream = new FileOutputStream(args[1]); } catch (FileNotFoundException e) { System.out.println("The second parameter has to be a valid URL"); System.exit(-1); } // The resolution of a PDF file (using iText) is 72pt per inch float pointsPerInch = 72.0f; // Using the jericho library to parse the HTML file Source source = new Source(inputHOCRFile); int pageCounter = 1; Document pdfDocument = null; PdfWriter pdfWriter = null; PdfContentByte cb = null; RandomAccessFileOrArray ra = null; // Find the tag of class ocr_page in order to load the scanned image StartTag pageTag = source.getNextStartTag(0, "class", OCRPAGE); while (pageTag != null) { int prevPos = pageTag.getEnd(); Pattern imagePattern = Pattern.compile("image\\s+([^;]+)"); Matcher imageMatcher = imagePattern.matcher(pageTag.getElement().getAttributeValue("title")); if (!imageMatcher.find()) { System.out.println("Could not find a tag of class \"ocr_page\", aborting."); System.exit(-1); } // Load the image Image pageImage = null; try { File file = new File(imageMatcher.group(1)); pageImage = Image.getInstance(file.toURI().toURL()); } catch (MalformedURLException e) { System.out.println("Could not load the scanned image from: " + "file://" + imageMatcher.group(1) + ", aborting."); System.exit(-1); } if (pageImage.getOriginalType() == Image.ORIGINAL_TIFF) { // this might // be // multipage // tiff! File file = new File(imageMatcher.group(1)); if (pageCounter == 1 || ra == null) { ra = new RandomAccessFileOrArray(file.toURI().toURL()); } int nPages = TiffImage.getNumberOfPages(ra); if (nPages > 0 && pageCounter <= nPages) { pageImage = TiffImage.getTiffImage(ra, pageCounter); } } int dpiX = pageImage.getDpiX(); if (dpiX == 0) { // for images that don't set the resolution we assume // 300 dpi dpiX = 300; } int dpiY = pageImage.getDpiY(); if (dpiY == 0) { // as above for dpiX dpiY = 300; } float dotsPerPointX = dpiX / pointsPerInch; float dotsPerPointY = dpiY / pointsPerInch; float pageImagePixelHeight = pageImage.getHeight(); if (pdfDocument == null) { pdfDocument = new Document(new Rectangle(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY)); pdfWriter = PdfWriter.getInstance(pdfDocument, outputPDFStream); pdfDocument.open(); // Put the text behind the picture (reverse for debugging) // cb = pdfWriter.getDirectContentUnder(); cb = pdfWriter.getDirectContent(); } else { pdfDocument.setPageSize(new Rectangle(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY)); pdfDocument.newPage(); } // first define a standard font for our text BaseFont base = BaseFont.createFont(BaseFont.HELVETICA, BaseFont.CP1250, BaseFont.EMBEDDED); Font defaultFont = new Font(base, 8); // FontFactory.getFont(FontFactory.HELVETICA, 8, Font.BOLD, // CMYKColor.BLACK); cb.setHorizontalScaling(1.0f); pageImage.scaleToFit(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY); pageImage.setAbsolutePosition(0, 0); // Put the image in front of the text (reverse for debugging) // pdfWriter.getDirectContent().addImage(pageImage); pdfWriter.getDirectContentUnder().addImage(pageImage); // In order to place text behind the recognised text snippets we are // interested in the bbox property Pattern bboxPattern = Pattern.compile("bbox(\\s+\\d+){4}"); // This pattern separates the coordinates of the bbox property Pattern bboxCoordinatePattern = Pattern.compile("(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); // Only tags of the ocr_line class are interesting StartTag ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE); while (ocrTag != null) { prevPos = ocrTag.getEnd(); if ("ocrx_word".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) { net.htmlparser.jericho.Element lineElement = ocrTag.getElement(); Matcher bboxMatcher = bboxPattern.matcher(lineElement.getAttributeValue("title")); if (bboxMatcher.find()) { // We found a tag of the ocr_line class containing a bbox property Matcher bboxCoordinateMatcher = bboxCoordinatePattern.matcher(bboxMatcher.group()); bboxCoordinateMatcher.find(); int[] coordinates = { Integer.parseInt((bboxCoordinateMatcher.group(1))), Integer.parseInt((bboxCoordinateMatcher.group(2))), Integer.parseInt((bboxCoordinateMatcher.group(3))), Integer.parseInt((bboxCoordinateMatcher.group(4))) }; String line = lineElement.getContent().getTextExtractor().toString(); float bboxWidthPt = (coordinates[2] - coordinates[0]) / dotsPerPointX; float bboxHeightPt = (coordinates[3] - coordinates[1]) / dotsPerPointY; // Put the text into the PDF cb.beginText(); // Comment the next line to debug the PDF output (visible Text) cb.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_INVISIBLE); // height cb.setFontAndSize(defaultFont.getBaseFont(), Math.max(Math.round(bboxHeightPt), 1)); // width cb.setHorizontalScaling(bboxWidthPt / cb.getEffectiveStringWidth(line, false)); cb.moveText((coordinates[0] / dotsPerPointX), ((pageImagePixelHeight - coordinates[3]) / dotsPerPointY)); cb.showText(line); cb.endText(); cb.setHorizontalScaling(1.0f); } } else { if ("ocr_page".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) { pageCounter++; pageTag = ocrTag; break; } } ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE); } if (ocrTag == null) { pdfDocument.close(); break; } } } catch (DocumentException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }