List of usage examples for com.itextpdf.text Image getHeight
public float getHeight()
From source file:pdfextract.ExtractInfo.java
public void extractImagesInfo() { try {/*from www . j av a 2 s . com*/ PdfReader chartReader = new PdfReader("vv.pdf"); for (int i = 0; i < chartReader.getXrefSize(); i++) { PdfObject pdfobj = chartReader.getPdfObject(i); if (pdfobj != null && pdfobj.isStream()) { PdfStream stream = (PdfStream) pdfobj; PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE); //System.out.println("Stream subType: " + pdfsubtype); if (pdfsubtype != null && pdfsubtype.toString().equals(PdfName.IMAGE.toString())) { byte[] image = PdfReader.getStreamBytesRaw((PRStream) stream); Image imageObject = Image.getInstance(image); System.out.println("Resolution" + imageObject.getDpiX()); System.out.println("Height" + imageObject.getHeight()); System.out.println("Width" + imageObject.getWidth()); } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:pl.marcinmilkowski.hocrtopdf.Main.java
License:Open Source License
/** * @param args//from w w w.j a va 2s .com */ public static void main(String[] args) { try { if (args.length < 1 || args[0] == "--help" || args[0] == "-h") { System.out.print("Usage: java pl.marcinmilkowski.hocrtopdf.Main INPUTURL.html OUTPUTURL.pdf\n" + "\n" + "Converts hOCR files into PDF\n" + "\n" + "Example: java pl.marcinmilkowski.hocrtopdf.Main hocr.html output.pdf\n"); if (args.length < 1) System.exit(-1); else System.exit(0); } URL inputHOCRFile = null; FileOutputStream outputPDFStream = null; try { File file = new File(args[0]); inputHOCRFile = file.toURI().toURL(); } catch (MalformedURLException e) { System.out.println("The first parameter has to be a valid file."); System.out.println("We got an error: " + e.getMessage()); System.exit(-1); } try { outputPDFStream = new FileOutputStream(args[1]); } catch (FileNotFoundException e) { System.out.println("The second parameter has to be a valid URL"); System.exit(-1); } // The resolution of a PDF file (using iText) is 72pt per inch float pointsPerInch = 72.0f; // Using the jericho library to parse the HTML file Source source = new Source(inputHOCRFile); int pageCounter = 1; Document pdfDocument = null; PdfWriter pdfWriter = null; PdfContentByte cb = null; RandomAccessFileOrArray ra = null; // Find the tag of class ocr_page in order to load the scanned image StartTag pageTag = source.getNextStartTag(0, "class", OCRPAGE); while (pageTag != null) { int prevPos = pageTag.getEnd(); Pattern imagePattern = Pattern.compile("image\\s+([^;]+)"); Matcher imageMatcher = imagePattern.matcher(pageTag.getElement().getAttributeValue("title")); if (!imageMatcher.find()) { System.out.println("Could not find a tag of class \"ocr_page\", aborting."); System.exit(-1); } // Load the image Image pageImage = null; try { File file = new File(imageMatcher.group(1)); pageImage = Image.getInstance(file.toURI().toURL()); } catch (MalformedURLException e) { System.out.println("Could not load the scanned image from: " + "file://" + imageMatcher.group(1) + ", aborting."); System.exit(-1); } if (pageImage.getOriginalType() == Image.ORIGINAL_TIFF) { // this might // be // multipage // tiff! File file = new File(imageMatcher.group(1)); if (pageCounter == 1 || ra == null) { ra = new RandomAccessFileOrArray(file.toURI().toURL()); } int nPages = TiffImage.getNumberOfPages(ra); if (nPages > 0 && pageCounter <= nPages) { pageImage = TiffImage.getTiffImage(ra, pageCounter); } } int dpiX = pageImage.getDpiX(); if (dpiX == 0) { // for images that don't set the resolution we assume // 300 dpi dpiX = 300; } int dpiY = pageImage.getDpiY(); if (dpiY == 0) { // as above for dpiX dpiY = 300; } float dotsPerPointX = dpiX / pointsPerInch; float dotsPerPointY = dpiY / pointsPerInch; float pageImagePixelHeight = pageImage.getHeight(); if (pdfDocument == null) { pdfDocument = new Document(new Rectangle(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY)); pdfWriter = PdfWriter.getInstance(pdfDocument, outputPDFStream); pdfDocument.open(); // Put the text behind the picture (reverse for debugging) // cb = pdfWriter.getDirectContentUnder(); cb = pdfWriter.getDirectContent(); } else { pdfDocument.setPageSize(new Rectangle(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY)); pdfDocument.newPage(); } // first define a standard font for our text BaseFont base = BaseFont.createFont(BaseFont.HELVETICA, BaseFont.CP1250, BaseFont.EMBEDDED); Font defaultFont = new Font(base, 8); // FontFactory.getFont(FontFactory.HELVETICA, 8, Font.BOLD, // CMYKColor.BLACK); cb.setHorizontalScaling(1.0f); pageImage.scaleToFit(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY); pageImage.setAbsolutePosition(0, 0); // Put the image in front of the text (reverse for debugging) // pdfWriter.getDirectContent().addImage(pageImage); pdfWriter.getDirectContentUnder().addImage(pageImage); // In order to place text behind the recognised text snippets we are // interested in the bbox property Pattern bboxPattern = Pattern.compile("bbox(\\s+\\d+){4}"); // This pattern separates the coordinates of the bbox property Pattern bboxCoordinatePattern = Pattern.compile("(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); // Only tags of the ocr_line class are interesting StartTag ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE); while (ocrTag != null) { prevPos = ocrTag.getEnd(); if ("ocrx_word".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) { net.htmlparser.jericho.Element lineElement = ocrTag.getElement(); Matcher bboxMatcher = bboxPattern.matcher(lineElement.getAttributeValue("title")); if (bboxMatcher.find()) { // We found a tag of the ocr_line class containing a bbox property Matcher bboxCoordinateMatcher = bboxCoordinatePattern.matcher(bboxMatcher.group()); bboxCoordinateMatcher.find(); int[] coordinates = { Integer.parseInt((bboxCoordinateMatcher.group(1))), Integer.parseInt((bboxCoordinateMatcher.group(2))), Integer.parseInt((bboxCoordinateMatcher.group(3))), Integer.parseInt((bboxCoordinateMatcher.group(4))) }; String line = lineElement.getContent().getTextExtractor().toString(); float bboxWidthPt = (coordinates[2] - coordinates[0]) / dotsPerPointX; float bboxHeightPt = (coordinates[3] - coordinates[1]) / dotsPerPointY; // Put the text into the PDF cb.beginText(); // Comment the next line to debug the PDF output (visible Text) cb.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_INVISIBLE); // height cb.setFontAndSize(defaultFont.getBaseFont(), Math.max(Math.round(bboxHeightPt), 1)); // width cb.setHorizontalScaling(bboxWidthPt / cb.getEffectiveStringWidth(line, false)); cb.moveText((coordinates[0] / dotsPerPointX), ((pageImagePixelHeight - coordinates[3]) / dotsPerPointY)); cb.showText(line); cb.endText(); cb.setHorizontalScaling(1.0f); } } else { if ("ocr_page".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) { pageCounter++; pageTag = ocrTag; break; } } ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE); } if (ocrTag == null) { pdfDocument.close(); break; } } } catch (DocumentException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:util.ImageExample.java
public ImageExample(Calendar date, int settings, ArrayList<String> courses, String time, String eventAddress, int totalPrice, String comments, boolean kunde) throws Exception { this.date = date; this.settings = settings; this.courses = courses; this.time = time; this.eventAddress = eventAddress; this.totalPrice = totalPrice; this.comments = comments; this.fileName = ""; GraphicsEnvironment ge = GraphicsEnvironment.getLocalGraphicsEnvironment(); ge.getAllFonts();//from w ww.j a v a 2s. c om FontFactory.register("C:/Windows/Fonts/ARLRDBD.TTF", "Arial Rounded"); Font font = FontFactory.getFont("Arial Rounded", 22, Font.NORMAL, new BaseColor(51, 102, 102)); Document document = new Document(); System.out.println("Pdf creation startet"); try { if (kunde) { fileName = "grill" + new SimpleDateFormat("dd. MMMMM yyyy hhmm").format(date.getTime()) + ".pdf"; } else { fileName = "grill" + new SimpleDateFormat("dd. MMMMM yyyy hhmm").format(date.getTime()) + "Prisliste.pdf"; } FileOutputStream file = new FileOutputStream(new File("C:/Users/Mark/Desktop", fileName)); PdfWriter writer = PdfWriter.getInstance(document, file); document.open(); Image img = Image.getInstance("src/pictures/cirkles.png"); img.scaleToFit(277, 277); img.setAbsolutePosition(40, PageSize.A4.getHeight() - img.getHeight()); document.add(img); Chunk chunk = new Chunk("Grillmester 'Frankie'", font); chunk.setCharacterSpacing(3); Paragraph header = new Paragraph(chunk); header.setAlignment(Element.ALIGN_RIGHT); header.setSpacingBefore(15); document.add(header); Paragraph title = new Paragraph( "Tilbud angende d. " + new SimpleDateFormat("dd. MMMMM yyyy").format(date.getTime()) + ".", new Font(Font.FontFamily.TIMES_ROMAN, 20, Font.BOLD)); title.setAlignment(Element.ALIGN_LEFT); title.setIndentationLeft(235); title.setSpacingBefore(100); title.setLeading(17); document.add(title); Paragraph subtitle = new Paragraph("(Arrangement til " + settings + " personer).", new Font(Font.FontFamily.TIMES_ROMAN, 18, Font.ITALIC)); subtitle.setAlignment(Element.ALIGN_LEFT); subtitle.setIndentationLeft(235); subtitle.setLeading(17); document.add(subtitle); Paragraph body = new Paragraph("\n\nDer er aftalt:\n\n", new Font(Font.FontFamily.TIMES_ROMAN, 18)); body.setAlignment(Element.ALIGN_LEFT); body.setIndentationLeft(235); body.setLeading(17); for (String course : courses) { body.add(course + "\n\n"); } body.add("\nServeres klokken " + time + " i " + eventAddress + "."); document.add(body); Paragraph ending = new Paragraph("\n\n\nPris: " + totalPrice + ",-kr.", new Font(Font.FontFamily.TIMES_ROMAN, 18, Font.BOLD)); ending.setAlignment(Element.ALIGN_LEFT); ending.setIndentationLeft(235); ending.setLeading(17); document.add(ending); if (!comments.equals("null")) { if (!comments.equals("")) { Paragraph comment = new Paragraph("\n\nKommentarer\n" + comments, new Font(Font.FontFamily.TIMES_ROMAN, 18)); comment.setAlignment(Element.ALIGN_LEFT); comment.setIndentationLeft(235); comment.setLeading(17); document.add(comment); } } Image footerImg = Image.getInstance("src/pictures/grillmester.png"); footerImg.scaleToFit(210, 210); footerImg.setAbsolutePosition(40, 40); document.add(footerImg); Image img2 = Image.getInstance("src/pictures/Contact.png"); img2.scaleToFit(250, 250); img2.setAbsolutePosition(20, PageSize.A4.getHeight() - img.getHeight() - 250); document.add(img2); document.close(); System.out.println("Pdf finish"); } catch (Exception e) { e.printStackTrace(); System.out.println(e.getLocalizedMessage()); } }