List of usage examples for the PdfReader constructor of com.itextpdf.text.pdf.PdfReader
public PdfReader(final PdfReader reader)
From source file:pdf_text_extract.Main.java
License:Open Source License
public static void main(String[] argv) throws IOException { if (argv.length != 3 && argv.length != 2) { usage();//from www. j a v a2 s. co m return; } PdfReader reader; if ("-".equals(argv[0])) { reader = new PdfReader(System.in); } else { File pdf = new File(argv[0]); if (!pdf.canRead() || !pdf.isFile()) { System.err.println("cannot read input file " + pdf.getAbsolutePath()); return; } reader = new PdfReader(pdf.getAbsolutePath()); } PdfReaderContentParser parser = new PdfReaderContentParser(reader); Integer pageNumber; String outputFilename; if (argv.length == 3) { pageNumber = Integer.parseInt(argv[1]); outputFilename = argv[2]; } else { pageNumber = null; outputFilename = argv[1]; } PrintWriter out; if ("-".equals(outputFilename)) { out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8")); } else { File outputFile = new File(outputFilename); out = new PrintWriter(outputFile, "UTF-8"); } RenderListener dumper = new DumpTextFragmentPositions(out); if (pageNumber != null) { parser.processContent(pageNumber, dumper); } else { int pages = reader.getNumberOfPages(); for (int p = 0; p < pages; p++) parser.processContent(p + 1, dumper); } out.close(); reader.close(); }
From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License:Open Source License
/** * Extracts text chunks from PDF using iText and stores them in BxDocument object. * Depending on parsed PDF, extracted text chunks may or may not be individual glyphs, * they correspond to single string operands of PDF's text-showing operators * (Tj, TJ, ' and ")./*from www . ja v a 2 s.c om*/ * @param stream PDF's stream * @return BxDocument containing pages with extracted chunks stored as BxChunk lists * @throws AnalysisException AnalysisException */ @Override public BxDocument extractCharacters(InputStream stream) throws AnalysisException { try { BxDocumentCreator documentCreator = new BxDocumentCreator(); PdfReader reader = new PdfReader(stream); PdfContentStreamProcessor processor = new PdfContentStreamProcessor(documentCreator); for (int pageNumber = 1; pageNumber <= reader.getNumberOfPages(); pageNumber++) { if (frontPagesLimit > 0 && backPagesLimit > 0 && pageNumber > frontPagesLimit && pageNumber < reader.getNumberOfPages() - 1 - backPagesLimit) { continue; } documentCreator.processNewBxPage(reader.getPageSize(pageNumber)); PdfDictionary resources = reader.getPageN(pageNumber).getAsDict(PdfName.RESOURCES); processAlternativeFontNames(resources); processAlternativeColorSpace(resources); processor.reset(); processor.processContent(ContentByteUtils.getContentBytesForPage(reader, pageNumber), resources); TimeoutRegister.get().check(); } BxDocument doc = filterComponents(removeDuplicateChunks(documentCreator.document)); if (doc.getFirstChild() == null) { throw new AnalysisException("Document contains no pages"); } return doc; } catch (InvalidPdfException ex) { throw new AnalysisException("Invalid PDF file", ex); } catch (IOException ex) { throw new AnalysisException("Cannot extract characters from PDF file", ex); } }
From source file:printom.PDFCreator.java
public static void createLabel(int aLabelType, int aJobNum, char aJobIdentifier, String aCustName, String aItemName, String aItemCode, String aDate, String aPOrderNum, int aInputPcs) { String myJobNum = String.valueOf(aJobNum); char myJobIdentifier = aJobIdentifier; String myCustName = aCustName; String myItemName = aItemName; String myItemCode = aItemCode; String myDate = aDate;/* w w w . j av a2 s. c o m*/ String myPOrderNum = aPOrderNum; String myInputPcs = String.valueOf(aInputPcs); try { String src = ""; if (aLabelType == 1) { src = CTNLABEL; } String dest = RESULTLABEL; Font timesJob = new Font(Font.FontFamily.TIMES_ROMAN, 16, Font.BOLD, BaseColor.WHITE); Font timesDef = new Font(Font.FontFamily.TIMES_ROMAN, 16, Font.NORMAL, BaseColor.BLACK); PdfReader reader = new PdfReader(src); PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest)); PdfContentByte canvas = stamper.getOverContent(1); for (int k = 0; k < 3; k++) { //Positions int[] x = { 298, 350, 125, 80, 80, 80, 80, 120 }; int[] y = { 562, 562, 518, 498, 479, 459, 440, 420 }; if (k == 1) { for (int j = 0; j < 8; j++) { y[j] = y[j] - 186; } } if (k == 2) { for (int j = 0; j < 8; j++) { y[j] = y[j] - 372; } } for (int i = 0; i < 2; i++) { if (i == 1) { for (int j = 0; j < 8; j++) { x[j] = x[j] + 372; } } ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, new Phrase(myJobNum, timesJob), x[0], y[0], 0); ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, new Phrase(String.valueOf(myJobIdentifier), timesDef), x[1], y[1], 0); ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, new Phrase(myCustName, timesDef), x[2], y[2], 0); ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, new Phrase(myItemName, timesDef), x[3], y[3], 0); ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, new Phrase(myItemCode, timesDef), x[4], y[4], 0); ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, new Phrase(myDate, timesDef), x[5], y[5], 0); ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, 
new Phrase(myPOrderNum, timesDef), x[6], y[6], 0); ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT, new Phrase(myInputPcs, timesDef), x[7], y[7], 0); } } stamper.close(); reader.close(); } catch (IOException | DocumentException ex) { Logger.getLogger(PDFCreator.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:printom.PDFCreator.java
/**
 * Stamps job details onto the report template ({@code REPORT}) and writes the result
 * to {@code RESULTREPORT}.
 *
 * <p>Eleven fields are drawn once on page 1 at fixed coordinates. The job number uses
 * the bold white font, every other field the default black font. The quantity with
 * overs is formatted with the pattern {@code #,###}. Failures are logged at SEVERE
 * level and not rethrown.
 *
 * @param aJobNum         job number
 * @param aJobIdentifier  single-character job identifier
 * @param aCustName       customer name
 * @param aItemName       item name
 * @param aItemCode       item code
 * @param aDate           date text, printed as given
 * @param aPOrderNum      purchase order number
 * @param aInputPcs       piece count
 * @param aQtyWithOvers   quantity including overs
 * @param aNumContainers  container count, printed as "(N cases with overs)"
 * @param aInputContainer container description
 */
public static void createReport(int aJobNum, char aJobIdentifier, String aCustName, String aItemName,
        String aItemCode, String aDate, String aPOrderNum, int aInputPcs, int aQtyWithOvers,
        int aNumContainers, String aInputContainer) {
    String qtyWithOversText = new DecimalFormat("#,###").format(aQtyWithOvers);

    // Field texts in drawing order; entry i is placed at (x[i], y[i]).
    String[] fieldTexts = {
        String.valueOf(aJobNum), String.valueOf(aJobIdentifier), aCustName, aItemName,
        aItemCode, aDate, aPOrderNum, String.valueOf(aInputPcs), qtyWithOversText,
        "(" + String.valueOf(aNumContainers) + " cases with overs)", aInputContainer
    };
    int[] x = { 441, 510, 426, 87, 87, 87, 426, 307, 218, 325, 426 };
    int[] y = { 547, 547, 473, 450, 428, 473, 450, 325, 385, 385, 428 };

    try {
        String src = REPORT;
        String dest = RESULTREPORT;

        Font timesJob = new Font(Font.FontFamily.TIMES_ROMAN, 18, Font.BOLD, BaseColor.WHITE);
        Font timesDef = new Font(Font.FontFamily.TIMES_ROMAN, 16, Font.NORMAL, BaseColor.BLACK);

        // Open the template first so an unreadable source never truncates the destination.
        PdfReader reader = new PdfReader(src);
        try (FileOutputStream output = new FileOutputStream(dest)) {
            PdfStamper stamper = new PdfStamper(reader, output);
            PdfContentByte canvas = stamper.getOverContent(1);

            for (int field = 0; field < fieldTexts.length; field++) {
                Font font = (field == 0) ? timesJob : timesDef;
                ColumnText.showTextAligned(canvas, Element.ALIGN_LEFT,
                        new Phrase(fieldTexts[field], font), x[field], y[field], 0);
            }
            stamper.close();
        } finally {
            reader.close();
        }
    } catch (IOException | DocumentException ex) {
        Logger.getLogger(PDFCreator.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:Project.data.preparation.ExtractPageContentArea.java
public void parsePdf(String pdf, int pageNum, int upper_x, int upper_y, int lower_x, int lower_y) throws IOException { PdfReader reader = new PdfReader(pdf); // System.out.println("(" + upper_x + " , " + upper_y + ") to ( " + lower_x + " , " + lower_y + ")"); rect = new Rectangle(upper_x, upper_y, lower_x, lower_y); RenderFilter filter = new RegionTextRenderFilter(getRect()); TextExtractionStrategy strategy;/*from w w w.j av a 2s. c o m*/ strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter); TextCropped = PdfTextExtractor.getTextFromPage(reader, pageNum, strategy); setTextCropped(TextCropped); reader.close(); }
From source file:renamergroup.FileFinderThread.java
public void run() { while (!renamerframe.fileExploreEnded() || !fileList.isEmpty()) { try {//from ww w . ja v a 2 s. c om String fileDir = fileList.poll(); if (fileDir == null) continue; PdfReader reader = new PdfReader(fileDir); Map<String, String> info = reader.getInfo(); String author = info.get("Author"); String title = info.get("Title"); String subject = info.get("Subject"); String keywords = info.get("Keywords"); if (mainframe.isMatched(author, "author") || mainframe.isMatched(title, "title") || mainframe.isMatched(subject, "subject") || mainframe.isMatched(keywords, "keywords")) { renamerframe.initListToRename(fileDir); } info.clear(); reader.close(); } catch (IOException ex) { System.out.println(ex.getMessage()); } } }
From source file:sampletree.PdfReaderSample.java
public ArrayList<Integer> ReadPdfDocument() throws IOException { String[] lstrwordlist = {};//from w w w.j av a 2s.com ArrayList<Integer> lobjlist = new ArrayList<Integer>(); PdfReader reader = new PdfReader("C:/Users/Varnith/Desktop/TestFilewithNumbers.pdf"); System.out.println("This PDF has " + reader.getNumberOfPages() + " pages."); for (int i = 0; i < reader.getNumberOfPages(); i++) { String page = PdfTextExtractor.getTextFromPage(reader, i + 1); lstrwordlist = page.trim().split("\n"); //System.out.println("hello"); for (int j = 0; j < lstrwordlist.length; j++) { lobjlist.add(Integer.valueOf(lstrwordlist[j].trim())); } //lobjlist.add(lstrwordlist[0]); //System.out.println("Page Content:\n\n"+page+"\n\n"); } System.out.println("length of element :" + lobjlist.size()); return lobjlist; }
From source file:se.billes.pdf.renderer.process.TemplatePageRenderer.java
License:Open Source License
public void render(PdfWriter writer, Document document) throws PdfRenderException { writer.setPageEvent(page);//w w w.ja v a 2s . c om Template template = page.getTemplate(); try { PdfReader reader = new PdfReader(template.getTemplatePath()); PdfImportedPage pageImportedPage = writer.getImportedPage(reader, template.getPage()); Image pdfMirror; float width = SizeFactory.CUT_MARK; float height = SizeFactory.CUT_MARK; pdfMirror = Image.getInstance(pageImportedPage); pdfMirror.setAbsolutePosition(SizeFactory.millimetersToPostscriptPoints(width), SizeFactory.millimetersToPostscriptPoints(height)); document.newPage(); ImageFactory.getInstances().add(new ImageInstance(pdfMirror, reader)); PdfContentByte cb = writer.getDirectContent(); try { cb.addImage(pdfMirror); } catch (DocumentException e) { e.printStackTrace(); } if (page.getBlocks() != null) { for (BaseElement block : page.getBlocks()) { block.onRender(cb); } } } catch (BadElementException e) { e.printStackTrace(); throw new PdfRenderException(e); } catch (IOException e) { throw new PdfRenderException(e); } }
From source file:se.billes.pdf.renderer.request.factory.ImageFactory.java
License:Open Source License
/**
 * Creates an {@code ImageInstance} for the given file and records it in
 * {@code instances}.
 *
 * <p>Files whose lower-cased name ends with {@code .pdf} are opened with a
 * {@code PdfReader} and their first page is imported as the image; the reader is
 * passed on to the instance rather than closed here. Any other file is loaded
 * directly as an image and the instance gets a {@code null} reader.
 *
 * @param cb   content byte whose writer is used to import the PDF page
 * @param file image or PDF file to load
 * @return the newly created and registered instance
 * @throws IOException         if the file cannot be read
 * @throws BadElementException if the content cannot be turned into an image
 */
public ImageInstance getImageByFile(PdfContentByte cb, File file) throws IOException, BadElementException {
    boolean isPdf = file.getName().toLowerCase().endsWith(".pdf");

    ImageInstance created;
    if (!isPdf) {
        created = new ImageInstance(Image.getInstance(file.getAbsolutePath()), null);
    } else {
        PdfReader pdfSource = new PdfReader(file.getAbsolutePath());
        PdfImportedPage firstPage = cb.getPdfWriter().getImportedPage(pdfSource, 1);
        created = new ImageInstance(Image.getInstance(firstPage), pdfSource);
    }

    instances.add(created);
    return created;
}
From source file:se.inera.intyg.intygstyper.fk7263.pdf.PdfDefaultGenerator.java
License:Open Source License
protected PdfDefaultGenerator(Utlatande intyg, List<Status> statuses, ApplicationOrigin applicationOrigin, boolean flatten) throws PdfGeneratorException { try {//from w w w .ja v a 2s . c om this.intyg = intyg; outputStream = new ByteArrayOutputStream(); PdfReader pdfReader = new PdfReader(PDF_TEMPLATE); PdfStamper pdfStamper = new PdfStamper(pdfReader, this.outputStream); fields = pdfStamper.getAcroFields(); generatePdf(); switch (applicationOrigin) { case MINA_INTYG: // perform additional decoration for MI originated pdf maskSendToFkInformation(pdfStamper); markAsElectronicCopy(pdfStamper); createRightMarginText(pdfStamper, pdfReader.getNumberOfPages(), intyg.getId(), MINA_INTYG_MARGIN_TEXT); break; case WEBCERT: // perform additional decoration for WC originated pdf if (isCertificateSentToFK(statuses)) { maskSendToFkInformation(pdfStamper); markAsElectronicCopy(pdfStamper); } createRightMarginText(pdfStamper, pdfReader.getNumberOfPages(), intyg.getId(), WEBCERT_MARGIN_TEXT); createSignatureNotRequiredField(pdfStamper, pdfReader.getNumberOfPages()); break; default: break; } pdfStamper.setFormFlattening(flatten); pdfStamper.close(); } catch (Exception e) { throw new PdfGeneratorException(e); } }