List of usage examples for com.itextpdf.text.pdf.PdfReader#getNumberOfPages()
public int getNumberOfPages()
From source file:pdfreadersample.PdfReadersample.java
/** * @param args the command line arguments */// w w w . jav a 2 s. c o m public static void main(String[] args) throws IOException { // TODO code application logic here String[] lstrwordlist = {}; ArrayList<Integer> lobjlist = new ArrayList<Integer>(); PdfReader reader = new PdfReader("C:/Users/Varnith/Desktop/TestFilewithNumbers.pdf"); System.out.println("This PDF has " + reader.getNumberOfPages() + " pages."); for (int i = 0; i < reader.getNumberOfPages(); i++) { String page = PdfTextExtractor.getTextFromPage(reader, i + 1); lstrwordlist = page.trim().split("\n"); //System.out.println("hello"); for (int j = 0; j < lstrwordlist.length; j++) { lobjlist.add(Integer.valueOf(lstrwordlist[j].trim())); } //lobjlist.add(lstrwordlist[0]); //System.out.println("Page Content:\n\n"+page+"\n\n"); } System.out.println("length of element :" + lobjlist.size()); //System.out.println("Is this document encrypted: "+reader.isEncrypted()); }
From source file:pdftotextconverter.PDFToTextConverter.java
public static void convertPDFToText(String src, String desc) { try {//from w w w .jav a 2 s .c om FileWriter fw = new FileWriter(desc); BufferedWriter bw = new BufferedWriter(fw); PdfReader pr = new PdfReader(src); int pNum = pr.getNumberOfPages(); for (int page = 1; page <= pNum; page++) { String text = PdfTextExtractor.getTextFromPage(pr, page); bw.write(text); bw.newLine(); } bw.flush(); bw.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:pdf_text_extract.Main.java
License:Open Source License
/**
 * Dumps text fragment positions of a PDF to a file or to standard output.
 *
 * <p>Arguments: {@code input [page] output}. An input of {@code "-"} reads the
 * PDF from standard input; an output of {@code "-"} writes to standard output.
 * When no page is given, every page is processed in order.
 *
 * @param argv two or three command line arguments as described above
 * @throws IOException if the PDF cannot be read or the output cannot be opened
 * @throws NumberFormatException if the page argument is not an integer
 */
public static void main(String[] argv) throws IOException {
    if (argv.length != 3 && argv.length != 2) {
        usage();
        return;
    }

    PdfReader reader;
    if ("-".equals(argv[0])) {
        reader = new PdfReader(System.in);
    } else {
        File pdf = new File(argv[0]);
        if (!pdf.canRead() || !pdf.isFile()) {
            System.err.println("cannot read input file " + pdf.getAbsolutePath());
            return;
        }
        reader = new PdfReader(pdf.getAbsolutePath());
    }

    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);

        // null means "no page requested": process the whole document.
        Integer pageNumber;
        String outputFilename;
        if (argv.length == 3) {
            pageNumber = Integer.parseInt(argv[1]);
            outputFilename = argv[2];
        } else {
            pageNumber = null;
            outputFilename = argv[1];
        }

        PrintWriter out;
        if ("-".equals(outputFilename)) {
            out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
        } else {
            out = new PrintWriter(new File(outputFilename), "UTF-8");
        }

        try {
            RenderListener dumper = new DumpTextFragmentPositions(out);
            if (pageNumber != null) {
                parser.processContent(pageNumber, dumper);
            } else {
                int pages = reader.getNumberOfPages();
                // processContent is called with 1-based page numbers.
                for (int p = 1; p <= pages; p++) {
                    parser.processContent(p, dumper);
                }
            }
        } finally {
            // Flush and close the output even if parsing a page fails.
            out.close();
        }
    } finally {
        // Release the reader on every exit path, including bad arguments.
        reader.close();
    }
}
From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License:Open Source License
/** * Extracts text chunks from PDF using iText and stores them in BxDocument object. * Depending on parsed PDF, extracted text chunks may or may not be individual glyphs, * they correspond to single string operands of PDF's text-showing operators * (Tj, TJ, ' and ")./* www.j a v a 2 s. c om*/ * @param stream PDF's stream * @return BxDocument containing pages with extracted chunks stored as BxChunk lists * @throws AnalysisException AnalysisException */ @Override public BxDocument extractCharacters(InputStream stream) throws AnalysisException { try { BxDocumentCreator documentCreator = new BxDocumentCreator(); PdfReader reader = new PdfReader(stream); PdfContentStreamProcessor processor = new PdfContentStreamProcessor(documentCreator); for (int pageNumber = 1; pageNumber <= reader.getNumberOfPages(); pageNumber++) { if (frontPagesLimit > 0 && backPagesLimit > 0 && pageNumber > frontPagesLimit && pageNumber < reader.getNumberOfPages() - 1 - backPagesLimit) { continue; } documentCreator.processNewBxPage(reader.getPageSize(pageNumber)); PdfDictionary resources = reader.getPageN(pageNumber).getAsDict(PdfName.RESOURCES); processAlternativeFontNames(resources); processAlternativeColorSpace(resources); processor.reset(); processor.processContent(ContentByteUtils.getContentBytesForPage(reader, pageNumber), resources); TimeoutRegister.get().check(); } BxDocument doc = filterComponents(removeDuplicateChunks(documentCreator.document)); if (doc.getFirstChild() == null) { throw new AnalysisException("Document contains no pages"); } return doc; } catch (InvalidPdfException ex) { throw new AnalysisException("Invalid PDF file", ex); } catch (IOException ex) { throw new AnalysisException("Cannot extract characters from PDF file", ex); } }
From source file:sampletree.PdfReaderSample.java
public ArrayList<Integer> ReadPdfDocument() throws IOException { String[] lstrwordlist = {};/*from w w w . j a v a 2s .c o m*/ ArrayList<Integer> lobjlist = new ArrayList<Integer>(); PdfReader reader = new PdfReader("C:/Users/Varnith/Desktop/TestFilewithNumbers.pdf"); System.out.println("This PDF has " + reader.getNumberOfPages() + " pages."); for (int i = 0; i < reader.getNumberOfPages(); i++) { String page = PdfTextExtractor.getTextFromPage(reader, i + 1); lstrwordlist = page.trim().split("\n"); //System.out.println("hello"); for (int j = 0; j < lstrwordlist.length; j++) { lobjlist.add(Integer.valueOf(lstrwordlist[j].trim())); } //lobjlist.add(lstrwordlist[0]); //System.out.println("Page Content:\n\n"+page+"\n\n"); } System.out.println("length of element :" + lobjlist.size()); return lobjlist; }
From source file:se.inera.intyg.intygstyper.fk7263.pdf.PdfDefaultGenerator.java
License:Open Source License
protected PdfDefaultGenerator(Utlatande intyg, List<Status> statuses, ApplicationOrigin applicationOrigin, boolean flatten) throws PdfGeneratorException { try {//from w w w . jav a 2 s . c o m this.intyg = intyg; outputStream = new ByteArrayOutputStream(); PdfReader pdfReader = new PdfReader(PDF_TEMPLATE); PdfStamper pdfStamper = new PdfStamper(pdfReader, this.outputStream); fields = pdfStamper.getAcroFields(); generatePdf(); switch (applicationOrigin) { case MINA_INTYG: // perform additional decoration for MI originated pdf maskSendToFkInformation(pdfStamper); markAsElectronicCopy(pdfStamper); createRightMarginText(pdfStamper, pdfReader.getNumberOfPages(), intyg.getId(), MINA_INTYG_MARGIN_TEXT); break; case WEBCERT: // perform additional decoration for WC originated pdf if (isCertificateSentToFK(statuses)) { maskSendToFkInformation(pdfStamper); markAsElectronicCopy(pdfStamper); } createRightMarginText(pdfStamper, pdfReader.getNumberOfPages(), intyg.getId(), WEBCERT_MARGIN_TEXT); createSignatureNotRequiredField(pdfStamper, pdfReader.getNumberOfPages()); break; default: break; } pdfStamper.setFormFlattening(flatten); pdfStamper.close(); } catch (Exception e) { throw new PdfGeneratorException(e); } }
From source file:se.inera.intyg.intygstyper.fk7263.pdf.PdfEmployeeGenerator.java
License:Open Source License
protected PdfEmployeeGenerator(Utlatande intyg, List<Status> statuses, ApplicationOrigin applicationOrigin, List<String> selectedOptionalFields, boolean flatten) throws PdfGeneratorException { try {/*w ww. j a va 2 s. com*/ this.intyg = intyg; outputStream = new ByteArrayOutputStream(); PdfReader pdfReader = new PdfReader(PDF_TEMPLATE); PdfStamper pdfStamper = new PdfStamper(pdfReader, this.outputStream); fields = pdfStamper.getAcroFields(); switch (applicationOrigin) { case MINA_INTYG: generateMIPdfWithOptionalFields(selectedOptionalFields); // perform additional decoration for MI originated pdf maskSendToFkInformation(pdfStamper); if (!EmployeeOptionalFields.containsAllValues(selectedOptionalFields)) { mark(pdfStamper, WATERMARK_TEXT_CONTENT_IS_CUSTOMIZED, MARK_AS_EMPLOYER_START_X, MARK_AS_EMPLOYER_START_Y, MARK_AS_EMPLOYER_MI_HEIGHT, MARK_AS_EMPLOYER_MI_WIDTH); } else { markAsElectronicCopy(pdfStamper); } createRightMarginText(pdfStamper, pdfReader.getNumberOfPages(), intyg.getId(), MINA_INTYG_MARGIN_TEXT); break; case WEBCERT: generateMinimalPdf(); // perform additional decoration for WC originated pdf maskSendToFkInformation(pdfStamper); mark(pdfStamper, WATERMARK_TEXT_WC_EMPLOYER_MINIMAL_COPY, MARK_AS_EMPLOYER_START_X, MARK_AS_EMPLOYER_START_Y, MARK_AS_EMPLOYER_WC_HEIGTH, MARK_AS_EMPLOYER_WC_WIDTH); createRightMarginText(pdfStamper, pdfReader.getNumberOfPages(), intyg.getId(), WEBCERT_MARGIN_TEXT); break; default: break; } createSignatureNotRequiredField(pdfStamper, pdfReader.getNumberOfPages()); pdfStamper.setFormFlattening(flatten); pdfStamper.close(); } catch (Exception e) { throw new PdfGeneratorException(e); } }
From source file:se.inera.intyg.intygstyper.ts_bas.pdf.PdfGeneratorImpl.java
License:Open Source License
@Override public byte[] generatePDF(Utlatande utlatande, ApplicationOrigin applicationOrigin) throws PdfGeneratorException { try {/*from w ww. j a v a 2 s . com*/ ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PdfReader pdfReader = new PdfReader(getPdfPath(utlatande)); PdfStamper pdfStamper = new PdfStamper(pdfReader, outputStream); pdfStamper.setFormFlattening(formFlattening); AcroFields fields = pdfStamper.getAcroFields(); populatePdfFields(utlatande, fields, applicationOrigin); // Decorate PDF depending on the origin of the pdf-call switch (applicationOrigin) { case MINA_INTYG: createLeftMarginText(pdfStamper, pdfReader.getNumberOfPages(), utlatande.getId(), MINA_INTYG_MARGIN_TEXT); break; case WEBCERT: createLeftMarginText(pdfStamper, pdfReader.getNumberOfPages(), utlatande.getId(), WEBCERT_MARGIN_TEXT); break; default: break; } pdfStamper.close(); return outputStream.toByteArray(); } catch (Exception e) { throw new PdfGeneratorException(e); } }
From source file:se.inera.intyg.intygstyper.ts_diabetes.pdf.PdfGeneratorImpl.java
License:Open Source License
@Override public byte[] generatePDF(Utlatande utlatande, ApplicationOrigin applicationOrigin) throws PdfGeneratorException { try {/*from w w w .j a v a 2 s .c o m*/ ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PdfReader pdfReader = new PdfReader(getPdfPath(utlatande)); PdfStamper pdfStamper = new PdfStamper(pdfReader, outputStream); pdfStamper.setFormFlattening(formFlattening); AcroFields fields = pdfStamper.getAcroFields(); populatePdfFields(utlatande, fields); // Decorate PDF depending on the origin of the pdf-call switch (applicationOrigin) { case MINA_INTYG: createLeftMarginText(pdfStamper, pdfReader.getNumberOfPages(), utlatande.getId(), MINA_INTYG_MARGIN_TEXT); break; case WEBCERT: createLeftMarginText(pdfStamper, pdfReader.getNumberOfPages(), utlatande.getId(), WEBCERT_MARGIN_TEXT); break; default: break; } pdfStamper.close(); return outputStream.toByteArray(); } catch (Exception e) { throw new PdfGeneratorException(e); } }
From source file:SettingUp.ImageExtraction.java
/**
 * Runs an {@code ImageRenderListener} over every page of the PDF named by
 * {@code filename}; the listener is constructed with {@code imgpath}.
 *
 * @throws IOException       if the PDF cannot be opened or a page cannot be processed
 * @throws DocumentException declared by the original signature; kept for callers
 */
public void extractImages() throws IOException, DocumentException {
    PdfReader reader = new PdfReader(filename.toString());
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        ImageRenderListener listener = new ImageRenderListener(imgpath);
        int pageCount = reader.getNumberOfPages();
        // processContent is called with 1-based page numbers.
        for (int page = 1; page <= pageCount; page++) {
            parser.processContent(page, listener);
        }
    } finally {
        // Release the reader even if processing a page throws.
        reader.close();
    }
}