List of usage examples for com.itextpdf.text.pdf PdfReader getNumberOfPages
public int getNumberOfPages()
From source file:gravabncertificado007.CarimboCertificado.java
public void aplicaCariboGedi(String BN, String caminhoarquivo) throws DocumentException, IOException, RuntimeException { PdfReader.unethicalreading = true;/*from www . j ava 2s. c o m*/ //Cria o reader para o primeiro PDF PdfReader reader = new PdfReader(caminhoarquivo); Rectangle psize = reader.getPageSize(1); float width = psize.getWidth(); float height = psize.getHeight(); Document document = new Document(new Rectangle(width, height)); PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(caminhoarquivo.substring(0, caminhoarquivo.length() - 4) + "-G.pdf")); document.open(); int i = 0; BN = BN.substring(BN.length() - 13, BN.length() - 4); PdfContentByte cb = writer.getDirectContent(); while (i < reader.getNumberOfPages()) { i++; document.newPage(); PdfContentByte under = writer.getDirectContentUnder(); PdfImportedPage page1 = writer.getImportedPage(reader, i); cb.addTemplate(page1, 0, i * 0.2f); BaseFont bf = BaseFont.createFont(BaseFont.COURIER_BOLD, BaseFont.CP1250, BaseFont.NOT_EMBEDDED); cb.beginText(); cb.setFontAndSize(bf, 14); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " _________________ ", width / 6, 44, 0); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " | |", width / 6, 32, 0); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " | |", width / 6, 22, 0); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |Copia Controlada |", width / 6, 28, 0); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " | |", width / 6, 16, 0); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " | |", width / 6, 12, 0); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |_________________|", width / 6, 14, 0); cb.endText(); } document.close(); writer.close(); reader.close(); }
From source file:helper.PdfText.java
License:Apache License
/** * @param pdfFile this file will be extracted. * @return the plain text of the pdf/*from w w w . j a v a 2 s. c o m*/ */ public String itext(File pdfFile) { PdfReader reader; try { reader = new PdfReader(pdfFile.getAbsolutePath()); PdfReaderContentParser parser = new PdfReaderContentParser(reader); StringBuffer buf = new StringBuffer(); TextExtractionStrategy strategy; for (int i = 1; i <= reader.getNumberOfPages(); i++) { strategy = parser.processContent(i, new SimpleTextExtractionStrategy()); buf.append(strategy.getResultantText()); } return buf.toString(); } catch (IOException e) { throw new HttpArchiveException(500, e); } }
From source file:hsa.awp.admingui.report.printer.MergePDFUtil.java
License:Open Source License
/** * merges PDF's./*w ww.j a v a2 s . c o m*/ * * @param streamOfPDFFiles pdf files as inputstreams. * @param outputStream outputstream where the pdf is written. * @param paginate true if page numbers should be displayed. */ public void concatPDFs(List<InputStream> streamOfPDFFiles, OutputStream outputStream, boolean paginate) { Document document = new Document(); try { List<PdfReader> readers = createPdfReaders(streamOfPDFFiles); int totalPages = countAllPages(readers); // Create a writer for the outputstream writer = PdfWriter.getInstance(document, outputStream); document.open(); cb = getContentByte(writer); // Loop through the PDF files and add to the output. for (PdfReader pdfReader : readers) { int pageOfCurrentReaderPDF = 0; // Create a new page in the target for each source page. while (pageOfCurrentReaderPDF < pdfReader.getNumberOfPages()) { int currentPageNumber = 0; document.newPage(); pageOfCurrentReaderPDF++; currentPageNumber++; PdfImportedPage page = writer.getImportedPage(pdfReader, pageOfCurrentReaderPDF); cb.addTemplate(page, 0, 0); // Code for pagination. if (paginate) { addPagination(totalPages, currentPageNumber); } } pageOfCurrentReaderPDF = 0; } outputStream.flush(); document.close(); outputStream.close(); } catch (Exception e) { e.printStackTrace(); } finally { if (document.isOpen()) { document.close(); } try { if (outputStream != null) { outputStream.close(); } } catch (IOException ioe) { ioe.printStackTrace(); } } }
From source file:hsa.awp.admingui.report.printer.MergePDFUtil.java
License:Open Source License
private int countAllPages(List<PdfReader> readers) { int pages = 0; for (PdfReader reader : readers) { pages += reader.getNumberOfPages(); }//from w ww.ja v a 2s . com return pages; }
From source file:integrator.Pdf.java
/**
 * Parses a PDF to a plain text file, writing the extracted text of each
 * page followed by a line separator.
 *
 * @param pdf the original PDF
 * @param txt the resulting text
 * @throws IOException if the PDF cannot be read or the text file cannot be written
 */
public void parsePdf(String pdf, String txt) throws IOException {
    PdfReader reader = new PdfReader(pdf);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        PrintWriter out = new PrintWriter(new FileOutputStream(txt));
        try {
            int pageCount = reader.getNumberOfPages();
            for (int i = 1; i <= pageCount; i++) {
                TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
            out.flush();
            // PrintWriter never throws on write failures; surface them explicitly.
            if (out.checkError()) {
                throw new IOException("Failed to write extracted text to " + txt);
            }
        } finally {
            out.close();
        }
    } finally {
        reader.close();
    }
}
From source file:io.github.jonestimd.finance.file.pdf.TextExtractor.java
License:Open Source License
/**
 * Reads a PDF from the given stream and appends, for each page in order,
 * the text collected by an {@code ImportRenderListener} to {@code pageText}.
 *
 * @param is stream containing the PDF
 * @throws IOException if the PDF cannot be read or a page cannot be processed
 */
public TextExtractor(InputStream is) throws IOException {
    PdfReader pdfReader = new PdfReader(is);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);
        int pages = pdfReader.getNumberOfPages();
        for (int i = 1; i <= pages; i++) {
            ImportRenderListener renderListener = new ImportRenderListener();
            parser.processContent(i, renderListener);
            pageText.add(renderListener.text);
        }
    } finally {
        // The reader is not needed once all page text has been collected.
        pdfReader.close();
    }
}
From source file:itextblast.ITextBlast.java
/**
 * Scans the pages of a question-and-answer PDF for "NO SOALAN" / "SOALAN NO"
 * headings and hands each detected page range to
 * {@code ITextBlast.copySelectedQuestionPage}.
 *
 * <p>The reader is also stored in {@code ITextBlast.my_reader}. Progress and
 * debug information is printed through {@code out}.
 *
 * @param qa_filename   value substituted into {@code ITextBlast.working_dir + SOURCE}
 *                      to build the input path
 * @param has_frontpage when false, the initial question label is the number
 *                      extracted from page 1 instead of {@code "0-intro"};
 *                      the value is unboxed, so {@code null} raises a
 *                      {@link NullPointerException}
 * @throws IOException       if the PDF cannot be read
 * @throws DocumentException declared for the copy step - presumably raised by
 *                           {@code copySelectedQuestionPage}; verify against that method
 */
private static void processQAFile(String qa_filename, Boolean has_frontpage)
        throws IOException, DocumentException {
    // Create a reader from the existing file; the path is currently fixed by
    // working_dir + SOURCE rather than passed in through args.
    PdfReader reader = new PdfReader(String.format(ITextBlast.working_dir + SOURCE, qa_filename));
    ITextBlast.my_reader = reader;
    // Number of pages in the original PDF.
    int n = reader.getNumberOfPages();
    // Text extraction: the default strategy of PdfTextExtractor is used; the
    // author's notes say the Location/Simple strategies behaved oddly.

    // START SMART Start Number ********
    Pattern smart_start_pattern;
    smart_start_pattern = Pattern.compile(".*?SOALAN.*?N.*?O.*?(\\d+)\\b+.*", Pattern.CASE_INSENSITIVE);
    // Try to extract the first question number from the cover page (page 1).
    String cover_page_content = PdfTextExtractor.getTextFromPage(reader, 1);
    Matcher smart_start_matcher = smart_start_pattern.matcher(cover_page_content);
    String smart_start_question_number = null;
    if (smart_start_matcher.find()) {
        // Extract the question number from capture group 1.
        smart_start_question_number = smart_start_matcher.group(1);
        out.println("Matched " + smart_start_matcher.group(0) + " and SMART Start Number: "
                + smart_start_question_number);
    }
    // END SMART Start Number ********

    // "Liberal" patterns only detect that a heading is present; the
    // pattern_uno / pattern_dos variants additionally capture the digits.
    Pattern liberal_found_question_pattern_uno;
    liberal_found_question_pattern_uno = Pattern.compile(".*N.*O.*SOALAN.*", Pattern.CASE_INSENSITIVE);
    Pattern liberal_found_question_pattern_dos = Pattern.compile(".*SOALAN.*N.*O.*", Pattern.CASE_INSENSITIVE);
    Pattern pattern_uno;
    pattern_uno = Pattern.compile(".*N.*O.*SOALAN.*?(\\d+)\\b+.*", Pattern.CASE_INSENSITIVE);
    Pattern pattern_dos = Pattern.compile(".*SOALAN.*N.*O.*?(\\d+)\\b+.*", Pattern.CASE_INSENSITIVE);
    // OPTION 2 is to try with the next available number between word boundaries .. but may then need non-greedy ..

    // Init start and end page of the range currently being collected.
    int start_page = 1;
    int end_page = 1;
    String question_number = "0-intro";
    // This is for SOALAN LISAN, which has no front page: the start label is
    // then the SMART start number (which may still be null if page 1 did not match).
    if (!has_frontpage) {
        question_number = smart_start_question_number;
    }
    // NOTE(review): the condition is i < n, so the text of page n is never
    // inspected by this loop - confirm whether i <= n was intended.
    for (int i = 1; i < n; i++) {
        // Reset per-page detection state.
        String found_question_number = null;
        boolean found_match = false;
        out.println("Page " + i);
        out.println("===========");
        String content = PdfTextExtractor.getTextFromPage(reader, i);
        // DEBUG: print content here when the patterns need tuning.
        Matcher liberal_uno_matcher = liberal_found_question_pattern_uno.matcher(content);
        if (liberal_uno_matcher.find()) {
            out.println("Matched UNO!");
            found_match = true;
            Matcher matcher = pattern_uno.matcher(content);
            // Loop to find the digits; the last match wins, and the number
            // stays null when no digits are found.
            while (matcher.find()) {
                // Extract the question number from capture group 1.
                found_question_number = matcher.group(1);
                out.println("Matched " + matcher.group(0) + " and Question Number: " + found_question_number);
            }
        } else if (liberal_found_question_pattern_dos.matcher(content).find()) {
            // While still in the intro range a DOS-style heading is only
            // logged and does not start a new range.
            if ("0-intro".equals(question_number)) {
                out.println("SMART!!!");
            } else {
                found_match = true;
                out.println("Matched DOS!");
                Matcher matcher = pattern_dos.matcher(content);
                // Loop to find the digits; the last match wins, and the number
                // stays null when no digits are found.
                while (matcher.find()) {
                    // Extract the question number from capture group 1.
                    found_question_number = matcher.group(1);
                    out.println(
                            "Matched " + matcher.group(0) + " and Question Number: " + found_question_number);
                }
            }
        }
        // A heading on this page closes the previous range at page i - 1.
        if (found_match) {
            end_page = i - 1;
            // Clamp so a heading on page 1 still yields a valid range.
            if (end_page < 1) {
                end_page = 1;
            }
            if (null == found_question_number) {
                if ("0-intro".equals(question_number)) {
                    // Right after the intro, fall back to the SMART start number.
                    found_question_number = smart_start_question_number;
                    out.println("First question could not determine number; using Q No. => "
                            + found_question_number);
                    // Print out content to debug
                    out.println("*****DEBUG Content*******");
                    out.println(content);
                } else {
                    // Otherwise reuse the current label and append a Unix timestamp (seconds).
                    found_question_number = question_number + "_" + (System.currentTimeMillis() / 1000L);
                    out.println(
                            "Unexpectedly could not determine number; using Q No. => " + found_question_number);
                    // Print out content to debug
                    out.println("*****DEBUG Content*******");
                    out.println(content);
                }
            }
            // Write the finished range under the PREVIOUS confirmed label.
            ITextBlast.copySelectedQuestionPage(start_page, end_page, question_number);
            // The new range starts (and for now ends) on the current page.
            start_page = i;
            end_page = i;
            question_number = found_question_number;
        }
        out.println();
        out.println();
    }
    // Flush whatever range is still open after the loop.
    // NOTE(review): end_page is not advanced on non-matching pages, so after
    // any match this call receives start_page == end_page (a single page) -
    // confirm whether the range should extend to page n. The original note
    // mentions a special label 999, but no such value is assigned here.
    if (start_page <= end_page) {
        ITextBlast.copySelectedQuestionPage(start_page, end_page, question_number);
    }
    reader.close();
}
From source file:itextblast.ITextBlast.java
public static void splitByPage(String[] args) throws IOException, DocumentException { // use one of the previous examples to create a PDF // new MovieTemplates().createPdf(MovieTemplates.RESULT); // Create a reader; from current existing file // Next time pass it from args .. PdfReader reader = new PdfReader("./source/imokman.pdf"); // We'll create as many new PDFs as there are pages Document document;// ww w .java 2 s.c o m PdfCopy copy; // loop over all the pages in the original PDF int n = reader.getNumberOfPages(); for (int i = 0; i < n;) { // step 1 document = new Document(); // step 2 copy = new PdfCopy(document, new FileOutputStream(String.format(RESULT, ++i))); // step 3 document.open(); // step 4 copy.addPage(copy.getImportedPage(reader, i)); // step 5 document.close(); } reader.close(); }
From source file:jasperSoft.MergePDF.java
/** * // w w w.j av a2 s. co m * @param streamOfPDFFiles * @param outputStream * @param paginate */ public static void concatPDFs(List<InputStream> streamOfPDFFiles, OutputStream outputStream, boolean paginate) { Document document = new Document(); try { List<InputStream> pdfs = streamOfPDFFiles; List<PdfReader> readers = new ArrayList<PdfReader>(); int totalPages = 0; Iterator<InputStream> iteratorPDFs = pdfs.iterator(); // Create Readers for the pdfs. while (iteratorPDFs.hasNext()) { InputStream pdf = iteratorPDFs.next(); PdfReader pdfReader = new PdfReader(pdf); readers.add(pdfReader); totalPages += pdfReader.getNumberOfPages(); } // Create a writer for the outputstream PdfWriter writer = PdfWriter.getInstance(document, outputStream); document.open(); BaseFont bf = BaseFont.createFont(BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED); PdfContentByte cb = writer.getDirectContent(); // Holds the PDF // data PdfImportedPage page; int currentPageNumber = 0; int pageOfCurrentReaderPDF = 0; Iterator<PdfReader> iteratorPDFReader = readers.iterator(); // Loop through the PDF files and add to the output. while (iteratorPDFReader.hasNext()) { PdfReader pdfReader = iteratorPDFReader.next(); // Create a new page in the target for each source page. while (pageOfCurrentReaderPDF < pdfReader.getNumberOfPages()) { document.newPage(); pageOfCurrentReaderPDF++; currentPageNumber++; page = writer.getImportedPage(pdfReader, pageOfCurrentReaderPDF); cb.addTemplate(page, 0, 0); // Code for pagination. 
if (paginate) { cb.beginText(); cb.setFontAndSize(bf, 9); cb.showTextAligned(PdfContentByte.ALIGN_CENTER, "" + currentPageNumber + " of " + totalPages, 520, 5, 0); cb.endText(); } } pageOfCurrentReaderPDF = 0; } outputStream.flush(); document.close(); outputStream.close(); } catch (Exception e) { e.printStackTrace(); } finally { if (document.isOpen()) { document.close(); } try { if (outputStream != null) { outputStream.close(); } } catch (IOException ioe) { ioe.printStackTrace(); } } }
From source file:jp.nichicom.ndk.affair.nu.nu001.NU001.java
License:Open Source License
/** * ???//from w w w. j a va2 s.c om * @param e * @throws Exception ? */ protected void compareActionPerformed(ActionEvent e) throws Exception { // if (!new File(getPdf1().getText()).exists()) { ACMessageBox.showExclamation("PDF1?????"); return; } if (!new File(getPdf2().getText()).exists()) { ACMessageBox.showExclamation("PDF2?????"); return; } final PdfReader p1 = new PdfReader(getPdf1().getText()); final PdfReader p2 = new PdfReader(getPdf2().getText()); final int pages1 = p1.getNumberOfPages(); final int pages2 = p2.getNumberOfPages(); if (pages1 != pages2) { if (pages1 > pages2) { ACMessageBox.showExclamation("PDF1PDF2??????"); return; } if (ACMessageBox.showOkCancel("PDF?????" + ACConstants.LINE_SEPARATOR + "PDF1??????PDF2?????????") != ACMessageBox.RESULT_OK) { return; } } new Runnable() { private boolean firstLock; public void run() { final int stopKeyCode = KeyEvent.VK_CAPS_LOCK; firstLock = Toolkit.getDefaultToolkit().getLockingKeyState(stopKeyCode); LinkedList<Integer> pages = new LinkedList<Integer>(); for (int i = 1; i <= pages2; i++) { pages.add(new Integer(i)); } errorCount = 0; pageOfProcessed = 0; pageOfCount = pages1; StringBuilder sb = new StringBuilder(); for (int i = 1; i <= pages1; i++) { try { boolean match = false; byte[] b1 = p1.getPageContent(i); Iterator<Integer> it = pages.iterator(); while (it.hasNext()) { int page = ((Integer) it.next()).intValue(); byte[] b2 = p2.getPageContent(page); if (Arrays.equals(b1, b2)) { sb.append("PDF1(" + i + ") = PDF2(" + page + ")" + ACConstants.LINE_SEPARATOR); match = true; it.remove(); break; } } if (!match) { sb.append("PDF1(" + i + ") ???" + ACConstants.LINE_SEPARATOR); errorCount++; } } catch (Exception ex) { sb.append("PDF1(" + i + ") ??" 
+ ACConstants.LINE_SEPARATOR); errorCount++; } setProgress(sb); pageOfProcessed = i; if (Toolkit.getDefaultToolkit().getLockingKeyState(stopKeyCode) != firstLock) { getResult().setText( "?(" + pageOfProcessed + " / " + pageOfCount + ") / ?:" + errorCount + " " + ACConstants.LINE_SEPARATOR + sb.toString()); return; } } getResult().setText("(" + pageOfProcessed + " / " + pageOfCount + ") / ?:" + errorCount + " " + ACConstants.LINE_SEPARATOR + sb.toString()); } }.run(); }