Example usage for com.itextpdf.text.pdf PdfReader getNumberOfPages

List of usage examples for com.itextpdf.text.pdf PdfReader getNumberOfPages

Introduction

On this page you can find example usages of com.itextpdf.text.pdf PdfReader getNumberOfPages.

Prototype

public int getNumberOfPages() 

Source Link

Document

Gets the number of pages in the document.

Usage

From source file:gravabncertificado007.CarimboCertificado.java

/**
 * Copies every page of a PDF into a new file and draws a "Copia Controlada" text box
 * near the bottom-left corner of each copied page.
 *
 * <p>The output path is {@code caminhoarquivo} without its last four characters, followed
 * by {@code "-G.pdf"}. The output document uses the size of the first source page for
 * all pages.
 *
 * @param BN             non-null string of at least 13 characters; a substring of it is
 *                       computed (and the call fails for shorter values), but the result
 *                       is not used anywhere else in this method
 * @param caminhoarquivo path of the source PDF; must be at least four characters long
 * @throws DocumentException if iText cannot build the output document
 * @throws IOException       if the source cannot be read or the target cannot be written
 * @throws RuntimeException  declared by the original signature
 */
public void aplicaCariboGedi(String BN, String caminhoarquivo)
        throws DocumentException, IOException, RuntimeException {

    // Global iText switch, set before the reader is created.
    // NOTE(review): presumably needed to open owner-password-protected files - confirm.
    PdfReader.unethicalreading = true;

    // Create the reader for the source PDF.
    PdfReader reader = new PdfReader(caminhoarquivo);
    try {
        Rectangle psize = reader.getPageSize(1);
        float width = psize.getWidth();
        float height = psize.getHeight();

        Document document = new Document(new Rectangle(width, height));

        FileOutputStream output = new FileOutputStream(
                caminhoarquivo.substring(0, caminhoarquivo.length() - 4) + "-G.pdf");
        try {
            PdfWriter writer = PdfWriter.getInstance(document, output);

            document.open();

            // Kept from the original: validates the length of BN as a side effect.
            BN = BN.substring(BN.length() - 13, BN.length() - 4);
            PdfContentByte cb = writer.getDirectContent();

            // The font does not depend on the page, so it is created once.
            BaseFont bf = BaseFont.createFont(BaseFont.COURIER_BOLD, BaseFont.CP1250, BaseFont.NOT_EMBEDDED);

            int pageCount = reader.getNumberOfPages();
            for (int i = 1; i <= pageCount; i++) {
                document.newPage();

                PdfImportedPage page1 = writer.getImportedPage(reader, i);
                // The vertical offset grows with the page number (0.2 units per page).
                cb.addTemplate(page1, 0, i * 0.2f);

                cb.beginText();
                cb.setFontAndSize(bf, 14);
                cb.showTextAligned(PdfContentByte.ALIGN_CENTER, "  _________________  ", width / 6, 44, 0);
                cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |                 |", width / 6, 32, 0);
                cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |                 |", width / 6, 22, 0);
                cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |Copia Controlada |", width / 6, 28, 0);
                cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |                 |", width / 6, 16, 0);
                cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |                 |", width / 6, 12, 0);
                cb.showTextAligned(PdfContentByte.ALIGN_CENTER, " |_________________|", width / 6, 14, 0);
                cb.endText();
            }

            document.close();
        } finally {
            // Releases the file handle even when stamping fails half-way.
            output.close();
        }
    } finally {
        reader.close();
    }
}

From source file:helper.PdfText.java

License:Apache License

/**
 * @param pdfFile this file will be extracted.
 * @return the plain text of the pdf/*from   w w w . j  a  v a  2 s. c o  m*/
 */
public String itext(File pdfFile) {

    PdfReader reader;
    try {
        reader = new PdfReader(pdfFile.getAbsolutePath());
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        StringBuffer buf = new StringBuffer();
        TextExtractionStrategy strategy;
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
            buf.append(strategy.getResultantText());
        }

        return buf.toString();
    } catch (IOException e) {
        throw new HttpArchiveException(500, e);
    }

}

From source file:hsa.awp.admingui.report.printer.MergePDFUtil.java

License:Open Source License

/**
 * merges PDF's./*w ww.j a v  a2  s  . c o m*/
 *
 * @param streamOfPDFFiles pdf files as inputstreams.
 * @param outputStream     outputstream where the pdf is written.
 * @param paginate         true if page numbers should be displayed.
 */
public void concatPDFs(List<InputStream> streamOfPDFFiles, OutputStream outputStream, boolean paginate) {
    Document document = new Document();
    try {
        List<PdfReader> readers = createPdfReaders(streamOfPDFFiles);
        int totalPages = countAllPages(readers);

        // Create a writer for the outputstream
        writer = PdfWriter.getInstance(document, outputStream);

        document.open();
        cb = getContentByte(writer);

        // Loop through the PDF files and add to the output.
        for (PdfReader pdfReader : readers) {
            int pageOfCurrentReaderPDF = 0;
            // Create a new page in the target for each source page.
            while (pageOfCurrentReaderPDF < pdfReader.getNumberOfPages()) {
                int currentPageNumber = 0;

                document.newPage();
                pageOfCurrentReaderPDF++;
                currentPageNumber++;
                PdfImportedPage page = writer.getImportedPage(pdfReader, pageOfCurrentReaderPDF);
                cb.addTemplate(page, 0, 0);

                // Code for pagination.
                if (paginate) {
                    addPagination(totalPages, currentPageNumber);
                }
            }
            pageOfCurrentReaderPDF = 0;
        }
        outputStream.flush();
        document.close();
        outputStream.close();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (document.isOpen()) {
            document.close();
        }
        try {
            if (outputStream != null) {
                outputStream.close();
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
}

From source file:hsa.awp.admingui.report.printer.MergePDFUtil.java

License:Open Source License

/**
 * Adds up the page counts reported by every reader in the list.
 *
 * @param readers readers whose pages are counted
 * @return the sum of {@code getNumberOfPages()} over all readers
 */
private int countAllPages(List<PdfReader> readers) {
    int total = 0;
    for (int idx = 0; idx < readers.size(); idx++) {
        total = total + readers.get(idx).getNumberOfPages();
    }
    return total;
}

From source file:integrator.Pdf.java

/**
 * Parses a PDF to a plain text file, writing the text of each page followed by a line
 * break.
 *
 * @param pdf the original PDF
 * @param txt the resulting text
 * @throws IOException if the PDF cannot be read or the text file cannot be created
 */
public void parsePdf(String pdf, String txt) throws IOException {
    PdfReader reader = new PdfReader(pdf);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        PrintWriter out = new PrintWriter(new FileOutputStream(txt));
        try {
            int pageCount = reader.getNumberOfPages();
            // iText page numbers are 1-based.
            for (int i = 1; i <= pageCount; i++) {
                TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
            out.flush();
        } finally {
            // Close the text file even when extraction of a page fails.
            out.close();
        }
    } finally {
        reader.close();
    }
}

From source file:io.github.jonestimd.finance.file.pdf.TextExtractor.java

License:Open Source License

/**
 * Reads a PDF from the given stream and stores the text collected for each page, in
 * page order, in {@code pageText}.
 *
 * @param is stream containing the PDF
 * @throws IOException if the PDF cannot be read
 */
public TextExtractor(InputStream is) throws IOException {
    PdfReader pdfReader = new PdfReader(is);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);
        int pages = pdfReader.getNumberOfPages();
        // iText page numbers are 1-based; a fresh listener is used for every page.
        for (int i = 1; i <= pages; i++) {
            ImportRenderListener renderListener = new ImportRenderListener();
            parser.processContent(i, renderListener);
            pageText.add(renderListener.text);
        }
    } finally {
        // The reader is only needed while the pages are being parsed.
        pdfReader.close();
    }
}

From source file:itextblast.ITextBlast.java

/**
 * Scans a question-and-answer PDF page by page, looks for "NO ... SOALAN" /
 * "SOALAN ... NO" headers, and hands each run of pages belonging to one question to
 * {@code ITextBlast.copySelectedQuestionPage(start, end, questionNumber)}.
 *
 * <p>The file opened is {@code String.format(ITextBlast.working_dir + SOURCE, qa_filename)}.
 * Progress and debug output is printed through {@code out}.
 * NOTE(review): {@code out} is not declared in this method; presumably a static import
 * of {@code System.out} - confirm.
 *
 * @param qa_filename   value substituted into the source path pattern
 * @param has_frontpage when false, the initial question number is taken from the number
 *                      detected on page 1 instead of the placeholder {@code "0-intro"}
 * @throws IOException       if the PDF cannot be read
 * @throws DocumentException declared for the page-copy step
 */
private static void processQAFile(String qa_filename, Boolean has_frontpage)
        throws IOException, DocumentException {

    // use one of the previous examples to create a PDF
    // new MovieTemplates().createPdf(MovieTemplates.RESULT);
    // Create a reader; from current existing file
    // Next time pass it from args ..
    PdfReader reader = new PdfReader(String.format(ITextBlast.working_dir + SOURCE, qa_filename));
    // The reader is published in a static field.
    // NOTE(review): presumably read by copySelectedQuestionPage - confirm.
    ITextBlast.my_reader = reader;
    // We'll create as many new PDFs as there are pages
    // Document document;
    // PdfCopy copy;
    // loop over all the pages in the original PDF
    int n = reader.getNumberOfPages();
    // For test of extraction and regexp; use first 5 pages ..
    // n = 15;
    // Text Extraction Strategy here ...
    // LocationTextExtractionStrategy strategy = new LocationTextExtractionStrategy();
    // SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
    // Both ^ does not work well; weird behavior ... no need so clever ..
    // START SMART Start Number ********
    Pattern smart_start_pattern;
    smart_start_pattern = Pattern.compile(".*?SOALAN.*?N.*?O.*?(\\d+)\\b+.*", Pattern.CASE_INSENSITIVE);
    // Extract cover page number as smartly as possible??
    String cover_page_content = PdfTextExtractor.getTextFromPage(reader, 1);
    Matcher smart_start_matcher = smart_start_pattern.matcher(cover_page_content);
    // Stays null when page 1 does not match the pattern above.
    String smart_start_question_number = null;
    if (smart_start_matcher.find()) {
        // Extract the question number based on backreference
        smart_start_question_number = smart_start_matcher.group(1);
        // How will it look when using a different strategy?
        out.println("Matched " + smart_start_matcher.group(0) + " and SMART Start Number: "
                + smart_start_question_number);
    }
    // END SMART Start Number ********
    // The "liberal" patterns only detect that a header is present; the pattern_* variants
    // additionally capture the digits of the question number.
    Pattern liberal_found_question_pattern_uno;
    liberal_found_question_pattern_uno = Pattern.compile(".*N.*O.*SOALAN.*", Pattern.CASE_INSENSITIVE);
    Pattern liberal_found_question_pattern_dos = Pattern.compile(".*SOALAN.*N.*O.*", Pattern.CASE_INSENSITIVE);
    Pattern pattern_uno;
    // pattern = Pattern.compile("^.*NO.*SOALAN.*?(\\d+).*$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
    // pattern = Pattern.compile(".*SOALAN.*?(\\d+).*", Pattern.CASE_INSENSITIVE);
    pattern_uno = Pattern.compile(".*N.*O.*SOALAN.*?(\\d+)\\b+.*", Pattern.CASE_INSENSITIVE);
    Pattern pattern_dos = Pattern.compile(".*SOALAN.*N.*O.*?(\\d+)\\b+.*", Pattern.CASE_INSENSITIVE);
    // OPTION 2 is to try with the next available number between word boundaries .. but may then need non-greedy ..
    // Init start and end page
    int start_page = 1;
    int end_page = 1;
    String question_number = "0-intro";

    // This is for SOALAN LISAN; which has no Front Page
    // the Start Question Number should then be set to SMART Start Number
    // NOTE(review): question_number becomes null here if page 1 did not match - confirm
    // that copySelectedQuestionPage accepts a null question number.
    if (!has_frontpage) {
        question_number = smart_start_question_number;
    }

    // NOTE(review): the bound is i < n, so page n itself is never scanned for a header -
    // confirm whether i <= n was intended.
    for (int i = 1; i < n; i++) {
        // init found_question_number
        String found_question_number = null;
        boolean found_match = false;
        // PdfDictionary page = reader.getPageN(i);
        // use location based strategy
        out.println("Page " + i);
        out.println("===========");
        // out.println(PdfTextExtractor.getTextFromPage(reader, i, strategy));
        String content = PdfTextExtractor.getTextFromPage(reader, i);
        // DEBUG: Uncomment below ..
        // out.println(content);
        Matcher liberal_uno_matcher = liberal_found_question_pattern_uno.matcher(content);
        if (liberal_uno_matcher.find()) {
            out.println("Matched UNO!");
            found_match = true;
            Matcher matcher = pattern_uno.matcher(content);
            // Loop to find the digit; it is possible it is not found and left as null ..
            // When several matches exist, the last one wins.
            while (matcher.find()) {
                // Extract the question number based on backreference
                found_question_number = matcher.group(1);
                // How will it look when using a different strategy?
                out.println("Matched " + matcher.group(0) + " and Question Number: " + found_question_number);
            }
        } else if (liberal_found_question_pattern_dos.matcher(content).find()) {
            // While still in the intro section a "SOALAN ... NO" hit is only logged and
            // does not start a new question.
            if ("0-intro".equals(question_number)) {
                out.println("SMART!!!");
            } else {
                found_match = true;
                out.println("Matched DOS!");
                Matcher matcher = pattern_dos.matcher(content);
                // Loop to find the digit; it is possible it is not found and left as null ..
                while (matcher.find()) {
                    // Extract the question number based on backreference
                    found_question_number = matcher.group(1);
                    // How will it look when using a different strategy?
                    out.println(
                            "Matched " + matcher.group(0) + " and Question Number: " + found_question_number);
                }

            }
        }
        // If matched; take out the last start, end
        if (found_match) {
            // copy page over and write it down ..
            // The previous question ends on the page before the current header, but never
            // before page 1.
            end_page = i - 1;
            if (end_page < 1) {
                end_page = 1;
            }
            if (null == found_question_number) {
                if ("0-intro".equals(question_number)) {
                    // After intro; if got problem; try the smart start
                    found_question_number = smart_start_question_number;
                    out.println("First question could not determine number; using Q No. => "
                            + found_question_number);
                    // Print out content to debug
                    out.println("*****DEBUG Content*******");
                    out.println(content);
                } else {
                    // otherwise; use current question and just append Unix timestamp ..
                    found_question_number = question_number + "_" + (System.currentTimeMillis() / 1000L);
                    out.println(
                            "Unexpectedly could not determine number; using Q No. => " + found_question_number);
                    // Print out content to debug
                    out.println("*****DEBUG Content*******");
                    out.println(content);
                }
            }
            // Write based on previous confirmed question_number
            ITextBlast.copySelectedQuestionPage(start_page, end_page, question_number);
            // re-set to current page
            start_page = i;
            end_page = i;
            question_number = found_question_number;
        }
        // out.println(PdfTextExtractor.getTextFromPage(reader, i));
        // Pattern RegExp:  #^.*NO.*SOALAN.*(\d)+$#im
        out.println();
        out.println();
        // use helper file to dump out
        // Look out for pattern  "NO. SOALAN"
        // Once see pattern or reach end; snip off copy from start to end
        // reset start/end
        // else increase the end
    }
    // Flush whatever is left after the loop, using the current question_number.
    // NOTE(review): an earlier comment mentioned a special question_number = 999, but no
    // such value is used here.
    // NOTE(review): end_page is only changed when a header is found, so at this point it
    // equals start_page (the page of the last header, or 1). Pages after it are not part
    // of this range unless copySelectedQuestionPage extends it - confirm.
    if (start_page <= end_page) {
        // Should always happen actually ..
        ITextBlast.copySelectedQuestionPage(start_page, end_page, question_number);
    }
    reader.close();
}

From source file:itextblast.ITextBlast.java

/**
 * Splits {@code ./source/imokman.pdf} into one single-page PDF per source page. The
 * target path of each page is {@code String.format(RESULT, pageNumber)} with a 1-based
 * page number.
 *
 * @param args not used by this method
 * @throws IOException       if the source cannot be read or a target cannot be written
 * @throws DocumentException if iText cannot build a target document
 */
public static void splitByPage(String[] args) throws IOException, DocumentException {

    // Create a reader from the existing source file.
    PdfReader reader = new PdfReader("./source/imokman.pdf");
    try {
        // We'll create as many new PDFs as there are pages.
        int n = reader.getNumberOfPages();
        for (int pageNumber = 1; pageNumber <= n; pageNumber++) {
            FileOutputStream target = new FileOutputStream(String.format(RESULT, pageNumber));
            try {
                // step 1
                Document document = new Document();
                // step 2
                PdfCopy copy = new PdfCopy(document, target);
                // step 3
                document.open();
                // step 4
                copy.addPage(copy.getImportedPage(reader, pageNumber));
                // step 5
                document.close();
            } finally {
                // Releases the file handle when copying the page fails; closing an
                // already closed FileOutputStream has no effect.
                target.close();
            }
        }
    } finally {
        reader.close();
    }
}

From source file:jasperSoft.MergePDF.java

/**
 * Concatenates several PDFs into a single document written to {@code outputStream},
 * optionally stamping "{current} of {total}" on every page.
 *
 * <p>Failures are printed to standard error and not propagated. The readers and the
 * output stream are closed before the method returns.
 *
 * @param streamOfPDFFiles the source PDFs, merged in list order
 * @param outputStream     target of the merged PDF
 * @param paginate         true to draw the page counter on every page
 */
public static void concatPDFs(List<InputStream> streamOfPDFFiles, OutputStream outputStream, boolean paginate) {

    Document document = new Document();
    // Declared outside the try block so the readers can be released in finally.
    List<PdfReader> readers = new ArrayList<PdfReader>();
    try {
        int totalPages = 0;

        // Create Readers for the pdfs.
        for (InputStream pdf : streamOfPDFFiles) {
            PdfReader pdfReader = new PdfReader(pdf);
            readers.add(pdfReader);
            totalPages += pdfReader.getNumberOfPages();
        }
        // Create a writer for the outputstream
        PdfWriter writer = PdfWriter.getInstance(document, outputStream);

        document.open();
        BaseFont bf = BaseFont.createFont(BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED);
        PdfContentByte cb = writer.getDirectContent(); // Holds the PDF data

        // Running page number of the merged document.
        int currentPageNumber = 0;

        // Loop through the PDF files and add to the output.
        for (PdfReader pdfReader : readers) {
            int pagesInReader = pdfReader.getNumberOfPages();

            // Create a new page in the target for each source page (1-based in iText).
            for (int pageOfCurrentReaderPDF = 1; pageOfCurrentReaderPDF <= pagesInReader; pageOfCurrentReaderPDF++) {
                document.newPage();
                currentPageNumber++;
                PdfImportedPage page = writer.getImportedPage(pdfReader, pageOfCurrentReaderPDF);
                cb.addTemplate(page, 0, 0);

                // Code for pagination.
                if (paginate) {
                    cb.beginText();
                    cb.setFontAndSize(bf, 9);
                    cb.showTextAligned(PdfContentByte.ALIGN_CENTER,
                            "" + currentPageNumber + " of " + totalPages, 520, 5, 0);
                    cb.endText();
                }
            }
        }
        outputStream.flush();
        document.close();
        outputStream.close();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (document.isOpen()) {
                document.close();
            }
        } finally {
            // Runs even when closing the document fails. The readers are closed only
            // after the document, which may still pull page data from them.
            for (PdfReader pdfReader : readers) {
                pdfReader.close();
            }
            try {
                if (outputStream != null) {
                    outputStream.close();
                }
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
        }
    }
}

From source file:jp.nichicom.ndk.affair.nu.nu001.NU001.java

License:Open Source License

/**
 * Compares the two PDFs selected in the {@code getPdf1()} and {@code getPdf2()} fields.
 *
 * <p>For every page of PDF1 the raw page content is compared byte-for-byte with the
 * not-yet-matched pages of PDF2. Matches, misses and errors are collected into a report
 * that is shown through {@code getResult()}. Toggling Caps Lock while the comparison
 * runs stops it after the current page.
 *
 * <p>NOTE(review): the user-facing string literals appear mis-encoded ("?") in this
 * copy of the source; they are reproduced unchanged - restore them from the original
 * file.
 *
 * @param e the triggering action event (not read by this method)
 * @throws Exception if one of the PDFs cannot be opened
 */
protected void compareActionPerformed(ActionEvent e) throws Exception {

    if (!new File(getPdf1().getText()).exists()) {
        ACMessageBox.showExclamation("PDF1?????");
        return;
    }
    if (!new File(getPdf2().getText()).exists()) {
        ACMessageBox.showExclamation("PDF2?????");
        return;
    }

    final PdfReader p1 = new PdfReader(getPdf1().getText());
    try {
        final PdfReader p2 = new PdfReader(getPdf2().getText());
        try {
            final int pages1 = p1.getNumberOfPages();
            final int pages2 = p2.getNumberOfPages();

            if (pages1 != pages2) {
                // More pages in PDF1 than in PDF2 is rejected outright; fewer pages only
                // needs a confirmation.
                if (pages1 > pages2) {
                    ACMessageBox.showExclamation("PDF1PDF2??????");
                    return;
                }
                if (ACMessageBox.showOkCancel("PDF?????" + ACConstants.LINE_SEPARATOR
                        + "PDF1??????PDF2?????????") != ACMessageBox.RESULT_OK) {
                    return;
                }
            }

            // The Runnable is invoked directly, so the comparison runs on the calling
            // thread and has finished before the readers are closed below.
            new Runnable() {
                private boolean firstLock;

                public void run() {
                    final int stopKeyCode = KeyEvent.VK_CAPS_LOCK;
                    // Remember the initial Caps Lock state; a change means "stop".
                    firstLock = Toolkit.getDefaultToolkit().getLockingKeyState(stopKeyCode);

                    // Page numbers of PDF2 that have not been matched yet.
                    LinkedList<Integer> pages = new LinkedList<Integer>();
                    for (int i = 1; i <= pages2; i++) {
                        pages.add(Integer.valueOf(i));
                    }

                    errorCount = 0;
                    pageOfProcessed = 0;
                    pageOfCount = pages1;
                    StringBuilder sb = new StringBuilder();

                    for (int i = 1; i <= pages1; i++) {
                        try {
                            boolean match = false;
                            byte[] b1 = p1.getPageContent(i);
                            Iterator<Integer> it = pages.iterator();
                            while (it.hasNext()) {
                                int page = it.next().intValue();
                                byte[] b2 = p2.getPageContent(page);
                                if (Arrays.equals(b1, b2)) {
                                    sb.append("PDF1(" + i + ") = PDF2(" + page + ")" + ACConstants.LINE_SEPARATOR);
                                    match = true;
                                    // Each page of PDF2 may be matched only once.
                                    it.remove();
                                    break;
                                }
                            }
                            if (!match) {
                                sb.append("PDF1(" + i + ") ???" + ACConstants.LINE_SEPARATOR);
                                errorCount++;
                            }
                        } catch (Exception ex) {
                            // A page that cannot be read is reported and counted as an
                            // error; the comparison continues with the next page.
                            sb.append("PDF1(" + i + ") ??" + ACConstants.LINE_SEPARATOR);
                            errorCount++;
                        }

                        setProgress(sb);
                        pageOfProcessed = i;

                        if (Toolkit.getDefaultToolkit().getLockingKeyState(stopKeyCode) != firstLock) {
                            getResult().setText(
                                    "?(" + pageOfProcessed + " / " + pageOfCount + ") / ?:"
                                            + errorCount + " " + ACConstants.LINE_SEPARATOR + sb.toString());
                            return;
                        }

                    }

                    getResult().setText("(" + pageOfProcessed + " / " + pageOfCount + ") / ?:"
                            + errorCount + " " + ACConstants.LINE_SEPARATOR + sb.toString());

                }

            }.run();
        } finally {
            p2.close();
        }
    } finally {
        // Released on every path, including the early returns above.
        p1.close();
    }

}