Example usage for com.itextpdf.text.pdf PdfReader getNumberOfPages

List of usage examples for com.itextpdf.text.pdf PdfReader getNumberOfPages

Introduction

On this page you can find example usages of com.itextpdf.text.pdf PdfReader getNumberOfPages.

Prototype

public int getNumberOfPages() 

Source Link

Document

Gets the number of pages in the document.

Usage

From source file:bflows.FattureManagement.java

/**
 * Extracts the text of the invoice PDF read from {@code inputStream} and parses it.
 *
 * <p>Every page is converted to text, split on {@code "\n"} and collected in {@code lines}. The lines
 * are then scanned once, in order, to fill:
 * <ul>
 * <li>{@code data} - the invoice date found on the "Emessa" line, with "/" replaced by "-";</li>
 * <li>{@code totale}, {@code contributi}, {@code prodotti}, {@code altri}, {@code iva} - the summary
 * amounts;</li>
 * <li>{@code consumi} - one {@code Consumo} per phone line, read from the table that starts at the
 * first "RIEPILOGO PER UTENZA" line and ends at the "SERVIZI OPZIONALI" line.</li>
 * </ul>
 *
 * <p>Two table layouts are handled, chosen by the year taken from {@code data} (2017 and later, or
 * earlier). I/O and number-format failures are reported through {@code setResult} /
 * {@code setErrorMessage} instead of being thrown.
 *
 * <p>NOTE(review): the last {@code Consumo} is only stored when "SERVIZI OPZIONALI" is reached; if that
 * line is missing the last entry is dropped - confirm this is intended.
 */
public void processPDF() {
    consumi = new ArrayList<Consumo>();
    lines = new ArrayList<String>();

    try {
        // iText: extract the text of every page, line by line
        PdfReader pdfReader = new PdfReader(inputStream);
        try {
            for (int page = 1; page <= pdfReader.getNumberOfPages(); page++) {
                SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                String currentText = PdfTextExtractor.getTextFromPage(pdfReader, page, strategy);
                for (String pageLine : currentText.split("\n")) {
                    lines.add(pageLine);
                }
            }
        } finally {
            // release the reader even when text extraction fails
            pdfReader.close();
        }

        boolean startPointFound = false;
        boolean dateFound = false;
        boolean totaleFound = false;
        boolean contributiFound = false;
        boolean prodottiFound = false;
        boolean altriFound = false;
        boolean ivaFound = false;

        Consumo consumo = null;

        for (String line : lines) {
            // invoice date
            if (!dateFound) {
                if (line.contains("Emessa")) {
                    String cleanLine = line.replaceAll("\\s+", " ");
                    String[] splitted = cleanLine.split(" ");
                    for (String s : splitted) {
                        if (s.contains("/")) {
                            s = s.replace("/", "-");
                            data = s;
                        }
                    }
                    dateFound = true;
                }
            }

            // invoice total, VAT included
            if (!totaleFound) {
                if (line.contains("IMPORTO")) {
                    String cleanLine = line.replaceAll("\\s+", " ").replaceAll("_", "");
                    String importo = cleanLine.replace("IMPORTO: ", "").replace("Euro", "").trim();
                    // amounts use "." as thousands separator and "," as decimal separator
                    totale = Double.parseDouble(importo.replace(".", "").replace(",", "."));
                    totaleFound = true;
                }
            }

            // amount for contributions and subscriptions
            if (!contributiFound) {
                if (line.contains("CONTRIBUTI E ABBONAMENTI")) {
                    String cleanLine = line.replaceAll("\\s+", " ");
                    String importo = cleanLine.replace("CONTRIBUTI E ABBONAMENTI ", "");
                    contributi = Double.parseDouble(importo.replace(".", "").replace(",", "."));
                    contributiFound = true;
                }
            }

            // amount for products (rentals) - only for invoices from 2017 onwards
            if (data != null && Integer.parseInt(data.split("-")[2]) >= 2017 && !prodottiFound) {
                if (line.contains("PRODOTTI")) {
                    String cleanLine = line.replaceAll("\\s+", " ");
                    String importo = cleanLine.replace("PRODOTTI ", "");
                    prodotti = Double.parseDouble(importo.replace(".", "").replace(",", "."));
                    prodottiFound = true;
                }
            }

            // amount for other charges and credits
            if (!altriFound) {
                if (line.contains("ALTRI ADDEBITI E ACCREDITI")) {
                    String cleanLine = line.replaceAll("\\s+", " ");
                    String importo = cleanLine.replace("ALTRI ADDEBITI E ACCREDITI ", "");
                    altri = Double.parseDouble(importo.replace(".", "").replace(",", "."));
                    altriFound = true;
                }
            }

            // VAT amount; waiting for one of the previous sections avoids matching "partita iva" etc.
            if ((contributiFound || altriFound) && !ivaFound) {
                if (line.contains("IVA")) {
                    String cleanLine = line.replaceAll("\\s+", " ");
                    String importo = cleanLine.replace("IVA ", "");
                    iva = Double.parseDouble(importo.replace(".", "").replace(",", "."));
                    ivaFound = true;
                }
            }

            // the first "RIEPILOGO PER UTENZA" marks the start of the consumption table
            if (!startPointFound && line.contains("RIEPILOGO PER UTENZA")) {
                startPointFound = true;
            }

            if (!startPointFound) {
                continue;
            }

            // "SERVIZI OPZIONALI" marks the end of the table
            if (line.matches("SERVIZI OPZIONALI")) {
                // store the row being built, if any; never add null to the result list
                if (consumo != null) {
                    consumi.add(consumo);
                }
                return;
            }

            // NOTE(review): data is still null here if the table starts before an "Emessa" line was
            // seen, which would raise a NullPointerException - confirm the input guarantees the order.
            if (Integer.parseInt(data.split("-")[2]) >= 2017) {
                // ------------------------------
                // INVOICES FROM 2017 ONWARDS
                // ------------------------------
                ArrayList<String> splitted;
                splitted = StringMatcher.matches(line, "\\bLinea\\b\\s((\\d{10}))");
                if (!splitted.isEmpty()) {
                    // a new phone line starts: store the previous one first
                    if (consumo != null) {
                        consumi.add(consumo);
                    }
                    consumo = new Consumo();
                    // format the 10-digit number as "NNN-NNNNNNN"
                    StringBuilder str = new StringBuilder(splitted.get(0));
                    str.insert(3, "-");
                    consumo.Telefono = str.toString();
                }

                splitted = StringMatcher.matches(line,
                        "((?:\\w+\\s|\\w+-\\w+\\s)+)(?:\\d{2}\\/\\d{2}\\/\\d{4}\\s)((?:\\D+\\s)+)(?:\\d{2}\\/\\d{2}-\\d{2}\\/\\d{2}\\s)((\\d+,\\d+))$");
                if (!splitted.isEmpty()) {
                    // contributions or subscriptions
                    if (consumo != null) {
                        if (splitted.get(1).contains("Contributi")) {
                            consumo.CRB += Double.parseDouble(splitted.get(2).replace(",", "."));
                        } else if (splitted.get(1).contains("Abbonamenti")) {
                            consumo.ABB += Double.parseDouble(splitted.get(2).replace(",", "."));
                        }
                    }
                }

                splitted = StringMatcher.matches(line,
                        "\\bRicariche\\b(?:\\s\\w+)+(?:\\s\\d\\s)((\\d+,\\d+))$");
                if (!splitted.isEmpty()) {
                    // top-ups
                    if (consumo != null) {
                        consumo.AAA += Double.parseDouble(splitted.get(0).replace(",", "."));
                    }
                }

                splitted = StringMatcher.matches(line, "\\bTotale\\b\\s((\\d+,\\d+))$");
                if (!splitted.isEmpty()) {
                    // total
                    if (consumo != null) {
                        consumo.Totale += Double.parseDouble(splitted.get(0).replace(",", "."));
                    }
                }
            } else {
                // ------------------------------
                // INVOICES BEFORE 2017
                // ------------------------------

                // phone line and amount:
                // (3 digits)(optional whitespace)-(optional whitespace)(7 digits)
                // (whitespace)(any number of words followed by whitespace)(1+ digits),(1+ digits)
                if (line.matches("(\\d{3}(\\s+)?-(\\s+)?\\d{7})((?:\\s+)(?:\\w+\\s+)+)(\\d+,\\d+)")) {
                    // remove the spaces inside the phone number
                    line = line.replace(" - ", "-");

                    ArrayList<String> splitted = SplitLine.splitConsumo1(line);

                    boolean samePhone = consumo != null
                            && splitted.get(0).replaceAll("\\s+", "").equals(consumo.Telefono);
                    if (!samePhone) {
                        // a different number: store the previous entry and start a new one
                        if (consumo != null) {
                            consumi.add(consumo);
                        }
                        consumo = new Consumo();
                        consumo.Telefono = splitted.get(0);
                    }

                    if (splitted.get(1).contains("Contributi")) {
                        consumo.CRB = Double.parseDouble(splitted.get(2).replace(",", "."));
                    } else if (splitted.get(1).contains("Altri")) {
                        consumo.AAA = Double.parseDouble(splitted.get(2).replace(",", "."));
                    } else if (splitted.get(1).contains("Abbonamenti")) {
                        consumo.ABB = Double.parseDouble(splitted.get(2).replace(",", "."));
                    }
                }

                // continuation row: (any number of words followed by whitespace)(1+ digits),(1+ digits)
                if (line.matches("((?:\\w+\\s+)+)(\\d+,\\d+)")) {
                    ArrayList<String> splitted = SplitLine.splitConsumo2(line);

                    if (consumo != null) {
                        if (splitted.get(0).contains("Contributi")) {
                            consumo.CRB = Double.parseDouble(splitted.get(1).replace(",", "."));
                        } else if (splitted.get(0).contains("Altri")) {
                            consumo.AAA = Double.parseDouble(splitted.get(1).replace(",", "."));
                        } else if (splitted.get(0).contains("Abbonamenti")) {
                            consumo.ABB = Double.parseDouble(splitted.get(1).replace(",", "."));
                        } else if (splitted.get(0).contains("Totale")) {
                            consumo.Totale = Double.parseDouble(splitted.get(1).replace(",", "."));
                        }
                    }
                }
            }

        }
    } catch (IOException ex) {
        EService.logAndRecover(ex);
        setResult(EService.UNRECOVERABLE_ERROR);
        setErrorMessage("FattureManagement.ProcessPDF(): " + ex.getMessage());
    } catch (NumberFormatException ex) {
        // NOTE(review): casting a NumberFormatException to FatalError looks like it cannot succeed at
        // runtime - verify against the EService.logAndRecover overloads.
        EService.logAndRecover((FatalError) ex);
        setResult(EService.UNRECOVERABLE_ERROR);
        setErrorMessage("FattureManagement.ProcessPDF(): " + ex.getMessage());
    }
}

From source file:bpmlab.invioscript.ConstruirQualis.java

/**
 * Reads the WebQualis PDF report and collects the lines that look like journal rows.
 *
 * <p>The first extracted line of every page is skipped. Each remaining line is either accepted as is,
 * rebuilt from its neighbours when it is one of a few known wrapped entries, or counted as invalid.
 * The collected rows and some statistics are printed to standard output.
 *
 * @return the accepted rows, or {@code null} if the PDF cannot be read
 */
public static List<String> primeiraValidacao() {
    PdfReader pdfReader = null;
    try {
        pdfReader = new PdfReader(
                "/home/bpmlab/NetBeansProjects/InvioScript/src/main/java/bpmlab/invioscript/Consulta_Webqualis.pdf");
        List<String> qualis = new ArrayList<>();
        int total = 0;
        int invalidos = 0;
        for (int i = 1; i <= pdfReader.getNumberOfPages(); i++) {
            String[] linha = PdfTextExtractor.getTextFromPage(pdfReader, i).split("\n");
            for (int j = 1; j < linha.length; j++) {
                total++;
                if (linhaValida(linha[j])) {
                    qualis.add(linha[j]);
                    continue;
                }
                String reconstruida = reconstruirLinha(linha, j);
                if (reconstruida != null) {
                    qualis.add(reconstruida);
                } else {
                    invalidos++;
                }
            }
        }
        for (String q : qualis) {
            System.out.println(q);
        }
        System.out.println("TOTAL: " + total);
        System.out.println("VALIDOS: " + qualis.size() + ";" + ((float) qualis.size() * 100 / total) + "%");
        System.out.println("INVALIDOS: " + invalidos + ";" + ((float) invalidos * 100 / total) + "%");
        System.out.println(qualis.size() + invalidos);
        return qualis;
    } catch (IOException ex) {
        return null;
    } finally {
        // the reader was never released before; close it on every path
        if (pdfReader != null) {
            pdfReader.close();
        }
    }
}

/**
 * Tells whether a line is a complete journal row: not a known header/footer fragment, contains
 * "Atualizado" preceded by a stratum token, starts with an ISSN-like prefix and has a title between
 * the ISSN and the stratum.
 */
private static boolean linhaValida(String texto) {
    if (texto.contains("Friday 06 March 2015") || texto.contains("TURISMO")
            || texto.contains("INTERNACIONAIS") || texto.contains("DEMOGRAFIA")
            || texto.contains("Lado C") || texto.contains("y TA Journal of Food C")
            || texto.contains("www.siicsalud.com C NUTRIO Atualizado")
            || texto.contains("ISSN T?TULO ESTRATO ?REA DE AVALIAO STATUS")) {
        return false;
    }

    int indexFinal = texto.indexOf("Atualizado");
    if (indexFinal < 0) {
        return false;
    }

    // first stratum token present, tested in the same order as the original else-if chain
    String[] estratos = { " A1 ", " A2 ", " B1 ", " B2 ", " B3 ", " B4 ", " B5 ", " C " };
    String estrato = null;
    for (String candidato : estratos) {
        if (texto.contains(candidato)) {
            estrato = candidato;
            break;
        }
    }
    if (estrato == null) {
        return false;
    }
    // the stratum must end before "Atualizado" starts
    if (texto.indexOf(estrato) + estrato.length() > indexFinal) {
        return false;
    }

    if (texto.length() < 12) {
        return false;
    }
    if (!texto.substring(0, 9).matches("\\w\\w\\w\\w-\\w\\w\\w\\w")) {
        return false;
    }
    // a stratum right after the ISSN means the title is missing from this line
    return !texto.substring(0, 12).matches("\\w\\w\\w\\w-\\w\\w\\w\\w (?:A1|A2|B1|B2|B3|B4|B5|C )");
}

/**
 * Rebuilds a row that the PDF extraction wrapped over several lines, for the handful of known
 * entries. Returns {@code null} when {@code linha[j]} is not one of them or when the neighbouring
 * lines needed for the rebuild are missing or too short.
 *
 * <p>NOTE(review): the literals below contain "?" and dropped accented letters; they presumably
 * mirror the text as extracted - verify against real extractor output before changing them.
 */
private static String reconstruirLinha(String[] linha, int j) {
    String atual = linha[j];
    switch (atual) {
    case "ADMINISTRAO, CINCIAS CONT?BEIS E":
    case "CINCIA POL?TICA E RELAES":
    case "PLANEJAMENTO URBANO E REGIONAL /": {
        // wrapped area name: previous part is linha[j], the row is linha[j + 1], the rest linha[j + 2]
        if (j + 2 >= linha.length) {
            return null;
        }
        String base = linha[j + 1];
        int posicao = base.indexOf("Atualizado") - 1;
        if (posicao < 0) {
            return null;
        }
        return new StringBuilder(base).insert(posicao, " " + atual + " " + linha[j + 2]).toString();
    }
    case "American Journal of Physiology. Regulatory, Integrative and Comparative Physiology":
        if (j + 1 >= linha.length || linha[j + 1].length() < 9) {
            return null;
        }
        // the title goes right after the 9-character ISSN
        return new StringBuilder(linha[j + 1]).insert(9, " " + atual).toString();
    case "Proceedings of the National Academy of Sciences of the United States of America":
        if (j + 2 >= linha.length || linha[j + 1].length() < 9) {
            return null;
        }
        return new StringBuilder(linha[j + 1]).insert(9, " " + atual + linha[j + 2]).toString();
    case "Revista de Clnica e Pesquisa Odontolgica (Impresso) / Journal of Dental Clinical and":
        if (j + 2 >= linha.length || linha[j + 1].length() < 9) {
            return null;
        }
        return new StringBuilder(linha[j + 1]).insert(9, " " + atual + " " + linha[j + 2]).toString();
    default:
        return null;
    }
}

From source file:br.com.smarttaco.util.HelenaBarbosa.java

/**
 * pdf2txt/*from  w  w  w. j a  va 2 s  .  c  o  m*/
 *
 * @param pdf
 * @param paginas se for <code>null</code> realiza leitura completa.
 * @param txt
 * @throws FileNotFoundException
 * @throws IOException
 */
private static void pdf2txt(final String pdf, List<Integer> paginas, final String txt)
        throws FileNotFoundException, IOException {
    PdfReader reader = new PdfReader(pdf);
    //System.out.println(reader.getInfo().toString());
    if (paginas != null) {
        reader.selectPages(paginas);
    }
    PdfReaderContentParser parser = new PdfReaderContentParser(reader);
    PrintWriter out = new PrintWriter(txt, "UTF-8");
    TextExtractionStrategy strategy;
    for (int i = 1; i <= reader.getNumberOfPages(); i++) {
        strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
        out.println(strategy.getResultantText());
    }
    out.flush();
    out.close();
    reader.close();
}

From source file:br.jus.jfpr.Divisor1.java

/**
 * Divide um arquivo inicialmente em 2, testa se o tamanho que todos
 * arquivos ficaram dentro do limite, se no ficaram apaga tudo e recomea
 * dividindo por, 3, 4, 5, etc. at que todos arquivo fiquem dentro do
 * limite/*from  w  ww  .  j  a  v  a  2 s . co m*/
 *
 * @param arquivoEntrada O arquivo que ser dividido
 * @param arquivoSaida O nome do arquivo que ser criado
 * @param tamArqSel Tamanho selecionado que devero ficar os arquivos
 * divididos
 * @return String informativa do resultado
 */
public static String dividePDF(File arquivoEntrada, String arquivoSaida, int tamArqSel) {

    int daPagina = 1;
    int paraPagina;
    int tamInicial;
    // FileOutputStream arquivoSair = null;
    String MensagemErro = "ok";
    int fatorDivisao = 2; //Incialmente dividir em 2
    arquivoSaida = arquivoSaida.substring(0, arquivoSaida.indexOf('.')) + "-divido"; //PAra definir o nome do arquivo de sada

    try {
        PdfReader PdfDeEntrada = new PdfReader(arquivoEntrada.getAbsolutePath()); //Para ler o arquivo de entrada
        final int totalPaginas = PdfDeEntrada.getNumberOfPages(); //Verifica o total de pginas
        tamInicial = paraPagina = (totalPaginas / fatorDivisao) + 1; //Define o tamanho (em pginas) do 1 arquivo e em quanto dever incrementar(sero o mesmo)
        PdfImportedPage pagina;

        int i = 1; //Contador simples
        while (i <= fatorDivisao && totalPaginas > fatorDivisao) { //Enquanto no fizer todas as divises
            Document documento = new Document();
            FileOutputStream arquivoSair = new FileOutputStream(arquivoSaida + "-" + i + ".pdf"); //Cria o arquivo, vazio
            PdfWriter writer = PdfWriter.getInstance(documento, arquivoSair); //
            documento.open();
            PdfContentByte PContentByte = writer.getDirectContent();
            while (daPagina <= paraPagina) {
                documento.newPage(); //Aloca uma nova pginA
                pagina = writer.getImportedPage(PdfDeEntrada, daPagina); //Seleciona uma pgina especfica. indicada pelo contador
                PContentByte.addTemplate(pagina, 0, 0); //Adiciona a pagina ao contedo
                daPagina++; //Contador simples
            }
            arquivoSair.flush();
            documento.close(); //Grava o arquivo de sada
            arquivoSair.close();
            File arquivoDest = new File(arquivoSaida + "-" + i + ".pdf");
            if (arquivoDest.length() > tamArqSel) { // O tamanho do arquivo de destino ficou maior do que deveria
                delete(arquivoSaida, i); //Chamar a funo para deletar todos arquivos at a chave i
                daPagina = i = 1; //Para recomear tudo de novo
                fatorDivisao++; //Se um arquivo ficou maior, ento deve aumentar a diviso
                tamInicial = paraPagina = (totalPaginas / fatorDivisao) + 1; //Para recomear tudo de novo
            } else {
                i++; //Continua a divisao
                if (i == fatorDivisao) { //Ou seja, chegou a ultima divisao
                    paraPagina = totalPaginas; //O ultimo arquivo conter mais pginas
                } else {
                    paraPagina += tamInicial; //Cada arquivo ter o mesmo nmero de pginas//Incrementa sempre o tamanho inicial
                }
            }
        }
    } catch (IOException | DocumentException e) {
        System.err.println(e.getMessage());
        MensagemErro = e.getMessage();
    }
    return MensagemErro;
}

From source file:ch.kostceco.tools.kostsimy.comparison.modulepdfa.impl.PdfExtractJpegModuleImpl.java

License:Open Source License

/** Parses a PDF and extracts all the images.
 * /*  w  w  w  .  j  a v  a  2  s  .  c  o m*/
 * @param src
 *          the source PDF
 * @param dest
 *          the resulting Image */
public void extractImages(String srcPdf, String destImage) throws IOException, DocumentException {
    jpegCounter = 0;
    ccittCounter = 0;
    jp2Counter = 0;
    jbig2Counter = 0;
    try {
        PdfReader reader = new PdfReader(srcPdf);
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        MyImageRenderListener listener = new MyImageRenderListener(destImage);
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            parser.processContent(i, listener);
        }
        reader.close();
    } catch (IOException e) {
        getMessageService().logError(getTextResourceService().getText(MESSAGE_XML_MODUL_PDF_EXTRACT)
                + getTextResourceService().getText(ERROR_XML_UNKNOWN, e.getMessage()));
    }
}

From source file:ch.kostceco.tools.kostval.validation.modulepdfa.impl.ValidationJimageValidationModuleImpl.java

License:Open Source License

/** Parses a PDF and extracts all the images.
 * /*from  ww  w  .  ja  v  a 2  s. co m*/
 * @param src
 *          the source PDF
 * @param dest
 *          the resulting PDF */
public void extractImages(String srcPdf, String destImage) throws IOException, DocumentException {
    PdfReader reader = new PdfReader(srcPdf);
    PdfReaderContentParser parser = new PdfReaderContentParser(reader);
    MyImageRenderListener listener = new MyImageRenderListener(destImage);
    for (int i = 1; i <= reader.getNumberOfPages(); i++) {
        parser.processContent(i, listener);
    }
    reader.close();
}

From source file:com.betel.flowers.pdf.util.RemoveBlankPageFromPDF.java

/**
 * Copies a PDF to a new file, leaving out the pages considered blank.
 *
 * <p>A page is kept only when its resource dictionary has a /Font or /XObject entry and its content
 * stream is larger than {@code BLANK_THRESHOLD} bytes.
 *
 * @param source path of the PDF to read
 * @param destination path of the PDF to write
 * @throws IOException on I/O error
 * @throws DocumentException if the copy cannot be created
 */
public static void removeBlankPdfPages(String source, String destination)
        throws IOException, DocumentException {
    PdfReader r = null;
    RandomAccessFileOrArray raf = null;
    Document document = null;
    PdfCopy writer = null;

    try {
        r = new PdfReader(source);
        // the RandomAccessFileOrArray(String) constructor is deprecated; since iText 5.4.1 the
        // source is created through RandomAccessSourceFactory
        raf = new RandomAccessFileOrArray(new RandomAccessSourceFactory().createBestSource(source));
        document = new Document(r.getPageSizeWithRotation(1));
        writer = new PdfCopy(document, new FileOutputStream(destination));
        document.open();

        for (int i = 1; i <= r.getNumberOfPages(); i++) {
            // first check: examine the resource dictionary for /Font or /XObject keys.
            // If either is present the page is not blank.
            PdfDictionary pageDict = r.getPageN(i);
            PdfDictionary resDict = (PdfDictionary) pageDict.get(PdfName.RESOURCES);
            boolean noFontsOrImages = true;
            if (resDict != null) {
                noFontsOrImages = resDict.get(PdfName.FONT) == null && resDict.get(PdfName.XOBJECT) == null;
            }

            if (!noFontsOrImages) {
                // second check: the content stream must be larger than the threshold
                byte[] bContent = r.getPageContent(i, raf);
                if (bContent.length > BLANK_THRESHOLD) {
                    PdfImportedPage page = writer.getImportedPage(r, i);
                    writer.addPage(page);
                }
            }
        }
    } finally {
        // close in the original order, but keep going when one close() throws
        // (e.g. closing a document that received no pages)
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } finally {
                try {
                    if (raf != null) {
                        raf.close();
                    }
                } finally {
                    if (r != null) {
                        r.close();
                    }
                }
            }
        }
    }
}

From source file:com.cib.statementstamper.windows.StatementStamperMainWindow.java

License:Open Source License

/**
 * Rebuilds a statement PDF as plain text on A4 pages and stamps logo and footer tables on each page.
 *
 * <p>Steps: (1) the content of every input page is run through a {@code PdfContentStreamProcessor}
 * created with this object as listener, after {@code map} has been cleared; (2) a new document is
 * written from {@code map}, one page per entry, one text line per inner entry; (3) the new document
 * is checked with a {@code TextMarginFinder} and rejected if text reaches the footer area;
 * (4) logo and footer tables are stamped on every page.
 *
 * @param baos the source PDF
 * @return the stamped PDF, or {@code null} (after showing an error notification) when a page has too
 *         much text
 * @throws IOException on I/O error
 * @throws DocumentException if a document cannot be created or stamped
 */
protected ByteArrayOutputStream doStamper(ByteArrayOutputStream baos) throws IOException, DocumentException {

    map.clear();

    // 1) process the content streams of the source pages
    PdfReader reader = new PdfReader(baos.toByteArray());
    try {
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(this);
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            actualPage = i;
            PdfDictionary pageDic = reader.getPageN(i);
            PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);
            processor.processContent(ContentByteUtils.getContentBytesForPage(reader, i), resourcesDic);
        }
    } finally {
        reader.close();
    }

    // 2) write the collected text into a new A4 document
    ByteArrayOutputStream baosLayout = new ByteArrayOutputStream();
    Document newDocument = new Document(PageSize.A4);
    PdfWriter writer = PdfWriter.getInstance(newDocument, baosLayout);

    newDocument.open();
    PdfContentByte canvas = writer.getDirectContent();
    for (Map.Entry<Integer, Map<Float, StringBuffer>> pairs : map.entrySet()) {
        for (Map.Entry<Float, StringBuffer> actualEntry : pairs.getValue().entrySet()) {
            canvas.beginText();
            canvas.setFontAndSize(myFontBase, 7);
            canvas.showTextAligned(Element.ALIGN_LEFT, actualEntry.getValue().toString(), 25,
                    actualEntry.getKey() + 60, 0);
            canvas.endText();
        }
        newDocument.newPage();
    }
    newDocument.close();
    byte[] layoutBytes = baosLayout.toByteArray();

    // 3) reject the document if text goes below the footer limit on any page
    reader = new PdfReader(layoutBytes);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            TextMarginFinder finder = parser.processContent(i, new TextMarginFinder());
            if (finder.getLly() <= 68) {
                getWindow().showNotification("Hiba", "A(z) " + i + ".ik oldalon tl sok a szveg !!!",
                        Notification.TYPE_ERROR_MESSAGE);
                return null;
            }
        }
    } finally {
        reader.close();
    }

    // 4) stamp logo and footers into a fresh stream: writing into the stream that already holds the
    //    intermediate document would append a second PDF after the first one
    ByteArrayOutputStream baosFinal = new ByteArrayOutputStream();
    reader = new PdfReader(layoutBytes);
    try {
        PdfStamper stamper = new PdfStamper(reader, baosFinal);
        int n = reader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            PdfContentByte overContent = stamper.getOverContent(i);
            overContent.addImage(logo, 131, 0, 0, 32, 44, 775);
            getFooterTable(i, n).writeSelectedRows(0, -1, 27, 68, stamper.getOverContent(i));
            getIspLogoTable(i, n).writeSelectedRows(0, -1, 425, 45, stamper.getOverContent(i));
        }
        stamper.close();
    } finally {
        reader.close();
    }
    return baosFinal;
}

From source file:com.cloudhub.util.PDFToText.java

License:Apache License

/**
 * Parses a PDF to a plain text file.
 *
 * @param source the original PDF
 * @param destination the resulting text
 * @throws IOException if the PDF cannot be read or the text file cannot be written
 */
public static void parsePdf(String source, String destination) throws IOException {
    PdfReader reader = new PdfReader(source);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        PrintWriter out = new PrintWriter(new FileOutputStream(destination));
        try {
            for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
            // PrintWriter never throws on write; checkError() flushes and reports a failure
            if (out.checkError()) {
                throw new IOException("Error writing " + destination);
            }
        } finally {
            out.close();
        }
    } finally {
        // the reader was never closed before
        reader.close();
    }
}

From source file:com.cyberninjas.invoice.pdf.PdfInvoiceEditor.java

License:Open Source License

/**
 * Write text relative to the matching reference text.
 *
 * <p>/*  w w  w  .  j a  va 2 s .  c o  m*/
 * Note: Due to the way PDF stores text, found blocks of text may contain additional text beyond the reference text.
 * This can cause blocks to be larger than expected requiring a larger or smaller offset to be set to align
 * properly.</p>
 *
 * @param text the text to write.
 * @param referenceText the reference text to write relative to.
 * @param offset the offset to write relative to the reference text.
 * @param align the alignment.
 * @param findAll indicates if text should be written at every occurrence or only the first.
 * @throws IOException on I/O error.
 */
public final void writeTextAtOffset(String text, String referenceText, float offset, final Alignment align,
        boolean findAll) throws IOException {
    PdfReader reader = getReader();

    PdfReaderContentParser parser = new PdfReaderContentParser(reader);

    TextChunkExtractionStrategy strategy;

    for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
        strategy = parser.processContent(pageNum, new TextChunkExtractionStrategy());

        if (findAll) {
            for (TextChunk textChunk : strategy.matchAllText(referenceText)) {
                this.writeText(pageNum, text, align, textChunk.getEndLocation().get(Vector.I1) + offset,
                        textChunk.getEndLocation().get(Vector.I2));
            }
        } else {
            TextChunk textChunk = strategy.matchText(referenceText);

            if (textChunk != null) {
                this.writeText(pageNum, text, align, textChunk.getEndLocation().get(Vector.I1) + offset,
                        textChunk.getEndLocation().get(Vector.I2));
            }
        }
    }
}