coviam.pdf.PdfParser.java Source code

Java tutorial

Introduction

Here is the source code for coviam.pdf.PdfParser.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package coviam.pdf;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.StringTokenizer;

/**
 * Extracts the text of a PDF document with iText and writes it to a text
 * file, one {@code line-->}-prefixed line per non-empty line of extracted text.
 *
 * @author amit
 */
public class PdfParser {

    /** Input PDF read by the no-argument {@link #getText()}. */
    private static final String DEFAULT_PDF_PATH =
            "/home/amit/NetBeansProjects/ResumeParser/data/resumes/ejd1.pdf";

    /**
     * Output file written by the no-argument {@link #getText()}.
     * NOTE(review): the base name is "edj1" while the input is "ejd1" --
     * possibly a typo; kept as-is because existing consumers may rely on it.
     */
    private static final String DEFAULT_TEXT_PATH =
            "/home/amit/NetBeansProjects/ResumeParser/data/resumes/edj1.txt";

    /**
     * Extracts the text of the default PDF and writes it to the default text
     * file (see {@link #getText(String, String)} for the output format).
     *
     * <p>I/O failures do not propagate to the caller; they are reported on
     * {@code System.err} instead of being silently discarded.
     */
    public void getText() {
        try {
            getText(DEFAULT_PDF_PATH, DEFAULT_TEXT_PATH);
        } catch (IOException ioe) {
            System.err.println("Failed to extract text from " + DEFAULT_PDF_PATH
                    + " to " + DEFAULT_TEXT_PATH + ": " + ioe);
        }
    }

    /**
     * Extracts the text of every page of {@code pdfPath}, echoes it to
     * {@code System.out} prefixed with {@code "-->"}, and writes it to
     * {@code textPath} with each newline-separated, non-empty line prefixed
     * by {@code "line-->"}.
     *
     * @param pdfPath  path of the PDF file to read
     * @param textPath path of the text file to create or overwrite
     * @throws IOException if the PDF cannot be read or the text file cannot
     *                     be written
     */
    public void getText(String pdfPath, String textPath) throws IOException {
        // Extract first, so a failed read never truncates an existing output file.
        String resultText = extractText(pdfPath);

        System.out.println("-->" + resultText);
        // Flush only: System.out is shared by the whole JVM and must stay open.
        System.out.flush();

        try (PrintWriter lineWriter = new PrintWriter(new FileOutputStream(textPath))) {
            // StringTokenizer skips empty tokens, so blank lines are not written.
            StringTokenizer lines = new StringTokenizer(resultText, "\n");
            while (lines.hasMoreTokens()) {
                lineWriter.println("line-->" + lines.nextToken());
            }
            // PrintWriter swallows write errors; checkError() flushes and surfaces them.
            if (lineWriter.checkError()) {
                throw new IOException("Error while writing " + textPath);
            }
        }
    }

    /**
     * Reads the PDF at {@code pdfPath} and returns the concatenated text of
     * all pages, with every {@code "-\n"} sequence removed.
     *
     * @param pdfPath path of the PDF file to read
     * @return the extracted text
     * @throws IOException if the PDF cannot be opened or parsed
     */
    private static String extractText(String pdfPath) throws IOException {
        PdfReader reader = new PdfReader(pdfPath);
        try {
            PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
            StringBuilder textBuilder = new StringBuilder();
            int pageCount = reader.getNumberOfPages();
            // The loop starts at 1: iText page numbers are presumably 1-based.
            for (int page = 1; page <= pageCount; page++) {
                TextExtractionStrategy strategy =
                        contentParser.processContent(page, new SimpleTextExtractionStrategy());
                textBuilder.append(strategy.getResultantText());
            }
            // Drop "-\n" so a word hyphenated across a line break is rejoined.
            return textBuilder.toString().replace("-\n", "");
        } finally {
            reader.close();
        }
    }

}