net.japt.transpdf.TransPDF.java Source code

Java tutorial

Introduction

Here is the source code for net.japt.transpdf.TransPDF.java

Source

package net.japt.transpdf;

import java.io.FileWriter;
import java.io.IOException;

import com.gtranslate.Language;
import com.gtranslate.Translator;
import com.itextpdf.text.pdf.PdfReader;

/**
 * TransPDF 
 *
 * Translates the text of English PDF documents into Spanish plain text.
 * 
 * Copyright 2014 Jorge Alberto Ponce Turrubiates
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * @category   TransPDF
 * @package    net.japt.transpdf
 * @copyright  Copyright 2014 JAPT
 * @license    http://www.apache.org/licenses/LICENSE-2.0
 * @version    1.0.0, 2014-12-05
 * @author     <a href="mailto:the.yorch@gmail.com">Jorge Alberto Ponce Turrubiates</a>
 */
public class TransPDF {
    /**
     * Translates an English PDF into Spanish text and appends it to a text file.
     *
     * <p>The text of every page is extracted and split into lines. Each line is
     * split on periods and then on commas; every non-empty fragment is translated
     * on its own, and the translated fragments of one input line are written as one
     * output line, each fragment preceded by a single space.
     *
     * <p>An {@link IOException} is not propagated to the caller: its stack trace is
     * printed to standard error. The PDF reader and the output writer are closed
     * whether or not the translation completes.
     *
     * @param pdfFile String Path of the PDF Document
     * @param textFile String Path of the Translated Text Document (opened in append mode)
     */
    public static void translate(String pdfFile, String textFile) {
        PdfReader pdfReader = null;
        FileWriter transTxtFile = null;

        try {
            pdfReader = new PdfReader(pdfFile);
            // Second argument 'true' opens the file in append mode, so existing content is kept.
            transTxtFile = new FileWriter(textFile, true);

            Translator translator = Translator.getInstance();
            int numPages = pdfReader.getNumberOfPages();

            // Pages are addressed from 1 to numPages inclusive.
            for (int page = 1; page <= numPages; page++) {
                String pageText = com.itextpdf.text.pdf.parser.PdfTextExtractor.getTextFromPage(pdfReader, page);

                String[] lines = splitOrWhole(pageText, "\n");
                for (int nl = 0; nl < lines.length; nl++) {
                    transTxtFile.write(translateLine(translator, lines[nl]) + "\n");
                }
            }

            transTxtFile.flush();
        } catch (IOException e) {
            // Best-effort behaviour kept from the original: report and return.
            e.printStackTrace();
        } finally {
            // Release both resources even when extraction or translation failed part-way.
            try {
                if (transTxtFile != null) {
                    transTxtFile.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (pdfReader != null) {
                    pdfReader.close();
                }
            }
        }
    }

    /**
     * Translates one line of text from English to Spanish, fragment by fragment.
     *
     * @param translator translator used for every fragment
     * @param line the source line
     * @return the translated fragments, each preceded by one space; empty if the
     *         line contains no non-empty fragment
     */
    private static String translateLine(Translator translator, String line) {
        StringBuilder translated = new StringBuilder();

        String[] sentences = splitOrWhole(line, "\\.");
        for (int ns = 0; ns < sentences.length; ns++) {
            String[] fragments = splitOrWhole(sentences[ns], ",");

            for (int nf = 0; nf < fragments.length; nf++) {
                // Adjacent separators produce empty fragments; there is nothing to translate in them.
                if (fragments[nf].length() > 0) {
                    String translatedText = translator.translate(fragments[nf], Language.ENGLISH,
                            Language.SPANISH);
                    translated.append(" ").append(translatedText);
                }
            }
        }

        return translated.toString();
    }

    /**
     * Splits {@code text} around matches of {@code regex}, falling back to the whole text.
     *
     * @param text the text to split
     * @param regex the delimiting regular expression
     * @return the parts, or a one-element array holding {@code text} when the split
     *         yields no parts at all
     */
    private static String[] splitOrWhole(String text, String regex) {
        String[] parts = text.split(regex);

        // String.split discards trailing empty strings, so a text made up only of
        // separators yields an empty array; keep the original text in that case.
        if (parts.length == 0) {
            return new String[] { text };
        }
        return parts;
    }
}