commentextractor.CommentExtractorApp.java Source code

Introduction

Here is the source code for commentextractor.CommentExtractorApp.java
Source

/*
 *
 * Extracts comments from PDF files using the iText PDF library.
 * This code extracts the comments (annotations) from a PDF file using
 * iText and prints them on the console.
 *
 *
 * Copyright (c) 2011 Hossein Noorikhah
 * Copyright (c) 2009 Ali Vajahat
 *
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 * See: <http://www.gnu.org/licenses/gpl-3.0-standalone.html>
 *
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
    
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *
 */
package commentextractor;

import com.itextpdf.text.pdf.PdfArray;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfString;
import java.io.FileInputStream;
import java.io.UnsupportedEncodingException;
import java.util.ListIterator;
import java.io.PrintStream;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jdesktop.application.Application;
import org.jdesktop.application.SingleFrameApplication;

/**
 * The main class of the application.
 *
 * <p>When started without command-line arguments the GUI is launched. When
 * started with arguments the class works as a console tool: it reads the
 * annotations of a PDF file and prints their text contents to standard output
 * encoded as UTF-8.
 */
public class CommentExtractorApp extends SingleFrameApplication {

    /**
     * At startup create and show the main frame of the application.
     */
    @Override
    protected void startup() {
        show(new CommentExtractorView(this));
    }

    /**
     * This method is to initialize the specified window by injecting resources.
     * Windows shown in our application come fully initialized from the GUI
     * builder, so this additional configuration is not needed.
     */
    @Override
    protected void configureWindow(java.awt.Window root) {
    }

    /**
     * A convenient static getter for the application instance.
     * @return the instance of CommentExtractorApp
     */
    public static CommentExtractorApp getApplication() {
        return Application.getInstance(CommentExtractorApp.class);
    }

    /**
     * Main method launching the application.
     *
     * <p>Argument handling:
     * <ul>
     *   <li>no arguments: launch the GUI;</li>
     *   <li>two arguments: extract the comments of every page of the file
     *       named by {@code args[1]};</li>
     *   <li>three arguments: {@code args[2]} is the first page to scan;</li>
     *   <li>four arguments: {@code args[2]} and {@code args[3]} are the first
     *       and last page to scan;</li>
     *   <li>any other count, or a page argument that is not an integer:
     *       print the usage message.</li>
     * </ul>
     * {@code args[0]} is never read in console mode.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            launch(CommentExtractorApp.class, args);
            return;
        }
        if (args.length == 1 || args.length > 4) {
            printUsage();
            return;
        }

        int first = 1;
        int last = -1; // -1 means "up to the last page of the document"
        try {
            if (args.length >= 3) {
                first = Integer.parseInt(args[2]);
            }
            if (args.length == 4) {
                last = Integer.parseInt(args[3]);
            }
        } catch (NumberFormatException e) {
            printUsage();
            return;
        }

        PrintStream out;
        try {
            out = new PrintStream(System.out, true, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.SEVERE, null, ex);
            // Fall back to the platform stream instead of dereferencing null below.
            out = System.out;
        }
        out.println(extractComments(args[1], first, last));
    }

    /**
     * Prints the command-line usage message to standard error.
     */
    static void printUsage() {
        // NOTE(review): main() reads the file name from args[1] and ignores
        // args[0], but this message does not mention a leading argument.
        // Confirm the intended command line before changing either side.
        System.err.println(
                "Usage: java -cp .;itextpdf-5.1.2.jar" + " CommentsTest filename.pdf [firstPage] [lastPage]");

    }

    /**
     * Collects the text contents of the annotations found on a range of pages.
     *
     * <p>For every annotation that has a string {@code /Contents} entry, a
     * separator line, the page number and the text are appended to the result.
     * Failures are logged and whatever was collected up to that point is
     * returned; the method never returns {@code null}.
     *
     * @param filename path of the PDF file to read
     * @param first    first page to scan (1-based); values below 1 are treated as 1
     * @param last     last page to scan (inclusive); {@code -1} or any value
     *                 beyond the page count means the last page of the document
     * @return the collected comment text, or an empty string if nothing could be read
     */
    static String extractComments(String filename, int first, int last) {
        // Created up front so a failure while opening the file still yields a
        // valid (empty) result instead of a NullPointerException.
        StringBuilder output = new StringBuilder(1024);
        PdfReader reader = null;
        try {
            reader = new PdfReader(filename);

            int pageCount = reader.getNumberOfPages();
            int lastPage = (last >= pageCount || last == -1) ? pageCount : last;
            int firstPage = Math.max(first, 1); // page numbers are 1-based

            for (int i = firstPage; i <= lastPage; i++) {
                PdfDictionary page = reader.getPageN(i);
                if (page == null) {
                    continue;
                }

                PdfArray annotsArray = page.getAsArray(PdfName.ANNOTS);
                if (annotsArray == null) {
                    continue;
                }

                for (ListIterator<PdfObject> iter = annotsArray.listIterator(); iter.hasNext();) {
                    // Entries may be indirect references; resolve them and skip
                    // anything that is not an annotation dictionary.
                    PdfObject resolved = PdfReader.getPdfObject(iter.next());
                    if (!(resolved instanceof PdfDictionary)) {
                        continue;
                    }

                    // getAsString yields null when the entry is absent or not a string.
                    PdfString content = ((PdfDictionary) resolved).getAsString(PdfName.CONTENTS);
                    if (content == null) {
                        continue;
                    }

                    output.append("----------\n");
                    output.append("Page ").append(i);
                    output.append("\n");
                    // Expand lone carriage returns to CR LF, leaving existing
                    // CR LF pairs untouched so they are not doubled.
                    output.append(content.toUnicodeString().replaceAll("\r(?!\n)", "\r\n"));
                    output.append("\n");
                }
            }
        } catch (Exception e) {
            Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.SEVERE, null, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (RuntimeException e) {
                    Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.WARNING, null, e);
                }
            }
        }
        return output.toString();
    }
}