de.mat.utils.pdftools.PdfMerge.java Source code

Introduction

Here is the source code for de.mat.utils.pdftools.PdfMerge.java
Source

/**
 * <h4>FeatureDomain:</h4>
 *     Publishing
 *
 * <h4>FeatureDescription:</h4>
 *     software for publishing<br>
 *     pdftools
 * 
 * @author Michael Schreiner <michael.schreiner@your-it-fellow.de>
 * @category collaboration
 * @copyright Copyright (c) 2011-2014, Michael Schreiner
 * @license http://mozilla.org/MPL/2.0/ Mozilla Public License 2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package de.mat.utils.pdftools;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.log4j.Logger;

import com.itextpdf.text.Document;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfReader;

/**
 * <h4>FeatureDomain:</h4>
 *     Publishing
 * <h4>FeatureDescription:</h4>
 *     merge pdf-files
 * 
 * @package de.mat.utils.pdftools
 * @author Michael Schreiner <michael.schreiner@your-it-fellow.de>
 * @category Publishing
 * @copyright Copyright (c) 2011-2014, Michael Schreiner
 * @license http://mozilla.org/MPL/2.0/ Mozilla Public License 2.0
 */
public class PdfMerge extends CmdLineJob {

    /**
     * Bookmark with the sourcefile-data for the merge and the TOC.
     * <p>
     * Keys used by this class: SRC=filename / NAME=label / PAGE=pagenum /
     * PAGES=pagecount / TYPE=for styling [ue,ue2,master,img].
     * <p>
     * The type parameters are named K and V so they no longer hide
     * java.lang.String and java.lang.Object inside this class.
     *
     * @param <K> - type of the keys (this class stores String keys)
     * @param <V> - type of the values
     */
    public static class Bookmark<K, V> extends HashMap<K, V> {
    }

    /**
     * Creates the job and passes the raw command line arguments on to the
     * CmdLineJob base class.
     *
     * @param args - the command line arguments
     */
    public PdfMerge(String[] args) {
        super(args);
    }

    // Logger of this class
    private static final Logger LOGGER = Logger.getLogger(PdfMerge.class);

    // Pattern for one line of the bookmark-listfile: four groups separated by three tabs.
    // parseBookMarksFromFile maps the groups to SRC, NAME, TYPE and PAGE (in this order).
    public static final String CONST_PATTERN = "^(.*)\\t(.*)\\t(.*)\\t(.*)$";
    // precompiled CONST_PATTERN
    private static final Pattern CONST_BOOKMARK = Pattern.compile(CONST_PATTERN);

    /**
     * Accepts the command line only if at least one positional argument
     * is present.
     *
     * @param cmdLine - the parsed command line
     * @return true if there is at least one positional argument
     * @throws Throwable
     */
    @Override
    protected boolean validateCmdLine(CommandLine cmdLine) throws Throwable {
        int argCount = cmdLine.getArgList().size();
        return argCount >= 1;
    }

    /**
     * Prints the usage text (syntax, description and the available options)
     * with the commons-cli HelpFormatter.
     *
     * @throws Throwable
     */
    @Override
    protected void printUsage() throws Throwable {
        String cmdLineSyntax = this.getJobName() + " DestPdfFile [SrcPdfFile1]... [SrcPdfFileN]";
        String description = "Desc: merge all SrcPdfFile1-N or the files from Option filelist to DestPdfFile."
                + " If option trim is set -> trim all empty pages. Asresult a htmlTOC will print.";
        String footer = "";

        new HelpFormatter().printHelp(cmdLineSyntax, description, this.availiableCmdLineOptions, footer);
    }

    /**
     * Builds the command line options of this job: help (-h),
     * filelist (-f, with argument) and trim (-t). None of them is required.
     *
     * @return the available options
     * @throws Throwable
     */
    @Override
    protected Options genAvailiableCmdLineOptions() throws Throwable {
        Options options = new Options();

        // -h / --help: flag without argument
        Option optHelp = new Option("h", "help", false, "usage");
        optHelp.setRequired(false);
        options.addOption(optHelp);

        // -f / --filelist: expects the name of the tab-separated listfile
        String descFileList = "Parse list of pdffiles from tabseaparated file with structure (PDFFILE "
                + "LABEL TYPE PAGE)"
                + "\nTYPE and PAGE can be empty"
                + "\nTYPE is used to style the TOC 'bookmark_line_TYPE' - ue, ue2, master, img are predefined";
        Option optFileList = new Option("f", "filelist", true, descFileList);
        optFileList.setRequired(false);
        options.addOption(optFileList);

        // -t / --trim: flag without argument, trims empty pages
        Option optTrim = new Option("t", "trim", false, "Trim empty pages");
        optTrim.setRequired(false);
        options.addOption(optTrim);

        return options;
    }

    /**
     * Runs the job: the first positional argument is the destination file.
     * The bookmarks come either from the listfile (option f) or from the
     * remaining positional arguments. Afterwards the pdfs are merged and
     * the html-TOC is printed to STDOUT.
     *
     * @throws Throwable
     */
    @Override
    public void doJob() throws Throwable {
        String[] args = this.cmdLine.getArgs();
        String fileNew = args[0];

        List<Bookmark> lstBookMarks;
        String listFile = this.cmdLine.getOptionValue("f", null);
        if (listFile == null) {
            // no listfile: every argument after the destination is a sourcefile
            lstBookMarks = new ArrayList<Bookmark>();
            for (int idx = 1; idx < args.length; idx++) {
                String srcFile = args[idx];
                Bookmark bookMark = new Bookmark();
                bookMark.put("SRC", srcFile);
                bookMark.put("NAME", srcFile);
                lstBookMarks.add(bookMark);
            }
        } else {
            // listfile given: read the bookmarks from it
            lstBookMarks = parseBookMarksFromFile(listFile);
        }

        // merge the pdfs
        boolean flgTrim = this.cmdLine.hasOption("t");
        mergePdfs(lstBookMarks, fileNew, flgTrim);

        // print the html-TOC to STDOUT
        String htmlToc = showBookMarksAsHtml(lstBookMarks);
        System.out.println(htmlToc);
    }

    /**
     * Command line entry point: creates the job from the arguments and
     * starts the job processing.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        new PdfMerge(args).startJobProcessing();
    }

    /**
     * <h4>FeatureDomain:</h4>
     *     PublishingTools
     * <h4>FeatureDescription:</h4>
     *     merge pdfs from lstBookMarks to fileNew and trim empty pages if flgTrim 
     *     is set
     * <h4>FeatureResult:</h4>
     *   <ul>
     *     <li>create PDF - fileNew
     *     <li>updates lstBookMarks - updates PAGE (firstPageNum) and 
     *                                PAGES (countPage) per Bookmark
     *   </ul> 
     * <h4>FeatureKeywords:</h4>
     *     PDF Publishing
     * @param lstBookMarks - list of Bookmark (files to merge), must contain at least one entry;
     *                       the SRC entry of each Bookmark is the filename to read
     * @param fileNew - destination PDF filename
     * @param flgTrim - trim empty pages
     * @throws IllegalArgumentException if lstBookMarks is null or empty
     * @throws Exception if a source file cannot be read or the destination cannot be written
     */
    public static void mergePdfs(List<Bookmark> lstBookMarks, String fileNew, boolean flgTrim) throws Exception {
        // fail early with a clear message instead of an IndexOutOfBoundsException/NullPointerException
        if (lstBookMarks == null || lstBookMarks.isEmpty()) {
            throw new IllegalArgumentException("lstBookMarks must contain at least one bookmark to merge");
        }

        // create the new document with the page size of the first source document
        Bookmark firstBookMark = lstBookMarks.get(0);
        PdfReader firstReader = new PdfReader((String) firstBookMark.get("SRC"));
        Document documentNew;
        try {
            documentNew = new Document(firstReader.getPageSizeWithRotation(1));
        } finally {
            firstReader.close();
        }

        FileOutputStream outNew = new FileOutputStream(fileNew);
        try {
            PdfCopy writerNew = new PdfCopy(documentNew, outNew);
            documentNew.open();

            // page number (1-based) in the new document where the current source file starts
            int siteNr = 1;
            for (Bookmark curBookMark : lstBookMarks) {
                String curFileName = (String) curBookMark.get("SRC");

                if (LOGGER.isInfoEnabled()) {
                    LOGGER.info("add File:" + curFileName);
                }

                // copy the pages; the reader is closed even if copying fails
                PdfReader reader = new PdfReader(curFileName);
                int newPages;
                try {
                    newPages = PdfExtractEmptyPages.addTrimmedPages(curFileName, reader, writerNew,
                            (PdfCopy) null, flgTrim);
                } finally {
                    reader.close();
                }

                // update the bookmark with its position in the new document
                curBookMark.put("PAGE", Integer.valueOf(siteNr));
                curBookMark.put("PAGES", Integer.valueOf(newPages));
                siteNr += newPages;
            }
            documentNew.close();
            writerNew.close();
        } finally {
            // releases the file handle if the merge failed; closing an already
            // closed FileOutputStream has no effect
            outNew.close();
        }
    }

    /**
     * <h4>FeatureDomain:</h4>
     *     PublishingTools
     * <h4>FeatureDescription:</h4>
     *     read the listfile and create one Bookmark per line that matches
     *     CONST_PATTERN (SRC, NAME, TYPE, PAGE separated by tabs); all other
     *     lines are skipped. In the filename "+" and ">" are replaced by "_".
     * <h4>FeatureResult:</h4>
     *   <ul>
     *     <li>returnValue lstBookMarks - list of Bookmarks (Filename, Label...)
     *   </ul> 
     * <h4>FeatureKeywords:</h4>
     *     PDF Publishing
     * @param listFile - file with the Bookmarks
     * @return - list of Bookmarks (empty if no line matches)
     * @throws Throwable
     */
    public static List<Bookmark> parseBookMarksFromFile(String listFile) throws Throwable {
        List<Bookmark> bookMarks = new ArrayList<Bookmark>();

        // process the listfile line by line
        for (String rawLine : readFromFile(listFile).split("\n")) {
            // drop carriage returns of windows line endings
            String line = rawLine.replace("\r", "");

            List<MatchResult> matches = findMatches(CONST_BOOKMARK, line);
            if (matches == null || matches.isEmpty()) {
                // not a bookmark line
                continue;
            }

            for (MatchResult match : matches) {
                if (match.groupCount() != 4) {
                    continue;
                }

                // found: add the bookmark to the list
                String fileName = match.group(1).replace("+", "_").replace(">", "_");
                Bookmark bookMark = new Bookmark();
                bookMark.put("SRC", fileName);
                bookMark.put("NAME", match.group(2));
                bookMark.put("TYPE", match.group(3));
                bookMark.put("PAGE", match.group(4));
                bookMarks.add(bookMark);
            }
        }

        return bookMarks;
    }

    /**
     * <h4>FeatureDomain:</h4>
     *     PublishingTools
     * <h4>FeatureDescription:</h4>
     *     create html-TOC from Bookmark-list: one div per Bookmark with the
     *     css-class "bookmark_line" plus a class derived from NAME
     *     (prefix "Region:") and TYPE, containing SRC, NAME and PAGE
     * <h4>FeatureResult:</h4>
     *   <ul>
     *     <li>returnValue htmlsnippet - html-snippet for TOC 
     *   </ul> 
     * <h4>FeatureKeywords:</h4>
     *     PDF Publishing
     * @param lstBookMarks - list of Bookmarks (Filename, Label...)
     * @return - html-snippet (empty String for an empty list)
     * @throws Throwable
     */
    public static String showBookMarksAsHtml(List<Bookmark> lstBookMarks) throws Throwable {
        // StringBuilder instead of repeated String concatenation inside the loop
        StringBuilder res = new StringBuilder();

        // iterate bookmarks
        for (Bookmark curBookMark : lstBookMarks) {
            // extract data
            Object curFileName = curBookMark.get("SRC");
            Object curName = curBookMark.get("NAME");
            Object curPage = curBookMark.get("PAGE");
            Object curType = curBookMark.get("TYPE");

            // build the css-classes of the line
            String lineStyle = "bookmark_line";
            // NAME may be missing: treat it as "no region" instead of failing
            if (curName != null && curName.toString().startsWith("Region:")) {
                lineStyle += " bookmark_line_region";
            }
            if (curType != null) {
                String type = curType.toString();
                if (type.equalsIgnoreCase("ue")) {
                    lineStyle += " bookmark_line_ue";
                } else if (type.equalsIgnoreCase("ue2")) {
                    lineStyle += " bookmark_line_ue2";
                } else if (type.equalsIgnoreCase("master")) {
                    lineStyle += " bookmark_line_region_master";
                } else if (type.equalsIgnoreCase("img")) {
                    lineStyle += " bookmark_line_img";
                } else {
                    // any other type is used as-is for the class name
                    lineStyle += " bookmark_line_" + type;
                }
            }

            // create html
            // NOTE(review): the values are written without html-escaping - confirm
            // that labels and filenames never contain markup characters
            res.append("<div class='").append(lineStyle).append("'>")
                    .append("<div class='bookmark_file'>").append(curFileName).append("</div>")
                    .append("<div class='bookmark_name'>").append(curName).append("</div>")
                    .append("<div class='bookmark_page'>").append(curPage).append("</div>")
                    .append("</div>\n");
        }

        return res.toString();
    }

    /**
     * <h4>FeatureDomain:</h4>
     *     PublishingTools
     * <h4>FeatureDescription:</h4>
     *     collect all successive matches of pattern in haystack
     * <h4>FeatureResult:</h4>
     *   <ul>
     *     <li>returnValue list - list of Matches, null if there is no match
     *   </ul> 
     * <h4>FeatureKeywords:</h4>
     *     PatternMatching
     * @param pattern - pattern to find
     * @param haystack - haystack
     * @return - list of Matches in the order they were found or null if nothing matches
     */
    public static List<MatchResult> findMatches(Pattern pattern, CharSequence haystack) {
        Matcher matcher = pattern.matcher(haystack);
        if (!matcher.find()) {
            // callers rely on null (not an empty list) for "no match"
            return null;
        }

        List<MatchResult> found = new ArrayList<MatchResult>();
        do {
            // snapshot of the current match, independent of further find() calls
            found.add(matcher.toMatchResult());
        } while (matcher.find());

        return found;
    }

    /**
     * <h4>FeatureDomain:</h4>
     *     PublishingTools
     * <h4>FeatureDescription:</h4>
     *     reads the file line by line; every line is terminated by "\n" in
     *     the result (also the last one, windows line endings are normalized)
     * <h4>FeatureResult:</h4>
     *   <ul>
     *     <li>returnValue String - filecontent
     *   </ul> 
     * <h4>FeatureKeywords:</h4>
     *     Filehandling
     * @param fileName - file to read
     * @return - filecontent (empty String for an empty file)
     * @throws IllegalArgumentException if fileName is null or blank
     * @throws Exception if the file cannot be opened or read
     */
    public static String readFromFile(String fileName) throws Exception {
        // check parameter
        if (fileName == null || fileName.trim().length() <= 0) {
            throw new IllegalArgumentException("FileName must not be empty: '" + fileName + "'");
        }

        // read file; FileReader decodes with the platform default charset
        StringBuilder fileContent = new StringBuilder();
        BufferedReader br = new BufferedReader(new FileReader(new File(fileName)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                fileContent.append(line).append('\n');
            }
        } finally {
            // release the file handle even if reading fails
            br.close();
        }

        return fileContent.toString();
    }

}