it.digitalhumanities.dhcpublisher.DHCPublisher.java Source code

Java tutorial

Introduction

Here is the source code for it.digitalhumanities.dhcpublisher.DHCPublisher.java

Source

/*
 * Copyright (c) 2015 Marco Petris
 * License: see LICENSE file
 */
package it.digitalhumanities.dhcpublisher;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.commons.io.IOUtils;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.Pipeline;
import com.itextpdf.tool.xml.XMLWorker;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
import com.itextpdf.tool.xml.pipeline.html.AbstractImageProvider;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;

/**
 * A publishing tool for DHC file collections.
 * 
 * 
 * @author marco.petris@web.de
 *
 */
public class DHCPublisher {

    public DHCPublisher() {
    }

    private void convert(String unzippedDirName) {
        File unzippedDir = new File(unzippedDirName);
        File[] subDirs = unzippedDir.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.isDirectory();
            }
        });

        for (File subDir : subDirs) {
            System.out.println("Processing directory " + subDir + "...");
            try {
                convertFile(subDir);
                System.out.println("Conversion in " + subDir + " done.");
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("Could not convert HTML in " + subDir);
            }
        }
    }

    private void convertFile(final File subDir) throws DocumentException, IOException {

        File[] htmlFiles = subDir.listFiles(new FilenameFilter() {

            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(".html");
            }
        });

        if (htmlFiles.length > 0) {
            File htmlFile = htmlFiles[0];
            String targetName = htmlFile.getName().substring(0, htmlFile.getName().length() - 4) + "pdf";

            Document document = new Document();
            File targetFile = new File(subDir, targetName);
            if (targetFile.exists()) {
                targetFile.delete();
            }

            try (FileOutputStream fos = new FileOutputStream(targetFile)) {
                PdfWriter writer = PdfWriter.getInstance(document, fos);
                writer.getAcroForm().setNeedAppearances(true);
                document.open();

                HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
                htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
                htmlContext.setImageProvider(new AbstractImageProvider() {
                    public String getImageRootPath() {
                        return subDir.getAbsolutePath();
                    }
                });

                CSSResolver cssResolver = XMLWorkerHelper.getInstance().getDefaultCssResolver(true);

                Pipeline<?> pipeline = new CssResolverPipeline(cssResolver,
                        new HtmlPipeline(htmlContext, new PdfWriterPipeline(document, writer)));

                XMLWorker worker = new XMLWorker(pipeline, true);
                XMLParser p = new XMLParser(worker);

                try (FileInputStream fis = new FileInputStream(htmlFile)) {
                    p.parse(fis);
                } finally {
                    document.close();
                }
            }
        } else {
            throw new IllegalArgumentException(subDir + " does not contain HTML files!");
        }
    }

    private void unzip(String dhcDirName, String targetDirName) throws IOException {
        File dhcDir = new File(dhcDirName);
        if (!dhcDir.exists()) {
            throw new IllegalArgumentException(dhcDirName + " does not exist!");
        }

        File targetDir = new File(targetDirName);
        if (!targetDir.exists()) {
            targetDir.mkdirs();
        }

        File[] dhcFiles = dhcDir.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.isFile();
            }
        });
        int counter = 0;
        for (File file : dhcFiles) {
            counter++;
            System.out.println("Unzipping " + counter + ". file " + file + "...");
            try {
                unzipFile(counter, file, targetDir);
                System.out.println("Unzip for " + counter + ". file " + file + " done.");
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("unable to unzip " + file);
            }
        }
    }

    private void unzipFile(int counter, File file, File targetDir) throws IOException {

        String subDirName = counter + "_" + file.getName().substring(0, file.getName().length() - 4);
        if (subDirName.length() > 60) {
            subDirName = subDirName.substring(0, 60);
        }
        ;

        File subDir = new File(targetDir, subDirName);
        if (!subDir.exists()) {
            subDir.mkdirs();
        }

        try (ZipInputStream zis = new ZipInputStream(new FileInputStream(file))) {

            ZipEntry ze = zis.getNextEntry();

            while (ze != null) {
                File targetFile = new File(subDir, ze.getName());
                targetFile.getParentFile().mkdirs();

                try (FileOutputStream fos = new FileOutputStream(targetFile)) {
                    IOUtils.copy(zis, fos);
                }

                ze = zis.getNextEntry();
            }
        }
    }

    private static void usage() {
        System.out.println("Usage");
        System.out.println("=====");
        System.out.println("Unzip: java -jar DHCPublisher.jar 1 dhcDir targetDir");
        System.out.println("PDF: java -jar DHCPublisher.jar 2 dirWithUnzippedDhcs");
        System.out.println("Unzip+PDF: java -jar DHCPublisher.jar 3 dhcDir targetDir");
    }

    public static void main(String[] args) {

        if (args.length == 0) {
            usage();
        }

        try {
            Integer action = Integer.valueOf(args[0]);
            DHCPublisher dhcPublisher = new DHCPublisher();
            switch (action) {
            case 1: {
                if (args.length == 3) {
                    dhcPublisher.unzip(args[1], args[2]);
                } else {
                    usage();
                }
                break;
            }
            case 2: {
                if (args.length == 2) {
                    dhcPublisher.convert(args[1]);
                } else {
                    usage();
                }
                break;
            }
            case 3: {
                if (args.length == 3) {
                    dhcPublisher.unzip(args[1], args[2]);
                    dhcPublisher.convert(args[2]);
                } else {
                    usage();
                }
                break;
            }
            default: {
                usage();
            }
            }
        } catch (Exception e) {
            e.printStackTrace();
            usage();
        }
    }
}