org.isisaddons.module.docx.dom.DocxService.java Source code

Introduction

Here is the source code for org.isisaddons.module.docx.dom.DocxService.java
Source

/*
 *  Copyright 2013~2014 Dan Haywood
 *
 *  Licensed under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.isisaddons.module.docx.dom;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;
import javax.annotation.PostConstruct;
import com.google.common.base.Objects;
import com.google.common.collect.Lists;
import org.apache.commons.io.IOUtils;
import org.docx4j.Docx4J;
import org.docx4j.XmlUtils;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.fonts.IdentityPlusMapper;
import org.docx4j.fonts.Mapper;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.Body;
import org.docx4j.wml.P;
import org.docx4j.wml.R;
import org.docx4j.wml.SdtElement;
import org.docx4j.wml.Tbl;
import org.docx4j.wml.Tc;
import org.docx4j.wml.Tr;
import org.isisaddons.module.docx.dom.traverse.AllMatches;
import org.isisaddons.module.docx.dom.traverse.FirstMatch;
import org.isisaddons.module.docx.dom.util.Docx;
import org.isisaddons.module.docx.dom.util.Jdom2;
import org.isisaddons.module.docx.dom.util.Types;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.input.DOMBuilder;
import org.apache.isis.applib.annotation.DomainService;
import org.apache.isis.applib.annotation.NatureOfService;
import org.apache.isis.applib.annotation.Programmatic;

@DomainService(nature = NatureOfService.DOMAIN)
public class DocxService {

    /**
     * Lifecycle callback, annotated with {@link PostConstruct}.
     *
     * <p>
     * The body is currently empty: the supplied properties are not read and
     * no state is initialised.
     *
     * @param properties configuration properties (presumably supplied by the
     *                   framework at start-up - TODO confirm); currently ignored
     */
    @Programmatic
    @PostConstruct
    public void init(final Map<String, String> properties) {
    }

    /**
     * Controls whether a merge fails when input ids and template placeholders
     * do not line up one-to-one.
     *
     * <p>
     * Each constant carries two flags: whether input elements without a
     * matching placeholder are tolerated, and whether placeholders without
     * matching input are tolerated.
     */
    public enum MatchingPolicy {
        /**
         * Tolerates neither unmatched input nor unmatched placeholders.
         */
        STRICT(false, false),
        /**
         * Tolerates input elements that have no placeholder, but not the reverse.
         */
        ALLOW_UNMATCHED_INPUT(true, false),
        /**
         * Tolerates placeholders that have no input, but not the reverse.
         */
        ALLOW_UNMATCHED_PLACEHOLDERS(false, true),
        /**
         * Combination of both {@link #ALLOW_UNMATCHED_INPUT} and {@link #ALLOW_UNMATCHED_PLACEHOLDERS}.
         */
        LAX(true, true);

        private final boolean allowUnmatchedInput;
        private final boolean allowUnmatchedPlaceholders;

        MatchingPolicy(final boolean allowUnmatchedInput, final boolean allowUnmatchedPlaceholders) {
            this.allowUnmatchedInput = allowUnmatchedInput;
            this.allowUnmatchedPlaceholders = allowUnmatchedPlaceholders;
        }

        /**
         * Applies this policy to the ids of input elements that were not merged.
         *
         * @param unmatched ids of the input elements that could not be merged
         * @throws MergeException if unmatched input is not allowed and the list is non-empty
         */
        public void unmatchedInputs(final List<String> unmatched) throws MergeException {
            if (allowUnmatchedInput || unmatched.isEmpty()) {
                return;
            }
            throw new MergeException("Input elements " + unmatched + " were not matched to placeholders");
        }

        /**
         * Applies this policy to the placeholders that received no input.
         *
         * @param unmatched tag values of the placeholders that were not filled
         * @throws MergeException if unmatched placeholders are not allowed and the list is non-empty
         */
        public void unmatchedPlaceholders(final List<String> unmatched) throws MergeException {
            if (allowUnmatchedPlaceholders || unmatched.isEmpty()) {
                return;
            }
            throw new MergeException("Placeholders " + unmatched + " were not matched to input");
        }
    }

    /**
     * Reads a docx from the supplied stream and returns its in-memory form.
     *
     * <p>
     * This is public API because building the in-memory structure can be
     * quite slow.  Thus, clients can use this method to cache the in-memory
     * structure, and pass in to either
     *  {@link #merge(String, WordprocessingMLPackage, OutputStream, MatchingPolicy)}
     *  or {@link #merge(org.w3c.dom.Document, org.docx4j.openpackaging.packages.WordprocessingMLPackage, java.io.OutputStream, org.isisaddons.module.docx.dom.DocxService.MatchingPolicy, org.isisaddons.module.docx.dom.DocxService.OutputType)}
     *
     * @param docxTemplate stream from which the docx template is read
     * @return the package loaded by {@link WordprocessingMLPackage#load(InputStream)}
     * @throws LoadTemplateException wrapping any {@link Docx4JException} raised while loading
     */
    @Programmatic
    public WordprocessingMLPackage loadPackage(final InputStream docxTemplate) throws LoadTemplateException {
        try {
            return WordprocessingMLPackage.load(docxTemplate);
        } catch (final Docx4JException loadFailure) {
            throw new LoadTemplateException("Unable to load docx template from input stream", loadFailure);
        }
    }

    /**
     * Merges HTML (given as a string) into a docx template read from a
     * stream, writing the result as docx.
     *
     * <p>
     * The template is loaded by this method and therefore merged in place,
     * without a defensive copy.
     *
     * @param html           the input HTML
     * @param docxTemplate   stream from which the docx template is loaded
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @throws LoadInputException    if raised by {@code Jdom2.loadInput} for the HTML
     * @throws LoadTemplateException if the template cannot be loaded
     * @throws MergeException        if the merge or the write fails, or the policy rejects the result
     */
    @Programmatic
    public void merge(final String html, final InputStream docxTemplate, final OutputStream docxTarget,
            final MatchingPolicy matchingPolicy) throws LoadInputException, LoadTemplateException, MergeException {
        // arguments are evaluated left to right: the HTML is parsed before the template is loaded
        merge(Jdom2.loadInput(html), loadPackage(docxTemplate), docxTarget, matchingPolicy,
                DefensiveCopy.NOT_REQUIRED, OutputType.DOCX);
    }

    /**
     * Merges HTML (given as a string) into an already-loaded docx template,
     * writing the result as docx.
     *
     * <p>
     * The supplied package is cloned before merging, so the caller's instance
     * is not modified.
     *
     * @param html           the input HTML
     * @param docxTemplate   a previously loaded template
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @throws MergeException     if the merge or the write fails, or the policy rejects the result
     * @throws LoadInputException if raised by {@code Jdom2.loadInput} for the HTML
     */
    @Programmatic
    public void merge(final String html, final WordprocessingMLPackage docxTemplate, final OutputStream docxTarget,
            final MatchingPolicy matchingPolicy) throws MergeException, LoadInputException {
        merge(Jdom2.loadInput(html), docxTemplate, docxTarget, matchingPolicy,
                DefensiveCopy.REQUIRED, OutputType.DOCX);
    }

    /**
     * Merges HTML (given as a W3C DOM document) into a docx template read
     * from a stream, writing the result as docx.
     *
     * <p>
     * The template is loaded by this method and therefore merged in place,
     * without a defensive copy.
     *
     * @param htmlDoc        the input HTML as a W3C DOM document
     * @param docxTemplate   stream from which the docx template is loaded
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @throws MergeException        if the merge or the write fails, or the policy rejects the result
     * @throws LoadTemplateException if the template cannot be loaded
     */
    @Programmatic
    public void merge(final org.w3c.dom.Document htmlDoc, final InputStream docxTemplate,
            final OutputStream docxTarget, final MatchingPolicy matchingPolicy)
            throws MergeException, LoadTemplateException {
        // the template is loaded first; only then is the W3C DOM converted to JDOM
        final WordprocessingMLPackage loadedTemplate = loadPackage(docxTemplate);
        merge(new DOMBuilder().build(htmlDoc), loadedTemplate, docxTarget, matchingPolicy,
                DefensiveCopy.NOT_REQUIRED, OutputType.DOCX);
    }

    /**
     * Merges HTML (given as a W3C DOM document) into an already-loaded docx
     * template, writing the result as docx.
     *
     * <p>
     * The supplied package is cloned before merging, so the caller's instance
     * is not modified.
     *
     * @param htmlDoc        the input HTML as a W3C DOM document
     * @param docxTemplate   a previously loaded template
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @throws MergeException if the merge or the write fails, or the policy rejects the result
     */
    @Programmatic
    public void merge(final org.w3c.dom.Document htmlDoc, final WordprocessingMLPackage docxTemplate,
            final OutputStream docxTarget, final MatchingPolicy matchingPolicy) throws MergeException {
        merge(new DOMBuilder().build(htmlDoc), docxTemplate, docxTarget, matchingPolicy,
                DefensiveCopy.REQUIRED, OutputType.DOCX);
    }

    /**
     * Merges HTML (given as a string) into a docx template read from a
     * stream, writing the result in the requested output type.
     *
     * @param html           the input HTML
     * @param docxTemplate   stream from which the docx template is loaded
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @param outputType     the type of output to generate
     * @throws MergeException        if the merge or the write fails, or the policy rejects the result
     * @throws LoadInputException    if raised by {@code Jdom2.loadInput} for the HTML
     * @throws LoadTemplateException if the template cannot be loaded
     */
    @Programmatic
    public void merge(final String html, final InputStream docxTemplate, final OutputStream docxTarget,
            final MatchingPolicy matchingPolicy, final OutputType outputType)
            throws MergeException, LoadInputException, LoadTemplateException {
        final org.jdom2.Document htmlJdomDoc = Jdom2.loadInput(html);
        final WordprocessingMLPackage docxPkg = loadPackage(docxTemplate);
        // The package was loaded just above and is not visible to any caller, so it
        // can be merged in place; cloning it first would only cost time and memory.
        merge(htmlJdomDoc, docxPkg, docxTarget, matchingPolicy, DefensiveCopy.NOT_REQUIRED, outputType);
    }

    /**
     * Merges HTML (given as a string) into an already-loaded docx template,
     * writing the result in the requested output type.
     *
     * <p>
     * The supplied package is cloned before merging, so the caller's instance
     * is not modified.
     *
     * @param html           the input HTML
     * @param docxTemplate   a previously loaded template
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @param outputType     the type of output to generate
     * @throws MergeException     if the merge or the write fails, or the policy rejects the result
     * @throws LoadInputException if raised by {@code Jdom2.loadInput} for the HTML
     */
    @Programmatic
    public void merge(final String html, final WordprocessingMLPackage docxTemplate, final OutputStream docxTarget,
            final MatchingPolicy matchingPolicy, final OutputType outputType)
            throws MergeException, LoadInputException {
        merge(Jdom2.loadInput(html), docxTemplate, docxTarget, matchingPolicy,
                DefensiveCopy.REQUIRED, outputType);
    }

    /**
     * Merges HTML (given as a W3C DOM document) into a docx template read
     * from a stream, writing the result in the requested output type.
     *
     * @param htmlDoc        the input HTML as a W3C DOM document
     * @param docxTemplate   stream from which the docx template is loaded
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @param outputType     the type of output to generate
     * @throws MergeException        if the merge or the write fails, or the policy rejects the result
     * @throws LoadTemplateException if the template cannot be loaded
     */
    @Programmatic
    public void merge(final org.w3c.dom.Document htmlDoc, final InputStream docxTemplate,
            final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final OutputType outputType)
            throws MergeException, LoadTemplateException {
        final org.jdom2.Document htmlJdomDoc = new DOMBuilder().build(htmlDoc);
        final WordprocessingMLPackage docxPkg = loadPackage(docxTemplate);
        // The package was loaded just above and is not visible to any caller, so it
        // can be merged in place; cloning it first would only cost time and memory.
        merge(htmlJdomDoc, docxPkg, docxTarget, matchingPolicy, DefensiveCopy.NOT_REQUIRED, outputType);
    }

    /**
     * Merges HTML (given as a W3C DOM document) into an already-loaded docx
     * template, writing the result in the requested output type.
     *
     * <p>
     * The supplied package is cloned before merging, so the caller's instance
     * is not modified.
     *
     * @param htmlDoc        the input HTML as a W3C DOM document
     * @param docxTemplate   a previously loaded template
     * @param docxTarget     stream to which the merged document is written
     * @param matchingPolicy how unmatched inputs/placeholders are treated
     * @param outputType     the type of output to generate
     * @throws MergeException if the merge or the write fails, or the policy rejects the result
     */
    @Programmatic
    public void merge(final org.w3c.dom.Document htmlDoc, final WordprocessingMLPackage docxTemplate,
            final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final OutputType outputType)
            throws MergeException {
        merge(new DOMBuilder().build(htmlDoc), docxTemplate, docxTarget, matchingPolicy,
                DefensiveCopy.REQUIRED, outputType);
    }

    /**
     * Tells the private merge implementation whether the template package
     * must be cloned before it is modified.
     *
     * <p>
     * With {@code REQUIRED} the merge operates on a clone obtained from
     * {@code Docx.clone(...)}; with {@code NOT_REQUIRED} it operates directly
     * on the package passed in.
     */
    private enum DefensiveCopy {
        REQUIRED, NOT_REQUIRED
    }

    /**
     * The type of the file to generate.
     *
     * <p>
     * The merge implementation treats {@link #PDF} specially (output is
     * produced through {@code Docx4J.toFO(...)}); any other value results in
     * the merged package being saved as docx.
     */
    public enum OutputType {
        // the merged package is saved and copied to the target stream
        DOCX,
        /**
         * Support for PDF should be considered experimental.
         */
        PDF
    }

    /**
     * Shared implementation behind all public {@code merge(...)} overloads.
     *
     * <p>
     * Optionally clones the template, merges the HTML body into the docx
     * body, then writes the result to {@code docxTarget}: for
     * {@link OutputType#PDF} via {@code Docx4J.toFO(...)}, otherwise by saving
     * to a temporary file and copying that file to the target stream.
     *
     * @param htmlDoc           the input HTML as a JDOM2 document
     * @param docxTemplateInput the template package
     * @param docxTarget        stream to which the output is written
     * @param matchingPolicy    how unmatched inputs/placeholders are treated
     * @param defensiveCopy     whether to clone {@code docxTemplateInput} before modifying it
     * @param outputType        the type of output to generate
     * @throws MergeException if merging fails, the policy rejects the result, or the output cannot be written
     */
    private void merge(final org.jdom2.Document htmlDoc, final WordprocessingMLPackage docxTemplateInput,
            final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final DefensiveCopy defensiveCopy,
            final OutputType outputType) throws MergeException {

        final WordprocessingMLPackage workingPkg;
        if (defensiveCopy == DefensiveCopy.REQUIRED) {
            workingPkg = Docx.clone(docxTemplateInput);
        } else {
            workingPkg = docxTemplateInput;
        }

        try {
            final Element htmlBody = Jdom2.htmlBodyFor(htmlDoc);
            final Body docxBody = Docx.docxBodyFor(workingPkg);
            merge(htmlBody, docxBody, matchingPolicy);

            if (outputType != OutputType.PDF) {
                // docx output: save to a scratch file, then stream that file to the target
                final File scratchFile = createTempFile();
                FileInputStream scratchIn = null;
                try {
                    workingPkg.save(scratchFile);
                    scratchIn = new FileInputStream(scratchFile);
                    IOUtils.copy(scratchIn, docxTarget);
                } finally {
                    IOUtils.closeQuietly(scratchIn);
                    scratchFile.delete();
                }
                return;
            }

            // PDF output
            final FOSettings pdfSettings = Docx4J.createFOSettings();
            pdfSettings.setWmlPackage(workingPkg);

            try {
                final Mapper identityMapper = new IdentityPlusMapper();
                workingPkg.setFontMapper(identityMapper, true);
            } catch (final Exception mapperFailure) {
                throw new MergeException("unable to set font mapper for PDF generation", mapperFailure);
            }

            // according to the documentation/examples the XSL transformation
            // is slower but more feature complete than Docx4J.FLAG_EXPORT_PREFER_NONXSL
            Docx4J.toFO(pdfSettings, docxTarget, Docx4J.FLAG_EXPORT_PREFER_XSL);

        } catch (final Docx4JException docx4jFailure) {
            throw new MergeException("unable to write to target file", docx4jFailure);
        } catch (final FileNotFoundException missingFile) {
            throw new MergeException("unable to read back from target file", missingFile);
        } catch (final IOException ioFailure) {
            throw new MergeException("unable to generate output stream from temporary file", ioFailure);
        }
    }

    /**
     * Merges every child element of the HTML body into the docx body, then
     * applies the matching policy to whatever could not be matched.
     *
     * <p>
     * Non-element content of the HTML body (anything that is not an
     * {@link Element}) is skipped.
     *
     * @param htmlBody       the HTML body element whose children are the inputs
     * @param docXBody       the docx body containing the placeholders
     * @param matchingPolicy decides whether unmatched inputs/placeholders are an error
     * @throws MergeException if an input has no id, or the policy rejects the unmatched ids
     */
    private static void merge(final Element htmlBody, final Body docXBody, final MatchingPolicy matchingPolicy)
            throws MergeException {
        final List<String> mergedIds = Lists.newArrayList();
        final List<String> rejectedIds = Lists.newArrayList();

        for (final Content child : htmlBody.getContent()) {
            if (child instanceof Element) {
                mergeInto((Element) child, docXBody, mergedIds, rejectedIds);
            }
        }

        // the leftover placeholders are collected before either policy check runs
        final List<String> leftoverPlaceholders = unmatchedPlaceholders(docXBody, mergedIds);

        matchingPolicy.unmatchedInputs(rejectedIds);
        matchingPolicy.unmatchedPlaceholders(leftoverPlaceholders);
    }

    /**
     * Merges a single HTML input element into the placeholder whose tag
     * equals the element's {@code id}, recording the id as matched or
     * unmatched.
     *
     * <p>
     * The id is recorded as unmatched when no {@link MergeType} exists for the
     * element's name/class, when no placeholder with that tag is found, or
     * when the merge type reports failure.
     *
     * @param input           the HTML element to merge
     * @param docXBody        the docx body to search for the placeholder
     * @param matchedInputs   receives the id if the merge succeeded
     * @param unmatchedInputs receives the id otherwise
     * @throws MergeException if the element has no {@code id} attribute
     */
    private static void mergeInto(final Element input, final Body docXBody, final List<String> matchedInputs,
            final List<String> unmatchedInputs) throws MergeException {

        final String id = Jdom2.attrOf(input, "id");
        if (id == null) {
            throw new MergeException("Missing 'id' attribute for element within body of input HTML");
        }

        boolean merged = false;
        final MergeType mergeType = MergeType.lookup(input.getName(), Jdom2.attrOf(input, "class"));
        if (mergeType != null) {
            // the placeholder is only looked up once a merge strategy is known
            final SdtElement placeholder = FirstMatch.matching(docXBody, Docx.withTagVal(id));
            if (placeholder != null) {
                merged = mergeType.merge(input, placeholder);
            }
        }

        final List<String> destination = merged ? matchedInputs : unmatchedInputs;
        destination.add(id);
    }

    /**
     * Strategy for merging one HTML input element into the docx content
     * control ({@link SdtElement}) selected for it.
     *
     * <p>
     * Each constant is keyed by the HTML element's {@code name}, or
     * {@code name.class} when a class attribute is present (see
     * {@link #lookup(String, String)}). {@link #PLAIN}, {@link #RICH} and
     * {@link #DATE} share the default text-replacing
     * {@link #merge(Element, SdtElement)}; {@link #UL} and {@link #TABLE}
     * override it.
     */
    private enum MergeType {
        PLAIN("p.plain"),
        RICH("p.rich"),
        DATE("p.date"),
        UL("ul") {
            /**
             * Replaces the paragraphs of the content control with one copied
             * paragraph per {@code <p>} found inside each {@code <li>}.
             *
             * <p>
             * The first {@code <p>} of each {@code <li>} is copied from the
             * first template paragraph; subsequent ones are copied from the
             * second template paragraph when there is more than one.
             *
             * @return {@code false} (leaving the content control untouched) if
             *         the control has no paragraph, or a copied paragraph has
             *         no run to receive the text; {@code true} otherwise
             */
            @Override
            boolean merge(final Element htmlUl, final SdtElement sdtElement) {
                final List<Element> htmlLiList = htmlUl.getChildren("li"); // can be empty

                final List<P> docxPOrigList = AllMatches.<P>matching(sdtElement, Types.withType(P.class));
                if (docxPOrigList.isEmpty()) {
                    return false;
                }

                final List<P> docxPNewList = Lists.newArrayList();
                for (int htmlLiNum = 0; htmlLiNum < htmlLiList.size(); htmlLiNum++) {
                    final Element htmlLi = htmlLiList.get(htmlLiNum);

                    final List<Element> htmlPList = htmlLi.getChildren("p");

                    for (int htmlPNum = 0; htmlPNum < htmlPList.size(); htmlPNum++) {
                        final int numDocxPNum = docxPOrigList.size();
                        final int docxPNum = numDocxPNum == 1 || htmlPNum == 0 ? 0 : 1;
                        final P docxP = XmlUtils.deepCopy(docxPOrigList.get(docxPNum));
                        final R docxR = FirstMatch.<R>matching(docxP, Types.withType(R.class));
                        if (docxR == null) {
                            // Same guard as TABLE and the default merge: without a run there is
                            // nowhere to put the text. Nothing has been modified yet, so bail out.
                            return false;
                        }
                        docxPNewList.add(docxP);
                        final Element htmlP = htmlPList.get(htmlPNum);
                        Docx.setText(docxR, Jdom2.textValueOf(htmlP));
                    }
                }

                // remove original and replace with new
                final List<Object> content = sdtElement.getSdtContent().getContent();
                for (final P docxP : docxPOrigList) {
                    content.remove(docxP);
                }
                for (final P docxP : docxPNewList) {
                    content.add(docxP);
                }
                return true;
            }
        },
        TABLE("table") {
            /**
             * Replaces the body rows of the first table in the content control
             * with one copied row per {@code <tr>} of the HTML table.
             *
             * <p>
             * The first template row is treated as the header and kept. The
             * remaining template rows are used cyclically as the pattern for
             * the generated rows. Cells without a corresponding {@code <td>}
             * receive the empty string.
             *
             * @return {@code false} (leaving the table untouched) if there is
             *         no table, fewer than two template rows, or a cell lacks
             *         a paragraph or run; {@code true} otherwise
             */
            @Override
            boolean merge(final Element htmlTable, final SdtElement sdtElement) {

                final List<Element> htmlTrOrigList = htmlTable.getChildren("tr"); // can be empty

                final List<Object> docxContents = sdtElement.getSdtContent().getContent();
                final Tbl docxTbl = FirstMatch.matching(docxContents, Types.withType(Tbl.class));
                if (docxTbl == null) {
                    return false;
                }
                final List<Tr> docxTrList = AllMatches.matching(docxTbl, Types.withType(Tr.class));
                if (docxTrList.size() < 2) {
                    // require a header row and one other
                    return false;
                }

                // number of template body rows (everything after the header); loop-invariant
                final int numDocxBodyTr = docxTrList.size() - 1;

                final List<Tr> docxTrNewList = Lists.newArrayList();
                for (int htmlRowNum = 0; htmlRowNum < htmlTrOrigList.size(); htmlRowNum++) {
                    final Element htmlTr = htmlTrOrigList.get(htmlRowNum);

                    // cycle through the template body rows, skipping the header at index 0
                    final int docxTrNum = (htmlRowNum % numDocxBodyTr) + 1;
                    final Tr docxTr = XmlUtils.deepCopy(docxTrList.get(docxTrNum));
                    docxTrNewList.add(docxTr);
                    final List<Tc> docxTcList = AllMatches.matching(docxTr.getContent(), Types.withType(Tc.class));
                    final List<Element> htmlTdList = htmlTr.getChildren("td");
                    final List<String> htmlCellValues = Lists.transform(htmlTdList, Jdom2.textValue());
                    for (int cellNum = 0; cellNum < docxTcList.size(); cellNum++) {
                        final Tc docxTc = docxTcList.get(cellNum);
                        final String value = cellNum < htmlCellValues.size() ? htmlCellValues.get(cellNum) : "";
                        final P docxP = FirstMatch.matching(docxTc.getContent(), Types.withType(P.class));
                        if (docxP == null) {
                            return false;
                        }
                        final R docxR = FirstMatch.matching(docxP, Types.withType(R.class));
                        if (docxR == null) {
                            return false;
                        }
                        Docx.setText(docxR, value);
                    }
                }
                docxReplaceRows(docxTbl, docxTrList, docxTrNewList);
                return true;
            }

            /**
             * Removes every row of {@code docxTrList} except the first (the
             * header) from the table, then appends the rows of
             * {@code docxTrToAdd}.
             */
            private void docxReplaceRows(final Tbl docxTbl, final List<Tr> docxTrList, final List<Tr> docxTrToAdd) {
                final List<Object> docxTblContent = docxTbl.getContent();
                boolean first = true;
                for (final Tr docxTr : docxTrList) {
                    if (first) {
                        // header, do NOT remove
                        first = false;
                    } else {
                        docxTblContent.remove(docxTr);
                    }
                }
                for (final Tr docxTr : docxTrToAdd) {
                    docxTblContent.add(docxTr);
                }
            }
        };

        /**
         * Default merge: sets the text of the first run found in the content
         * control to the text value of the HTML element.
         *
         * @param htmlElement the HTML input element
         * @param docxElement the content control to update
         * @return {@code false} if the HTML element has no text value, the
         *         content control has no run, or {@code Docx.setText} reports
         *         failure; {@code true} otherwise
         */
        boolean merge(final Element htmlElement, final SdtElement docxElement) {
            final String htmlTextValue = Jdom2.textValueOf(htmlElement);
            if (htmlTextValue == null) {
                return false;
            }

            final R docxR = FirstMatch.matching(docxElement, Types.withType(R.class));
            if (docxR == null) {
                return false;
            }
            return Docx.setText(docxR, htmlTextValue);
        }

        // lookup key: element name, optionally followed by "." and the class attribute
        private final String type;

        private MergeType(final String type) {
            this.type = type;
        }

        /**
         * Finds the merge type for an HTML element.
         *
         * @param name  the element name, e.g. {@code "p"} or {@code "ul"}
         * @param clazz the value of the element's class attribute, or {@code null} if absent
         * @return the constant whose key equals {@code name} (no class) or
         *         {@code name.clazz}, or {@code null} if there is none
         */
        public static MergeType lookup(final String name, final String clazz) {
            final String type = name + (clazz != null ? "." + clazz : "");
            for (final MergeType mt : values()) {
                if (Objects.equal(mt.type, type)) {
                    return mt;
                }
            }
            return null;
        }
    }

    /**
     * Returns the tag values of all tagged content controls in the docx body
     * that were not filled by any input.
     *
     * @param docXBody   the docx body to scan for tagged content controls
     * @param matchedIds ids of the inputs that were merged successfully
     * @return a new, independent list of the tag values not contained in {@code matchedIds}
     */
    private static List<String> unmatchedPlaceholders(final Body docXBody, final List<String> matchedIds) {
        final List<SdtElement> taggedElements = AllMatches.matching(docXBody, Docx.withAnyTag());
        // Lists.transform yields a lazy view backed by taggedElements; removing from the view
        // would mutate that backing list (and fail if it is unmodifiable). Copy the tag values
        // into a fresh list so the removal is local and the result is a stable snapshot.
        final List<String> tagValuesView = Lists.transform(taggedElements, Docx.tagToValue());
        final List<String> unmatchedPlaceHolders = Lists.newArrayList(tagValuesView);
        unmatchedPlaceHolders.removeAll(matchedIds);
        return unmatchedPlaceHolders;
    }

    /**
     * Creates the temporary file used as scratch space when writing docx output.
     *
     * @return a new file created by {@link File#createTempFile(String, String)}
     *         with prefix {@code "docx"} and the default suffix
     * @throws MergeException wrapping the {@link IOException} if the file cannot be created
     */
    private static File createTempFile() throws MergeException {
        final File scratch;
        try {
            scratch = File.createTempFile("docx", null);
        } catch (final IOException ioFailure) {
            throw new MergeException("Unable to create temporary working file", ioFailure);
        }
        return scratch;
    }
}