Java tutorial
/* * Copyright 2013~2014 Dan Haywood * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.isisaddons.module.docx.dom; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.List; import java.util.Map; import javax.annotation.PostConstruct; import com.google.common.base.Objects; import com.google.common.collect.Lists; import org.apache.commons.io.IOUtils; import org.docx4j.Docx4J; import org.docx4j.XmlUtils; import org.docx4j.convert.out.FOSettings; import org.docx4j.fonts.IdentityPlusMapper; import org.docx4j.fonts.Mapper; import org.docx4j.openpackaging.exceptions.Docx4JException; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.wml.Body; import org.docx4j.wml.P; import org.docx4j.wml.R; import org.docx4j.wml.SdtElement; import org.docx4j.wml.Tbl; import org.docx4j.wml.Tc; import org.docx4j.wml.Tr; import org.isisaddons.module.docx.dom.traverse.AllMatches; import org.isisaddons.module.docx.dom.traverse.FirstMatch; import org.isisaddons.module.docx.dom.util.Docx; import org.isisaddons.module.docx.dom.util.Jdom2; import org.isisaddons.module.docx.dom.util.Types; import org.jdom2.Content; import org.jdom2.Element; import org.jdom2.input.DOMBuilder; import org.apache.isis.applib.annotation.DomainService; import org.apache.isis.applib.annotation.NatureOfService; import org.apache.isis.applib.annotation.Programmatic; @DomainService(nature = NatureOfService.DOMAIN) public class DocxService { @Programmatic @PostConstruct public void init(final Map<String, String> properties) { } public enum MatchingPolicy { STRICT(false, false), ALLOW_UNMATCHED_INPUT(true, false), ALLOW_UNMATCHED_PLACEHOLDERS(false, true), /** * Combination of both {@link #ALLOW_UNMATCHED_INPUT} and {@link #ALLOW_UNMATCHED_PLACEHOLDERS}. */ LAX(true, true); private final boolean allowUnmatchedInput; private final boolean allowUnmatchedPlaceholders; private MatchingPolicy(final boolean allowUnmatchedInput, final boolean allowUnmatchedPlaceholders) { this.allowUnmatchedInput = allowUnmatchedInput; this.allowUnmatchedPlaceholders = allowUnmatchedPlaceholders; } public void unmatchedInputs(final List<String> unmatched) throws MergeException { if (!allowUnmatchedInput && !unmatched.isEmpty()) { throw new MergeException("Input elements " + unmatched + " were not matched to placeholders"); } } public void unmatchedPlaceholders(final List<String> unmatched) throws MergeException { if (!allowUnmatchedPlaceholders && !unmatched.isEmpty()) { throw new MergeException("Placeholders " + unmatched + " were not matched to input"); } } } /** * Load and return an in-memory representation of a docx. * * <p> * This is public API because building the in-memory structure can be * quite slow. Thus, clients can use this method to cache the in-memory * structure, and pass in to either * {@link #merge(String, WordprocessingMLPackage, OutputStream, MatchingPolicy)} * or {@link #merge(org.w3c.dom.Document, org.docx4j.openpackaging.packages.WordprocessingMLPackage, java.io.OutputStream, org.isisaddons.module.docx.dom.DocxService.MatchingPolicy, org.isisaddons.module.docx.dom.DocxService.OutputType)} */ @Programmatic public WordprocessingMLPackage loadPackage(final InputStream docxTemplate) throws LoadTemplateException { final WordprocessingMLPackage docxPkg; try { docxPkg = WordprocessingMLPackage.load(docxTemplate); } catch (final Docx4JException ex) { throw new LoadTemplateException("Unable to load docx template from input stream", ex); } return docxPkg; } @Programmatic public void merge(final String html, final InputStream docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy) throws LoadInputException, LoadTemplateException, MergeException { final org.jdom2.Document htmlJdomDoc = Jdom2.loadInput(html); final WordprocessingMLPackage docxPkg = loadPackage(docxTemplate); merge(htmlJdomDoc, docxPkg, docxTarget, matchingPolicy, DefensiveCopy.NOT_REQUIRED, OutputType.DOCX); } @Programmatic public void merge(final String html, final WordprocessingMLPackage docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy) throws MergeException, LoadInputException { final org.jdom2.Document htmlJdomDoc = Jdom2.loadInput(html); merge(htmlJdomDoc, docxTemplate, docxTarget, matchingPolicy, DefensiveCopy.REQUIRED, OutputType.DOCX); } @Programmatic public void merge(final org.w3c.dom.Document htmlDoc, final InputStream docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy) throws MergeException, LoadTemplateException { final WordprocessingMLPackage docxPkg = loadPackage(docxTemplate); final org.jdom2.Document htmlJdomDoc = new DOMBuilder().build(htmlDoc); merge(htmlJdomDoc, docxPkg, docxTarget, matchingPolicy, DefensiveCopy.NOT_REQUIRED, OutputType.DOCX); } @Programmatic public void merge(final org.w3c.dom.Document htmlDoc, final WordprocessingMLPackage docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy) throws MergeException { final org.jdom2.Document htmlJdomDoc = new DOMBuilder().build(htmlDoc); merge(htmlJdomDoc, docxTemplate, docxTarget, matchingPolicy, DefensiveCopy.REQUIRED, OutputType.DOCX); } @Programmatic public void merge(final String html, final InputStream docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final OutputType outputType) throws MergeException, LoadInputException, LoadTemplateException { final org.jdom2.Document htmlJdomDoc = Jdom2.loadInput(html); final WordprocessingMLPackage docxPkg = loadPackage(docxTemplate); merge(htmlJdomDoc, docxPkg, docxTarget, matchingPolicy, DefensiveCopy.REQUIRED, outputType); } @Programmatic public void merge(final String html, final WordprocessingMLPackage docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final OutputType outputType) throws MergeException, LoadInputException { final org.jdom2.Document htmlJdomDoc = Jdom2.loadInput(html); merge(htmlJdomDoc, docxTemplate, docxTarget, matchingPolicy, DefensiveCopy.REQUIRED, outputType); } @Programmatic public void merge(final org.w3c.dom.Document htmlDoc, final InputStream docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final OutputType outputType) throws MergeException, LoadTemplateException { final org.jdom2.Document htmlJdomDoc = new DOMBuilder().build(htmlDoc); final WordprocessingMLPackage docxPkg = loadPackage(docxTemplate); merge(htmlJdomDoc, docxPkg, docxTarget, matchingPolicy, DefensiveCopy.REQUIRED, outputType); } @Programmatic public void merge(final org.w3c.dom.Document htmlDoc, final WordprocessingMLPackage docxTemplate, final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final OutputType outputType) throws MergeException { final org.jdom2.Document htmlJdomDoc = new DOMBuilder().build(htmlDoc); merge(htmlJdomDoc, docxTemplate, docxTarget, matchingPolicy, DefensiveCopy.REQUIRED, outputType); } private enum DefensiveCopy { REQUIRED, NOT_REQUIRED } /** * The type of the file to generate */ public enum OutputType { DOCX, /** * Support for PDF should be considered experimental. */ PDF } private void merge(final org.jdom2.Document htmlDoc, final WordprocessingMLPackage docxTemplateInput, final OutputStream docxTarget, final MatchingPolicy matchingPolicy, final DefensiveCopy defensiveCopy, final OutputType outputType) throws MergeException { final WordprocessingMLPackage docxTemplate = defensiveCopy == DefensiveCopy.REQUIRED ? Docx.clone(docxTemplateInput) : docxTemplateInput; try { final Element bodyEl = Jdom2.htmlBodyFor(htmlDoc); final Body docXBody = Docx.docxBodyFor(docxTemplate); merge(bodyEl, docXBody, matchingPolicy); if (outputType == OutputType.PDF) { final FOSettings foSettings = Docx4J.createFOSettings(); foSettings.setWmlPackage(docxTemplate); try { final Mapper fontMapper = new IdentityPlusMapper(); docxTemplate.setFontMapper(fontMapper, true); } catch (final Exception e) { throw new MergeException("unable to set font mapper for PDF generation", e); } // according to the documentation/examples the XSL transformation // is slower but more feature complete than Docx4J.FLAG_EXPORT_PREFER_NONXSL final int flags = Docx4J.FLAG_EXPORT_PREFER_XSL; Docx4J.toFO(foSettings, docxTarget, flags); } else { final File tempTargetFile = createTempFile(); FileInputStream tempTargetFis = null; try { docxTemplate.save(tempTargetFile); tempTargetFis = new FileInputStream(tempTargetFile); IOUtils.copy(tempTargetFis, docxTarget); } finally { IOUtils.closeQuietly(tempTargetFis); tempTargetFile.delete(); } } } catch (final Docx4JException e) { throw new MergeException("unable to write to target file", e); } catch (final FileNotFoundException e) { throw new MergeException("unable to read back from target file", e); } catch (final IOException e) { throw new MergeException("unable to generate output stream from temporary file", e); } } private static void merge(final Element htmlBody, final Body docXBody, final MatchingPolicy matchingPolicy) throws MergeException { final List<String> matchedInputIds = Lists.newArrayList(); final List<String> unmatchedInputIds = Lists.newArrayList(); final List<Content> htmlBodyContents = htmlBody.getContent(); for (final Content input : htmlBodyContents) { if (!(input instanceof Element)) { continue; } mergeInto((Element) input, docXBody, matchedInputIds, unmatchedInputIds); } final List<String> unmatchedPlaceHolders = unmatchedPlaceholders(docXBody, matchedInputIds); matchingPolicy.unmatchedInputs(unmatchedInputIds); matchingPolicy.unmatchedPlaceholders(unmatchedPlaceHolders); } private static void mergeInto(final Element input, final Body docXBody, final List<String> matchedInputs, final List<String> unmatchedInputs) throws MergeException { final String id = Jdom2.attrOf(input, "id"); if (id == null) { throw new MergeException("Missing 'id' attribute for element within body of input HTML"); } final MergeType mergeType = MergeType.lookup(input.getName(), Jdom2.attrOf(input, "class")); if (mergeType == null) { unmatchedInputs.add(id); return; } final SdtElement docxElement = FirstMatch.matching(docXBody, Docx.withTagVal(id)); if (docxElement == null) { unmatchedInputs.add(id); return; } if (mergeType.merge(input, docxElement)) { matchedInputs.add(id); } else { unmatchedInputs.add(id); } } private enum MergeType { PLAIN("p.plain"), RICH("p.rich"), DATE("p.date"), UL("ul") { @Override boolean merge(final Element htmlUl, final SdtElement sdtElement) { final List<Element> htmlLiList = htmlUl.getChildren("li"); // can be empty final List<P> docxPOrigList = AllMatches.<P>matching(sdtElement, Types.withType(P.class)); if (docxPOrigList.isEmpty()) { return false; } final List<P> docxPNewList = Lists.newArrayList(); for (int htmlLiNum = 0; htmlLiNum < htmlLiList.size(); htmlLiNum++) { final Element htmlLi = htmlLiList.get(htmlLiNum); final List<Element> htmlPList = htmlLi.getChildren("p"); for (int htmlPNum = 0; htmlPNum < htmlPList.size(); htmlPNum++) { final int numDocxPNum = docxPOrigList.size(); final int docxPNum = numDocxPNum == 1 || htmlPNum == 0 ? 0 : 1; final P docxP = XmlUtils.deepCopy(docxPOrigList.get(docxPNum)); docxPNewList.add(docxP); final R docxR = FirstMatch.<R>matching(docxP, Types.withType(R.class)); final Element htmlP = htmlPList.get(htmlPNum); Docx.setText(docxR, Jdom2.textValueOf(htmlP)); } } // remove original and replace with new final List<Object> content = sdtElement.getSdtContent().getContent(); for (final P docxP : docxPOrigList) { content.remove(docxP); } for (final P docxP : docxPNewList) { content.add(docxP); } return true; } }, TABLE("table") { @Override boolean merge(final Element htmlTable, final SdtElement sdtElement) { final List<Element> htmlTrOrigList = htmlTable.getChildren("tr"); // can be empty final List<Object> docxContents = sdtElement.getSdtContent().getContent(); final Tbl docxTbl = FirstMatch.matching(docxContents, Types.withType(Tbl.class)); if (docxTbl == null) { return false; } final List<Tr> docxTrList = AllMatches.matching(docxTbl, Types.withType(Tr.class)); if (docxTrList.size() < 2) { // require a header row and one other return false; } final List<Tr> docxTrNewList = Lists.newArrayList(); for (int htmlRowNum = 0; htmlRowNum < htmlTrOrigList.size(); htmlRowNum++) { final Element htmlTr = htmlTrOrigList.get(htmlRowNum); final int numDocxBodyTr = docxTrList.size() - 1; final int docxTrNum = (htmlRowNum % numDocxBodyTr) + 1; final Tr docxTr = XmlUtils.deepCopy(docxTrList.get(docxTrNum)); docxTrNewList.add(docxTr); final List<Tc> docxTcList = AllMatches.matching(docxTr.getContent(), Types.withType(Tc.class)); final List<Element> htmlTdList = htmlTr.getChildren("td"); final List<String> htmlCellValues = Lists.transform(htmlTdList, Jdom2.textValue()); for (int cellNum = 0; cellNum < docxTcList.size(); cellNum++) { final Tc docxTc = docxTcList.get(cellNum); final String value = cellNum < htmlCellValues.size() ? htmlCellValues.get(cellNum) : ""; final P docxP = FirstMatch.matching(docxTc.getContent(), Types.withType(P.class)); if (docxP == null) { return false; } final R docxR = FirstMatch.matching(docxP, Types.withType(R.class)); if (docxR == null) { return false; } Docx.setText(docxR, value); } } docxReplaceRows(docxTbl, docxTrList, docxTrNewList); return true; } private void docxReplaceRows(final Tbl docxTbl, final List<Tr> docxTrList, final List<Tr> docxTrToAdd) { final List<Object> docxTblContent = docxTbl.getContent(); boolean first = true; for (final Tr docxTr : docxTrList) { if (first) { // header, do NOT remove first = false; } else { docxTblContent.remove(docxTr); } } for (final Tr docxTr : docxTrToAdd) { docxTblContent.add(docxTr); } } }; boolean merge(final Element htmlElement, final SdtElement docxElement) { final String htmlTextValue = Jdom2.textValueOf(htmlElement); if (htmlTextValue == null) { return false; } final R docxR = FirstMatch.matching(docxElement, Types.withType(R.class)); if (docxR == null) { return false; } return Docx.setText(docxR, htmlTextValue); } private final String type; private MergeType(final String type) { this.type = type; } public static MergeType lookup(final String name, final String clazz) { final String type = name + (clazz != null ? "." + clazz : ""); for (final MergeType mt : values()) { if (Objects.equal(mt.type, type)) { return mt; } } return null; } } private static List<String> unmatchedPlaceholders(final Body docXBody, final List<String> matchedIds) { final List<SdtElement> taggedElements = AllMatches.matching(docXBody, Docx.withAnyTag()); final List<String> unmatchedPlaceHolders = Lists.transform(taggedElements, Docx.tagToValue()); unmatchedPlaceHolders.removeAll(matchedIds); return unmatchedPlaceHolders; } private static File createTempFile() throws MergeException { try { return File.createTempFile("docx", null); } catch (final IOException ex) { throw new MergeException("Unable to create temporary working file", ex); } } }