// Java tutorial
/**
 * Copyright 2012 Hippo.
 *
 * This file is part of HST PDF Renderer.
 *
 * HST PDF Renderer is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * HST PDF Renderer is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * HST PDF Renderer. If not, see http://www.gnu.org/licenses/.
 */
package org.onehippo.forge.hst.pdf.renderer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringWriter;
import java.net.URI;
import java.util.Properties;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import org.xhtmlrenderer.extend.UserAgentCallback;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.BaseFont;

/**
 * HtmlPDFRenderer
 * <P>
 * Renders an (X)HTML document to PDF: the input is optionally converted to XHTML with JTidy,
 * parsed into a DOM, post-processed (stylesheet links, hyperlink targets) and finally handed to
 * an {@link ITextRenderer}.
 * </P>
 * <P>
 * This class is designed to be used as singleton object.
 * So, {@link #renderHtmlToPDF(InputStream, String, boolean, OutputStream, String, String)} and
 * {@link #renderHtmlToPDF(Reader, boolean, OutputStream, String, String)} should be thread-safe.
 * Each call creates its own {@link Tidy}, {@link DocumentBuilder} and {@link ITextRenderer}.
 * Note that the setters of this class are not synchronized, so the instance should be fully
 * configured before it is shared between threads.
 * </P>
 */
public class HtmlPDFRenderer {

    private static final Logger log = LoggerFactory.getLogger(HtmlPDFRenderer.class);

    /** JTidy configuration applied to every HTML-to-XHTML conversion. Never null. */
    private final Properties tidyProps;

    /** Whether <code>link</code> elements of type <code>text/css</code> are stripped from the head. */
    private boolean removeExistingCssLinks = true;

    /** Stylesheets appended to the document head as print-media links; may be null. */
    private URI[] cssURIs;

    /** Initial capacity of the in-memory buffer that receives the JTidy output. */
    private int bufferSize = 4096;

    /** Optional callback installed on the renderer's shared context; may be null. */
    private UserAgentCallback userAgentCallback;

    /** Optional font locations registered with the renderer's font resolver; may be null. */
    private String[] fontPaths;

    /** Whether non-absolute anchor hrefs are rewritten against the external link base URL. */
    private boolean useFullyQualifiedLinks = true;

    /**
     * Creates a renderer that uses an empty JTidy configuration.
     */
    public HtmlPDFRenderer() {
        this(new Properties());
    }

    /**
     * Creates a renderer with the given JTidy configuration.
     *
     * @param tidyProps JTidy configuration properties; <code>null</code> is treated as an empty
     *                  configuration
     */
    public HtmlPDFRenderer(final Properties tidyProps) {
        this.tidyProps = (tidyProps != null) ? tidyProps : new Properties();
    }

    /**
     * @return true if existing <code>text/css</code> link elements are removed before rendering
     */
    public boolean isRemoveExistingCssLinks() {
        return removeExistingCssLinks;
    }

    /**
     * @param removeExistingCssLinks true to remove existing <code>text/css</code> link elements
     *                               from the document head before rendering
     */
    public void setRemoveExistingCssLinks(boolean removeExistingCssLinks) {
        this.removeExistingCssLinks = removeExistingCssLinks;
    }

    /**
     * @return the stylesheet URIs appended to the document head, possibly null
     */
    public URI[] getCssURIs() {
        return cssURIs;
    }

    /**
     * @param cssURIs stylesheet URIs to append to the document head; null or empty appends nothing
     */
    public void setCssURIs(URI[] cssURIs) {
        this.cssURIs = cssURIs;
    }

    /**
     * @return the initial capacity of the buffer that receives the JTidy output
     */
    public int getBufferSize() {
        return bufferSize;
    }

    /**
     * @param bufferSize the initial capacity of the buffer that receives the JTidy output
     */
    public void setBufferSize(int bufferSize) {
        this.bufferSize = bufferSize;
    }

    /**
     * @return the user agent callback installed on the renderer, possibly null
     */
    public UserAgentCallback getUserAgentCallback() {
        return userAgentCallback;
    }

    /**
     * @param userAgentCallback callback to install on the renderer's shared context; null keeps
     *                          the renderer's own callback
     */
    public void setUserAgentCallback(UserAgentCallback userAgentCallback) {
        this.userAgentCallback = userAgentCallback;
    }

    /**
     * @return the font locations registered with the renderer, possibly null
     */
    public String[] getFontPaths() {
        return fontPaths;
    }

    /**
     * @param fontPaths font locations to register with the renderer's font resolver; null or
     *                  empty registers nothing
     */
    public void setFontPaths(String[] fontPaths) {
        this.fontPaths = fontPaths;
    }

    /**
     * @return true if non-absolute anchor hrefs are rewritten to fully qualified URLs
     */
    public boolean isUseFullyQualifiedLinks() {
        return useFullyQualifiedLinks;
    }

    /**
     * @param useFullyQualifiedLinks true to rewrite non-absolute anchor hrefs to fully qualified
     *                               URLs (only effective when an external link base URL is passed
     *                               to the render call)
     */
    public void setUseFullyQualifiedLinks(boolean useFullyQualifiedLinks) {
        this.useFullyQualifiedLinks = useFullyQualifiedLinks;
    }

    /**
     * Renders HTML read from a byte stream to PDF.
     * <P>
     * The stream is decoded with the given encoding and then processed by
     * {@link #renderHtmlToPDF(Reader, boolean, OutputStream, String, String)}.
     * Neither <code>htmlInput</code> nor <code>pdfOutput</code> is closed by this method.
     * </P>
     *
     * @param htmlInput           the HTML bytes
     * @param inputHtmlEncoding   the charset name used to decode <code>htmlInput</code>
     * @param convertToXHTML      true to run the input through JTidy first
     * @param pdfOutput           the stream the PDF is written to
     * @param documentURL         the URL of the document, passed to the renderer as base URL
     * @param externalLinkBaseURL the base URL that non-absolute anchor hrefs are rewritten against
     * @throws IOException if the encoding is unsupported or reading / writing fails
     */
    public void renderHtmlToPDF(InputStream htmlInput, String inputHtmlEncoding, boolean convertToXHTML,
            OutputStream pdfOutput, String documentURL, String externalLinkBaseURL) throws IOException {
        InputStreamReader htmlReader = new InputStreamReader(htmlInput, inputHtmlEncoding);
        renderHtmlToPDF(htmlReader, convertToXHTML, pdfOutput, documentURL, externalLinkBaseURL);
    }

    /**
     * Renders HTML read from a character stream to PDF.
     * <P>
     * Parser configuration errors, XML parsing errors and PDF generation errors are logged at
     * error level and <em>not</em> rethrown; in those cases nothing, or only a partial document,
     * may have been written to <code>pdfOutput</code>.
     * Neither <code>htmlInput</code> nor <code>pdfOutput</code> is closed by this method.
     * </P>
     *
     * @param htmlInput           the HTML (or, when <code>convertToXHTML</code> is false,
     *                            well-formed XHTML) characters
     * @param convertToXHTML      true to run the input through JTidy first
     * @param pdfOutput           the stream the PDF is written to
     * @param documentURL         the URL of the document, passed to the renderer as base URL and
     *                            used to resolve document-relative anchor hrefs
     * @param externalLinkBaseURL the base URL that non-absolute anchor hrefs are rewritten
     *                            against; null or empty disables the rewriting
     * @throws IOException if reading the input or writing the PDF fails
     */
    public void renderHtmlToPDF(Reader htmlInput, boolean convertToXHTML, OutputStream pdfOutput,
            String documentURL, String externalLinkBaseURL) throws IOException {
        Reader xhtmlReader = null;

        try {
            if (convertToXHTML) {
                xhtmlReader = convertHtmlReaderToXhtmlReader(htmlInput);
            } else {
                xhtmlReader = htmlInput;
            }

            ITextRenderer renderer = new ITextRenderer();

            if (fontPaths != null && fontPaths.length > 0) {
                ITextFontResolver fontResolver = renderer.getFontResolver();

                for (String fontPath : fontPaths) {
                    fontResolver.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
                }
            }

            // NOTE(review): the parser is created with the factory's default settings. If the
            // markup can come from an untrusted source, consider restricting DTD / external
            // entity processing here -- after confirming that documents relying on DTD-defined
            // entities still parse.
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document document = builder.parse(new InputSource(xhtmlReader));

            if (removeExistingCssLinks) {
                removeExistingCssLinks(document);
            }

            if (cssURIs != null && cssURIs.length > 0) {
                appendCssLinkElementToXhtmlDocument(document, cssURIs);
            }

            if (useFullyQualifiedLinks && !StringUtils.isEmpty(externalLinkBaseURL)) {
                // DOM tag-name lookup is done for both spellings of the anchor tag.
                replaceLinksByFullyQualifiedURLs(document, "a", externalLinkBaseURL, documentURL);
                replaceLinksByFullyQualifiedURLs(document, "A", externalLinkBaseURL, documentURL);
            }

            if (userAgentCallback != null) {
                renderer.getSharedContext().setUserAgentCallback(userAgentCallback);
            }

            renderer.setDocument(document, documentURL);
            renderer.layout();
            renderer.createPDF(pdfOutput);
        } catch (ParserConfigurationException e) {
            log.error("Parse configuration exception.", e);
        } catch (SAXException e) {
            log.error("XML parsing exception.", e);
        } catch (DocumentException e) {
            log.error("pdf generation exception.", e);
        } finally {
            // Only close the reader this method created; the caller owns htmlInput.
            if (xhtmlReader != htmlInput) {
                IOUtils.closeQuietly(xhtmlReader);
            }
        }
    }

    /**
     * Runs the given HTML through JTidy (configured with the properties passed to the
     * constructor) and returns a reader over the UTF-8 encoded result, which is buffered
     * completely in memory.
     *
     * @param htmlReader the HTML to tidy
     * @return a reader over the tidied output
     * @throws IOException if writing to the intermediate buffer fails
     */
    private Reader convertHtmlReaderToXhtmlReader(Reader htmlReader) throws IOException {
        Tidy tidy = new Tidy();
        tidy.setConfigurationFromProps(tidyProps);

        if (log.isDebugEnabled()) {
            StringWriter writer = new StringWriter();
            tidy.getConfiguration().printConfigOptions(writer, true);
            log.debug("Tidy configuration: \n{}", writer.toString());
        }

        ByteArrayOutputStream tidyOut = null;
        OutputStreamWriter osw = null;
        byte[] bytes = null;

        try {
            tidyOut = new ByteArrayOutputStream(bufferSize);
            osw = new OutputStreamWriter(tidyOut, "UTF-8");
            tidy.parse(htmlReader, osw);
            osw.flush();
            bytes = tidyOut.toByteArray();
        } finally {
            IOUtils.closeQuietly(osw);
            IOUtils.closeQuietly(tidyOut);
        }

        return new InputStreamReader(new ByteArrayInputStream(bytes), "UTF-8");
    }

    /**
     * Returns the first direct child element of <code>base</code> whose node name equals
     * <code>nodeName</code> ignoring case, or simply the first child element when
     * <code>nodeName</code> is null.
     *
     * @param base     the parent element
     * @param nodeName the node name to look for, or null to accept any element
     * @return the matching child element, or null if there is none
     */
    private static Element getFirstChildElement(Element base, String nodeName) {
        NodeList childNodeList = base.getChildNodes();

        if (childNodeList != null) {
            int length = childNodeList.getLength();

            for (int i = 0; i < length; i++) {
                Node childNode = childNodeList.item(i);

                if (childNode.getNodeType() == Node.ELEMENT_NODE) {
                    if (nodeName == null) {
                        return (Element) childNode;
                    } else if (StringUtils.equalsIgnoreCase(childNode.getNodeName(), nodeName)) {
                        return (Element) childNode;
                    }
                }
            }
        }

        return null;
    }

    /**
     * Removes every <code>link</code> element with <code>type="text/css"</code> (both compared
     * ignoring case) from the document's <code>head</code>. Does nothing if there is no head.
     *
     * @param document the document to modify
     */
    private static void removeExistingCssLinks(Document document) {
        Element headElem = getFirstChildElement(document.getDocumentElement(), "head");

        if (headElem == null) {
            return;
        }

        NodeList nodeList = headElem.getChildNodes();

        if (nodeList != null) {
            int length = nodeList.getLength();

            // Iterate backwards: the node list is live, so removing a child shifts later indexes.
            for (int i = length - 1; i >= 0; i--) {
                Node childNode = nodeList.item(i);

                if (childNode.getNodeType() == Node.ELEMENT_NODE) {
                    Element childElem = (Element) childNode;

                    if (StringUtils.equalsIgnoreCase("link", childElem.getNodeName())) {
                        if (StringUtils.equalsIgnoreCase("text/css", childElem.getAttribute("type"))) {
                            headElem.removeChild(childElem);
                        }
                    }
                }
            }
        }
    }

    /**
     * Appends one print-media stylesheet <code>link</code> element per URI to the document's
     * <code>head</code>. Does nothing if there is no head.
     *
     * @param document the document to modify
     * @param cssURIs  the stylesheet locations
     */
    private static void appendCssLinkElementToXhtmlDocument(Document document, URI[] cssURIs) {
        Element headElem = getFirstChildElement(document.getDocumentElement(), "head");

        if (headElem == null) {
            return;
        }

        for (URI cssURI : cssURIs) {
            Element linkElem = document.createElement("link");
            linkElem.setAttribute("type", "text/css");
            linkElem.setAttribute("rel", "stylesheet");
            linkElem.setAttribute("href", cssURI.toString());
            linkElem.setAttribute("media", "print");
            headElem.appendChild(linkElem);
        }
    }

    /**
     * Rewrites the <code>href</code> of every element named <code>linkTagName</code> to a fully
     * qualified URL.
     * <UL>
     * <LI>hrefs that already carry a URI scheme (<code>http:</code>, <code>https:</code>,
     * <code>mailto:</code>, ...), fragment-only hrefs (<code>#anchor</code>) and protocol-relative
     * hrefs (<code>//host/path</code>) are left untouched;</LI>
     * <LI>host-relative hrefs (<code>/path</code>) are prefixed with the external base URL;</LI>
     * <LI>all other hrefs are resolved against the directory of the document URL's path,
     * prefixed with the external base URL. They are left untouched when the document URL is
     * missing, invalid or has no path.</LI>
     * </UL>
     *
     * @param document            the document to modify
     * @param linkTagName         the tag name of the link elements
     * @param externalLinkBaseURL the base URL (scheme, host and optional context) to prepend
     * @param documentURL         the URL of the document being rendered
     */
    private static void replaceLinksByFullyQualifiedURLs(Document document, String linkTagName,
            String externalLinkBaseURL, String documentURL) {
        NodeList linkList = document.getElementsByTagName(linkTagName);

        if (linkList == null) {
            return;
        }

        // Drop a trailing slash so that "base/" + "/path" does not end up as "base//path".
        String baseURL = StringUtils.removeEnd(externalLinkBaseURL, "/");

        // The base for document-relative links is the same for all links; compute it on demand, once.
        String relativeLinkBase = null;
        boolean relativeLinkBaseResolved = false;

        int length = linkList.getLength();

        for (int i = 0; i < length; i++) {
            Node linkNode = linkList.item(i);

            if (linkNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            Element linkElem = (Element) linkNode;
            String href = StringUtils.trim(linkElem.getAttribute("href"));

            if (StringUtils.isEmpty(href)) {
                href = StringUtils.trim(linkElem.getAttribute("HREF"));
            }

            if (StringUtils.isEmpty(href) || isSelfContainedLink(href)) {
                continue;
            }

            if (href.startsWith("/")) {
                linkElem.setAttribute("href", baseURL + href);
            } else {
                if (!relativeLinkBaseResolved) {
                    relativeLinkBase = resolveRelativeLinkBase(baseURL, documentURL);
                    relativeLinkBaseResolved = true;
                }

                if (relativeLinkBase != null) {
                    linkElem.setAttribute("href", relativeLinkBase + "/" + href);
                }
            }
        }
    }

    /**
     * Tells whether an href must not be rewritten because it does not depend on the location of
     * the document on the site: it has its own URI scheme, it only addresses a fragment of the
     * current document, or it is protocol-relative and therefore names its own host.
     */
    private static boolean isSelfContainedLink(String href) {
        return href.startsWith("#") || href.startsWith("//") || hasUriScheme(href);
    }

    /**
     * Tells whether the text starts with a URI scheme as defined by RFC 3986:
     * an ASCII letter followed by ASCII letters, digits, '+', '-' or '.', terminated by ':'.
     */
    private static boolean hasUriScheme(String href) {
        int colon = href.indexOf(':');

        if (colon <= 0 || !isAsciiLetter(href.charAt(0))) {
            return false;
        }

        for (int i = 1; i < colon; i++) {
            char c = href.charAt(i);
            boolean schemeChar = isAsciiLetter(c) || (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.';

            if (!schemeChar) {
                return false;
            }
        }

        return true;
    }

    /** Tells whether the character is one of a-z or A-Z. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Computes the prefix for document-relative links: the base URL followed by the document
     * URL's path up to (excluding) its last slash.
     *
     * @return the prefix, or null if the document URL is empty, invalid or has no path
     */
    private static String resolveRelativeLinkBase(String baseURL, String documentURL) {
        if (StringUtils.isEmpty(documentURL)) {
            return null;
        }

        String documentPath;

        try {
            documentPath = URI.create(documentURL).getPath();
        } catch (IllegalArgumentException e) {
            log.warn("Invalid document URL, relative links are left unchanged: " + documentURL, e);
            return null;
        }

        // Opaque URIs have no path at all.
        if (documentPath == null) {
            return null;
        }

        return baseURL + StringUtils.substringBeforeLast(documentPath, "/");
    }
}