Java tutorial
/* * Copyright (c) 2006-2014 by Public Library of Science * * http://plos.org * http://ambraproject.org * * Licensed under the Apache License, Version 2.0 (the "License"); * You may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ambraproject.service.xml; import org.apache.commons.configuration.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Required; import org.ambraproject.ApplicationException; import org.ambraproject.xml.transform.cache.CachedSource; import org.w3c.dom.Document; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import java.io.*; import java.net.URISyntaxException; import java.net.URL; import java.util.Map; /** * Convenience class to aggregate common methods used to deal with XML transforms on articles. * Used to transform article with annotations, captions of tables/figures, and citation information. * * @author Bill OConnor * @author Stephen Cheng * @author Alex Worden * @author Joe Osowski * */ public class XMLServiceImpl implements XMLService { private Configuration configuration; private static final Logger log = LoggerFactory.getLogger(XMLServiceImpl.class); /** * The DTD URL referenced in the article XML. If validateArticleXml is false, we make sure not to * validate the XML against this DTD for performance reasons. */ public static final String NLM_DTD_URL = "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd"; private String xslDefaultTemplate; private Map<String, String> xslTemplateMap; private DocumentBuilderFactory factory; private String articleRep; private Map<String, String> xmlFactoryProperty; private boolean validateArticleXml = true; // designed for Singleton use, set in init(), then Templates are threadsafe for reuse private Templates translet; // initialized from xslTemplate, per bean property /** * Initialization method called by Spring. * * @throws org.ambraproject.ApplicationException On Template creation Exceptions. */ public void init() throws ApplicationException { // set JAXP properties System.getProperties().putAll(xmlFactoryProperty); // Create a document builder factory and set the defaults factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); factory.setValidating(false); // set the Templates final TransformerFactory tFactory = TransformerFactory.newInstance(); //Because we have XSL sheets with import statements. I override the URI resolver //here so the factory knows to look inside the jar files for these files tFactory.setURIResolver(new XMLServiceURIResolver()); try { log.debug("Loading XSL: {}", xslDefaultTemplate); translet = tFactory.newTemplates(getResourceAsStreamSource(xslDefaultTemplate)); } catch (TransformerConfigurationException ex) { throw new ApplicationException(ex.getMessage(), ex); } catch (IOException ex) { throw new ApplicationException(ex.getMessage(), ex); } } /** * Pass in an XML string fragment, and will return back a string representing the document after * going through the XSL transform. * * @param description * @return Transformed document as a String * @throws org.ambraproject.ApplicationException */ @Override public String getTransformedDocument(String description) throws ApplicationException { try { final DocumentBuilder builder = createDocBuilder(); Document desc = builder.parse(new InputSource(new StringReader("<desc>" + description + "</desc>"))); return getTransformedDocument(desc); } catch (Exception e) { if (log.isErrorEnabled()) { log.error("Could not transform document", e); } throw new ApplicationException(e); } } /** * Given an XML Document as input, will return an XML string representing the document after * transformation. * * @param doc * @return XML String of transformed document * @throws org.ambraproject.ApplicationException */ @Override public String getTransformedDocument(Document doc) throws ApplicationException { String transformedString; try { if (log.isDebugEnabled()) log.debug("Applying XSLT transform to the document..."); final DOMSource domSource = new DOMSource(doc); final Transformer transformer = getTranslet(doc); final Writer writer = new StringWriter(1000); transformer.transform(domSource, new StreamResult(writer)); transformedString = writer.toString(); } catch (Exception e) { throw new ApplicationException(e); } return transformedString; } @Override public byte[] getTransformedByArray(byte[] xml) throws ApplicationException { try { Document doc = createDocBuilder().parse(new ByteArrayInputStream(xml)); Transformer transformer = this.getTranslet(doc); DOMSource domSource = new DOMSource(doc); ByteArrayOutputStream bs = new ByteArrayOutputStream(); transformer.transform(domSource, new StreamResult(bs)); return bs.toByteArray(); } catch (Exception e) { throw new ApplicationException(e); } } @Override public InputStream getTransformedInputStream(InputStream xml) throws ApplicationException { try { final Writer writer = new StringWriter(1000); Document doc = createDocBuilder().parse(xml); Transformer transformer = this.getTranslet(doc); DOMSource domSource = new DOMSource(doc); transformer.transform(domSource, new StreamResult(writer)); return new ByteArrayInputStream(writer.toString().getBytes("UTF-8")); } catch (Exception e) { throw new ApplicationException(e); } } /** * Given a string as input, will return an XML string representing the document after * transformation. * * @param description * @return string * @throws org.ambraproject.ApplicationException */ @Override public String getTransformedDescription(String description) throws ApplicationException { String transformedString; try { final DocumentBuilder builder = createDocBuilder(); Document desc = builder.parse(new InputSource(new StringReader("<desc>" + description + "</desc>"))); final DOMSource domSource = new DOMSource(desc); final Transformer transformer = getTranslet(desc); final Writer writer = new StringWriter(); transformer.transform(domSource, new StreamResult(writer)); transformedString = writer.toString(); } catch (Exception e) { throw new ApplicationException(e); } // Ambra stylesheet leaves "END_TITLE" as a marker for other processes transformedString = transformedString.replace("END_TITLE", ""); return transformedString; } /** * Convenience method to create a DocumentBuilder with the factory configs * * @return Document Builder * @throws javax.xml.parsers.ParserConfigurationException */ @Override public DocumentBuilder createDocBuilder() throws ParserConfigurationException { // Create the builder and parse the file final DocumentBuilder builder = factory.newDocumentBuilder(); EntityResolver resolver = validateArticleXml ? CachedSource.getResolver() : CachedSource.getResolver(NLM_DTD_URL); builder.setEntityResolver(resolver); return builder; } /** * Get a translet, compiled stylesheet, for the xslTemplate. If the doc is null * use the default template. If the doc is not null then get the DTD version. * IF the DTD version does not exist use the default template else use the * template associated with that version. * * @param doc the dtd version of document * @return Translet for the xslTemplate. * @throws javax.xml.transform.TransformerException TransformerException. */ private Transformer getTranslet(Document doc) throws TransformerException, URISyntaxException, IOException { Transformer transformer; // key is "" if the Attribute does not exist String key = (doc == null) ? "default" : doc.getDocumentElement().getAttribute("dtd-version").trim(); if ((!xslTemplateMap.containsKey(key)) || (key.equalsIgnoreCase(""))) { transformer = this.translet.newTransformer(); } else { Templates translet; String templateName = xslTemplateMap.get(key); // set the Templates final TransformerFactory tFactory = TransformerFactory.newInstance(); //Because we have XSL sheets with import statements. I override the URI resolver //here so the factory knows to look inside the jar files for these files tFactory.setURIResolver(new XMLServiceURIResolver()); //TODO: (performace) We should cache the translets when this class is initialized. We don't need //to parse the XSLs for every transform. translet = tFactory.newTemplates(getResourceAsStreamSource(templateName)); transformer = translet.newTransformer(); } transformer.setParameter("pubAppContext", configuration.getString("ambra.platform.appContext", "")); return transformer; } /** * Setter for XSL Templates. Takes in a string as the filename and searches for it in resource * path and then as a URI. * * @param xslTemplate The xslTemplate to set. * @throws java.net.URISyntaxException */ @Required public void setXslDefaultTemplate(String xslTemplate) throws URISyntaxException { log.debug("setXslDefaultTemplate called: {}", xslTemplate); this.xslDefaultTemplate = xslTemplate; } /** * Setter for XSL Templates. Takes in a string as the filename and searches for it in resource * path and then as a URI. * * @param xslTemplateMap The xslTemplate to set. */ @Required public void setXslTemplateMap(Map<String, String> xslTemplateMap) { this.xslTemplateMap = xslTemplateMap; } /** * Setter method for configuration. Injected through Spring. * * @param configuration Ambra configuration */ @Required public void setAmbraConfiguration(Configuration configuration) { this.configuration = configuration; } /** * Setter for article represenation * * @param articleRep The articleRep to set. */ @Required public void setArticleRep(String articleRep) { this.articleRep = articleRep; } /** * @param xmlFactoryProperty The xmlFactoryProperty to set. */ @Required public void setXmlFactoryProperty(Map<String, String> xmlFactoryProperty) { this.xmlFactoryProperty = xmlFactoryProperty; } /** * @param validateArticleXml indicates whether or not to perform DTD-validation of article XML * during the transform. Setting this to false improves performance and prevents network * requests to nlm.nih.gov. */ public void setValidateArticleXml(boolean validateArticleXml) { this.validateArticleXml = validateArticleXml; } /** * @return Returns the articleRep. */ public String getArticleRep() { return articleRep; } private StreamSource getResourceAsStreamSource(String filename) throws IOException { log.debug("Loading: {}", filename); URL loc = getClass().getClassLoader().getResource(filename); //If Loading resource fails, try getting the physical file if (loc == null) { File xsl = new File(filename); log.debug("Found File: {}", xsl.getPath()); return new StreamSource(xsl); } else { InputStream is = getClass().getClassLoader().getResourceAsStream(filename); StreamSource source = new StreamSource(is); log.debug("Found Resource: {}", loc.getFile()); //Note: http://stackoverflow.com/questions/7236291/saxon-error-with-xslt-import-statement source.setSystemId(loc.getFile()); return source; } } class XMLServiceURIResolver implements URIResolver { @Override public Source resolve(String href, String base) throws TransformerException { try { InputStream inputStream = this.getClass().getClassLoader().getResourceAsStream(href); return new StreamSource(inputStream); } catch (Exception ex) { throw new TransformerException(ex.getMessage(), ex); } } } }