Java tutorial
/** * Copyright (c) Codice Foundation * * <p>This is free software: you can redistribute it and/or modify it under the terms of the GNU * Lesser General Public License as published by the Free Software Foundation, either version 3 of * the License, or any later version. * * <p>This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. A copy of the GNU Lesser General Public * License is distributed along with this program and can be found at * <http://www.gnu.org/licenses/lgpl.html>. */ package ddf.services.schematron; import com.google.common.collect.ImmutableSet; import ddf.catalog.data.Metacard; import ddf.catalog.util.Describable; import ddf.catalog.validation.MetacardValidator; import ddf.catalog.validation.ReportingMetacardValidator; import ddf.catalog.validation.ValidationException; import ddf.catalog.validation.impl.ValidationExceptionImpl; import ddf.catalog.validation.impl.report.MetacardValidationReportImpl; import ddf.catalog.validation.impl.violation.ValidationViolationImpl; import ddf.catalog.validation.report.MetacardValidationReport; import ddf.catalog.validation.violation.ValidationViolation; import java.io.File; import java.io.StringReader; import java.net.URL; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.ErrorListener; import javax.xml.transform.Source; import javax.xml.transform.Templates; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMResult; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.stream.StreamSource; import net.sf.saxon.Configuration; import net.sf.saxon.TransformerFactoryImpl; import org.apache.commons.lang.StringUtils; import org.codice.ddf.platform.util.StandardThreadFactoryBuilder; import org.codice.ddf.platform.util.XMLUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLFilterImpl; /** * This pre-ingest service provides validation of an ingested XML document against a Schematron * schema file. * * <p>When this service is instantiated at deployment time to the OSGi container it goes through 3 * different preprocessing stages on the Schematron schema file. (These steps are required by the * ISO Schematron implementation) * * <ol> * <li>1. Preprocess the Schematron schema with iso_dsdl_include.xsl. This is a macro processor to * assemble the schema from various parts. * <li>2. Preprocess the output from stage 1 with iso_abstract_expand.xsl. This is a macro * processor to convert abstract patterns to real patterns. * <li>3. Compile the Schematron schema into an XSLT script. This will use iso_svrl_for_xslt2.xsl * (which in turn invokes iso_schematron_skeleton_for_saxon.xsl) * </ol> * * <p>When XML documents are ingested, this service will run the XSLT generated by stage 3 against * the XML document, validating it against the "compiled" Schematron schema file. * * <p>This service is using the SVRL script, hence the output of the validation will be an * SVRL-formatted XML document. * * @author rodgersh * @see <a href="http://www.schematron.com">Schematron</a> */ public class SchematronValidationService implements MetacardValidator, Describable, ReportingMetacardValidator { public static final String DEFAULT_THREAD_POOL_SIZE = "16"; private static final String SCHEMATRON_BASE_FOLDER = Paths.get(System.getProperty("ddf.home"), "schematron") .toString(); private static final Logger LOGGER = LoggerFactory.getLogger(SchematronValidationService.class); private static final XMLUtils XML_UTILS = XMLUtils.getInstance(); private TransformerFactory transformerFactory; private int priority = 10; private SchematronReport schematronReport; private List<String> schematronFileNames; private boolean suppressWarnings = false; private String namespace; private String id; private ExecutorService pool = getThreadPool(); private List<Future<Templates>> validators = new ArrayList<>(); private static ExecutorService getThreadPool() throws NumberFormatException { Integer threadPoolSize = Integer .parseInt(System.getProperty("org.codice.ddf.system.threadPoolSize", DEFAULT_THREAD_POOL_SIZE)); return Executors.newFixedThreadPool(threadPoolSize, StandardThreadFactoryBuilder.newThreadFactory("schematronValidationServiceThread")); } /** * Replace tabs, literal carriage returns, and newlines with a single whitespace * * @param input * @return */ static String sanitize(final String input) { return input.replaceAll("[\t \r\n]+", " ").trim(); } public void init() throws SchematronInitializationException { if (transformerFactory == null) { transformerFactory = XML_UTILS.getSecureXmlTransformerFactory(TransformerFactoryImpl.class.getName(), SchematronValidationService.class.getClassLoader()); } // DDF-855: set ErrorListener to catch any warnings/errors during loading of the // ruleset file and log (vs. Saxon default of writing to console) the warnings/errors Configuration config = ((TransformerFactoryImpl) transformerFactory).getConfiguration(); config.setErrorListener(new SaxonErrorListener(schematronFileNames)); updateValidators(); } private void updateValidators() throws SchematronInitializationException { validators.clear(); for (String schematronFileName : schematronFileNames) { FutureTask<Templates> task = new FutureTask<Templates>(() -> { return compileSchematronRules(schematronFileName); }); validators.add(task); pool.submit(task); } } private Templates compileSchematronRules(String schematronFileName) throws SchematronInitializationException { Templates template; File schematronFile = new File(schematronFileName); if (!schematronFile.exists()) { throw new SchematronInitializationException("Could not locate schematron file " + schematronFileName); } try { URL schUrl = schematronFile.toURI().toURL(); Source schSource = new StreamSource(schUrl.toString()); // Stage 1: Perform inclusion expansion on Schematron schema file DOMResult stage1Result = performStage(schSource, getClass().getClassLoader().getResource("iso-schematron/iso_dsdl_include.xsl")); DOMSource stage1Output = new DOMSource(stage1Result.getNode()); // Stage 2: Perform abstract expansion on output file from Stage 1 DOMResult stage2Result = performStage(stage1Output, getClass().getClassLoader().getResource("iso-schematron/iso_abstract_expand.xsl")); DOMSource stage2Output = new DOMSource(stage2Result.getNode()); // Stage 3: Compile the .sch rules that have been prepocessed by Stages 1 and 2 (i.e., // the output of Stage 2) DOMResult stage3Result = performStage(stage2Output, getClass().getClassLoader().getResource("iso-schematron/iso_svrl_for_xslt2.xsl")); DOMSource stage3Output = new DOMSource(stage3Result.getNode()); // Setting the system ID let's us resolve relative paths in the schematron files. // We need the URL string so that the string is properly formatted (e.g. space = %20). stage3Output.setSystemId(schUrl.toString()); template = transformerFactory.newTemplates(stage3Output); } catch (Exception e) { throw new SchematronInitializationException( "Error trying to create SchematronValidationService using sch file " + schematronFileName, e); } return template; } private DOMResult performStage(Source input, URL preprocessorUrl) throws TransformerException, ParserConfigurationException, SchematronInitializationException { Source preprocessorSource = new StreamSource(preprocessorUrl.toString()); Transformer transformer = transformerFactory.newTransformer(preprocessorSource); // Setup an error listener to catch warnings and errors generated during transformation transformer.setErrorListener(new Listener()); // Transform the input using the preprocessor's transformer, capturing the output in a DOM DOMResult domResult = new DOMResult(); transformer.transform(input, domResult); return domResult; } public void setSuppressWarnings(boolean suppressWarnings) { this.suppressWarnings = suppressWarnings; } public void setSchematronFileNames(List<String> schematronFileNames) throws SchematronInitializationException { this.schematronFileNames = new ArrayList<>(); for (String filename : schematronFileNames) { String fullpath = Paths.get(filename).toString(); if (!Paths.get(filename).isAbsolute()) { fullpath = Paths.get(SCHEMATRON_BASE_FOLDER, fullpath).toString(); } this.schematronFileNames.add(fullpath); } if (transformerFactory != null) { updateValidators(); } } public void setNamespace(String namespace) { this.namespace = namespace; } public void setPriority(int priority) { this.priority = priority; // 1 is the highest priority, 100 the lowest if (this.priority > 100) { this.priority = 100; } else if (this.priority < 1) { this.priority = 1; } } @Override public void validate(Metacard metacard) throws ValidationException { MetacardValidationReport report = generateReport(metacard); List<String> errors = new ArrayList<>(); List<String> warnings = new ArrayList<>(); report.getMetacardValidationViolations().forEach(violation -> { if (violation.getSeverity() == ValidationViolation.Severity.ERROR) { errors.add(violation.getMessage()); } else { warnings.add(violation.getMessage()); } }); SchematronValidationException exception = new SchematronValidationException("Schematron validation failed", errors, warnings); if (!errors.isEmpty()) { throw exception; } if (!suppressWarnings && !warnings.isEmpty()) { throw exception; } } private MetacardValidationReport generateReport(Metacard metacard) throws ValidationExceptionImpl { MetacardValidationReportImpl report = new MetacardValidationReportImpl(); Set<String> attributes = ImmutableSet.of("metadata"); String metadata = metacard.getMetadata(); boolean canBeValidated = !(StringUtils.isEmpty(metadata) || (namespace != null && !namespace.equals(XML_UTILS.getRootNamespace(metadata)))); if (canBeValidated) { try { for (Future<Templates> validator : validators) { schematronReport = generateReport(metadata, validator.get(10, TimeUnit.MINUTES)); schematronReport.getErrors() .forEach(errorMsg -> report.addMetacardViolation(new ValidationViolationImpl(attributes, sanitize(errorMsg), ValidationViolation.Severity.ERROR))); schematronReport.getWarnings().forEach( warningMsg -> report.addMetacardViolation(new ValidationViolationImpl(attributes, sanitize(warningMsg), ValidationViolation.Severity.WARNING))); } } catch (TimeoutException | ExecutionException e) { throw new ValidationExceptionImpl(e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new ValidationExceptionImpl(e); } } return report; } private SchematronReport generateReport(String metadata, Templates validator) throws SchematronValidationException { XMLReader xmlReader = null; try { XMLReader xmlParser = XML_UTILS.getSecureXmlParser(); xmlReader = new XMLFilterImpl(xmlParser); } catch (SAXException e) { throw new SchematronValidationException(e); } SchematronReport report; try { Transformer transformer = validator.newTransformer(); DOMResult schematronResult = new DOMResult(); transformer.transform(new SAXSource(xmlReader, new InputSource(new StringReader(metadata))), schematronResult); report = new SvrlReport(schematronResult); } catch (TransformerException e) { throw new SchematronValidationException("Could not setup validator to perform validation.", e); } return report; } @Override public String getVersion() { return null; } @Override public String getId() { return id; } public void setId(String id) { this.id = id; } @Override public String getTitle() { return null; } @Override public String getDescription() { return null; } @Override public String getOrganization() { return null; } @Override public Optional<MetacardValidationReport> validateMetacard(Metacard metacard) { try { return Optional.of(generateReport(metacard)); } catch (ValidationExceptionImpl e) { LOGGER.warn("Exception validating metacard ID {}", metacard.getId(), e); return Optional.empty(); } } /** * The Listener class which catches Saxon configuration errors. * * <p>DDF-855: These warnings and errors are logged so that they are not displayed on the console. */ private static class SaxonErrorListener implements ErrorListener { private List<String> schematronFileNames; public SaxonErrorListener(List<String> schematronFileNames) { this.schematronFileNames = schematronFileNames; } @Override public void warning(TransformerException e) throws TransformerException { LOGGER.debug("Transformer warning: '{}' on file: {}", e.getMessage(), this.schematronFileNames); LOGGER.debug("Saxon exception", e); } @Override public void error(TransformerException e) throws TransformerException { LOGGER.debug("Transformer warning: '{}' on file: {}", e.getMessage(), this.schematronFileNames); LOGGER.debug("Saxon exception", e); } @Override public void fatalError(TransformerException e) throws TransformerException { LOGGER.info("Transformer error: (Schematron file = {}):", this.schematronFileNames, e); } } /** * The Listener class which catches xsl:messages during the transformation/stages of the * Schematron schema. */ private class Listener implements ErrorListener { public void warning(TransformerException e) throws TransformerException { LOGGER.debug("Schematron rule transformation warning", e); } public void error(TransformerException e) throws TransformerException { throw e; } public void fatalError(TransformerException e) throws TransformerException { throw e; } } }