// Java tutorial
/* * Copyright 2016 Crown Copyright * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package stroom.pipeline.server.filter; import org.apache.commons.lang.StringUtils; import org.springframework.context.annotation.Scope; import org.springframework.stereotype.Component; import org.xml.sax.Attributes; import org.xml.sax.ErrorHandler; import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import stroom.cache.server.SchemaKey; import stroom.cache.server.SchemaPool; import stroom.cache.server.StoredSchema; import stroom.pipeline.server.LocationFactoryProxy; import stroom.pipeline.server.errorhandler.ErrorHandlerAdaptor; import stroom.pipeline.server.errorhandler.ErrorReceiverProxy; import stroom.pipeline.server.errorhandler.LoggedException; import stroom.pipeline.state.PipelineContext; import stroom.pool.PoolItem; import stroom.util.CharBuffer; import stroom.util.shared.Severity; import stroom.util.spring.StroomScope; import stroom.xmlschema.server.XMLSchemaCache; import stroom.xmlschema.server.XMLSchemaCache.SchemaSet; import stroom.xmlschema.shared.FindXMLSchemaCriteria; import stroom.xmlschema.shared.XMLSchema; import javax.inject.Inject; import javax.xml.XMLConstants; import javax.xml.validation.ValidatorHandler; import java.util.Map; import java.util.Map.Entry; import java.util.TreeMap; import java.util.regex.Pattern; /** * An XML filter for performing inline schema validation of XML. 
*/ @Component @Scope(StroomScope.PROTOTYPE) public class SchemaFilter extends AbstractXMLFilter implements Locator { private static final int INDENT = 2; private static final String SPACE = " "; private static final String SCHEMA_LOCATION = "schemaLocation"; private static final Pattern MULTI_SPACE_PATTERN = Pattern.compile("[\\s]+"); private static final Pattern CVC_PATTERN = Pattern.compile("cvc-[^:]*:"); private static final Pattern NS_REDUCTION_PATTERN = Pattern.compile("\"[^\"]*\":"); private final SchemaPool schemaPool; private final XMLSchemaCache xmlSchemaCache; private final ErrorReceiverProxy errorReceiverProxy; private final LocationFactoryProxy locationFactory; private final PipelineContext pipelineContext; private final Map<String, String> prefixes = new TreeMap<>(); private final CharBuffer sb = new CharBuffer(10); private ErrorHandler errorHandler; private Map<String, String> schemaLocations; private ValidatorHandler validator; private PoolItem<SchemaKey, StoredSchema> poolItem; private FindXMLSchemaCriteria schemaConstraint; private int lineNo; private int colNo; private int depth; private boolean inStartElement; private String schemaLanguage = XMLConstants.W3C_XML_SCHEMA_NS_URI; // private boolean cacheSchemas = true; private boolean schemaValidation = true; private boolean useOriginalLocator; private Locator locator; @Inject public SchemaFilter(final SchemaPool schemaPool, final XMLSchemaCache xmlSchemaCache, final ErrorReceiverProxy errorReceiverProxy, final LocationFactoryProxy locationFactory, final PipelineContext pipelineContext) { this.schemaPool = schemaPool; this.xmlSchemaCache = xmlSchemaCache; this.errorReceiverProxy = errorReceiverProxy; this.locationFactory = locationFactory; this.pipelineContext = pipelineContext; } /** * @see stroom.pipeline.server.filter.AbstractXMLFilter#startProcessing() */ @Override public void startProcessing() { try { if (errorHandler == null) { errorHandler = new ErrorHandlerAdaptor(getElementId(), 
locationFactory, errorReceiverProxy) { @Override protected void log(final Severity severity, final SAXParseException exception) { String message = exception.getMessage(); if (message.contains("cvc-")) { message = CVC_PATTERN.matcher(message).replaceAll(""); } if (message.contains("One of")) { message = NS_REDUCTION_PATTERN.matcher(message).replaceAll(""); } message = message.trim(); final SAXParseException ex = new SAXParseException(message, exception.getPublicId(), exception.getSystemId(), exception.getLineNumber(), exception.getColumnNumber()); super.log(severity, ex); } }; } schemaLocations = null; prefixes.clear(); validator = null; } finally { super.startProcessing(); } } @Override public void endProcessing() { try { // Return the current schema to the pool if we have one. returnCurrentSchema(); } finally { super.endProcessing(); } } /** * This method tells filters that a stream is about to be parsed so that * they can complete any setup necessary. */ @Override public void startStream() { // Assume the first line is the XML declaration. lineNo = 1; colNo = 0; depth = 0; inStartElement = false; super.startStream(); } @Override public void setDocumentLocator(final Locator locator) { try { if (useOriginalLocator) { this.locator = locator; } else { this.locator = this; } if (validator != null) { validator.setDocumentLocator(this.locator); } } catch (final Throwable t) { unexpectedError("setDocumentLocator()", t); } finally { super.setDocumentLocator(locator); } } @Override public void startDocument() throws SAXException { try { if (validator != null) { validator.startDocument(); } } catch (final Throwable t) { unexpectedError("startDocument()", t); } finally { super.startDocument(); } } /** * Fires necessary end document event for the current validator. * * @throws SAXException * Could be thrown by validator. 
* * @see stroom.pipeline.server.filter.AbstractXMLFilter#endDocument() */ @Override public void endDocument() throws SAXException { try { if (validator != null) { validator.endDocument(); } } catch (final Throwable t) { unexpectedError("endDocument()", t); } finally { super.endDocument(); } } /** * Adds prefixes from the prefix map. * * @param prefix * The prefix to add. * @param uri * The URI of the prefix. * @throws SAXException * Not thrown. * * @see stroom.pipeline.server.filter.AbstractXMLFilter#startPrefixMapping(java.lang.String, * java.lang.String) */ @Override public void startPrefixMapping(final String prefix, final String uri) throws SAXException { try { prefixes.put(prefix, uri); if (validator != null) { validator.startPrefixMapping(prefix, uri); } } catch (final Throwable t) { unexpectedError("startPrefixMapping()", t); } finally { super.startPrefixMapping(prefix, uri); } } /** * Removes prefixes from the prefix map. * * @param prefix * The prefix to remove. * @throws SAXException * Not thrown. * * @see stroom.pipeline.server.filter.AbstractXMLFilter#endPrefixMapping(java.lang.String) */ @Override public void endPrefixMapping(final String prefix) throws SAXException { try { prefixes.remove(prefix); if (validator != null) { validator.endPrefixMapping(prefix); } } catch (final Throwable t) { unexpectedError("endPrefixMapping()", t); } finally { super.endPrefixMapping(prefix); } } /** * This method is entered for every start element. If this is the first * start element in a document it looks for a schema declaration to use to * validate the rest of the document. * * @param uri * The element's namespace URI, or the empty string. * @param localName * The element's local name, or the empty string. * @param qName * The element's qualified (prefixed) name, or the empty string. * @param atts * The element's attributes. * @exception org.xml.sax.SAXException * The client may throw an exception during processing. 
* * @see stroom.pipeline.server.filter.AbstractXMLFilter#startElement(java.lang.String, * java.lang.String, java.lang.String, org.xml.sax.Attributes) */ @Override public void startElement(final String uri, final String localName, final String qName, final Attributes atts) throws SAXException { try { // When we are stepping each XML output record will begin at line 1 // again. if (pipelineContext.isStepping() && depth == 0) { lineNo = 1; } colNo = depth * INDENT; lineNo++; inStartElement = true; depth++; storeSchemaLocations(uri, atts); if (validator != null) { validator.startElement(uri, localName, qName, atts); } } catch (final Throwable t) { unexpectedError("startElement()", t); } finally { super.startElement(uri, localName, qName, atts); } } /** * Receive notification of the end of an element. * * <p> * The SAX parser will invoke this method at the end of every element in the * XML document; there will be a corresponding {@link #startElement * startElement} event for every endElement event (even when the element is * empty). * </p> * * <p> * For information on the names, see startElement. 
* </p> * * @param uri * the Namespace URI, or the empty string if the element has no * Namespace URI or if Namespace processing is not being * performed * @param localName * the local name (without prefix), or the empty string if * Namespace processing is not being performed * @param qName * the qualified XML name (with prefix), or the empty string if * qualified names are not available * @throws org.xml.sax.SAXException * any SAX exception, possibly wrapping another exception */ @Override public void endElement(final String uri, final String localName, final String qName) throws SAXException { try { depth--; if (!inStartElement) { lineNo++; colNo = depth * INDENT; } inStartElement = false; if (validator != null) { validator.endElement(uri, localName, qName); } } catch (final Throwable t) { unexpectedError("endElement()", t); } finally { super.endElement(uri, localName, qName); } } /** * Sends characters to the current validator handler. * * @param ch * Characters. * @param start * Start of char buffer. * @param length * Number of chars to send. * @throws SAXException * Not thrown. 
* * @see stroom.pipeline.server.filter.AbstractXMLFilter#characters(char[], * int, int) */ @Override public void characters(final char[] ch, final int start, final int length) throws SAXException { try { colNo += length; if (validator != null) { validator.characters(ch, start, length); } } catch (final Throwable t) { unexpectedError("characters()", t); } finally { super.characters(ch, start, length); } } private void storeSchemaLocations(final String uri, final Attributes atts) throws SAXException { if (schemaValidation && schemaLocations == null && xmlSchemaCache != null) { schemaLocations = new TreeMap<>(); String schemaLocation = atts.getValue(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, SCHEMA_LOCATION); if (schemaLocation != null) { schemaLocation = MULTI_SPACE_PATTERN.matcher(schemaLocation).replaceAll(SPACE); schemaLocation = schemaLocation.trim(); final String[] locations = schemaLocation.split(SPACE); for (int i = 0; i < locations.length; i += 2) { if (i + 1 < locations.length) { final String namespace = locations[i]; final String schema = locations[i + 1]; schemaLocations.put(namespace, schema); } } } // Make sure all of the schema locations are valid. if (validateSchemaLocations(uri, schemaLocations, schemaConstraint)) { // Locations are valid so get a validator. validator = getValidator(); } } } private boolean validateSchemaLocations(final String rootURI, final Map<String, String> schemaLocations, final FindXMLSchemaCriteria schemaConstraint) throws SAXException { final SchemaSet schemaSet = xmlSchemaCache.getSchemaSet(schemaConstraint); final String validLocations = schemaSet.getLocations(); // Check that the root has a URI. if (rootURI == null || rootURI.trim().length() == 0) { noNamespace(validLocations, schemaConstraint); return false; } // Make sure we have some schema locations. 
if (schemaLocations == null || schemaLocations.size() == 0) { noSchemaLocations(validLocations, schemaConstraint); return false; } // Check that the root URI has a valid schema location. final String rootLocation = schemaLocations.get(rootURI); if (rootLocation == null) { noSchemaLocation(rootURI, validLocations, schemaConstraint); return false; } // Make sure the namespace or location for the root schema can be found // within the set of valid schemas. final XMLSchema bestMatch = schemaSet.getBestMatch(rootLocation, rootURI); if (bestMatch == null) { invalidSchemaLocation(rootLocation, validLocations, schemaConstraint); return false; } // Finally check that all of the schema locations are valid. final SchemaSet allSchemas = xmlSchemaCache.getAllSchemas(); for (final Entry<String, String> entry : schemaLocations.entrySet()) { final String namespaceURI = entry.getKey(); final String systemId = entry.getValue(); final XMLSchema res = allSchemas.getBestMatch(systemId, namespaceURI); if (res == null) { invalidSchemaLocation(systemId, allSchemas.getLocations(), null); return false; } else if (res.isDeprecated()) { deprecatedSchema(systemId); } } return true; } private void noNamespace(final String validLocations, final FindXMLSchemaCriteria schemaConstraint) throws SAXException { if (errorHandler != null) { sb.append("No namespace has been declared for this XML instance."); sb.append("\nYou must declare a namespace, "); sb.append("e.g. 
xmlns=\"event-logging:3\"."); printSchemaLocations(sb, validLocations, schemaConstraint); fatalError(sb); } } private void noSchemaLocations(final String validLocations, final FindXMLSchemaCriteria schemaConstraint) throws SAXException { if (errorHandler != null) { sb.append("No schema locations specified."); printSchemaLocations(sb, validLocations, schemaConstraint); fatalError(sb); } } private void noSchemaLocation(final String uri, final String validLocations, final FindXMLSchemaCriteria schemaConstraint) throws SAXException { sb.append("No schema location specified for: "); sb.append(uri); printSchemaLocations(sb, validLocations, schemaConstraint); fatalError(sb); } private void invalidSchemaLocation(final String systemId, final String validLocations, final FindXMLSchemaCriteria schemaConstraint) throws SAXException { sb.append("Invalid schema location: "); sb.append(systemId); printSchemaLocations(sb, validLocations, schemaConstraint); fatalError(sb); } private void printSchemaLocations(final CharBuffer sb, final String validLocations, final FindXMLSchemaCriteria schemaConstraint) { final StringBuilder where = new StringBuilder(); if (schemaConstraint != null) { if (StringUtils.isNotBlank(schemaConstraint.getSchemaGroup())) { if (where.length() == 0) { where.append(" where "); } else { where.append(" and "); } where.append("schema group='"); where.append(schemaConstraint.getSchemaGroup()); where.append("'"); } if (StringUtils.isNotBlank(schemaConstraint.getNamespaceURI())) { if (where.length() == 0) { where.append(" where "); } else { where.append(" and "); } where.append("namespace URI='"); where.append(schemaConstraint.getNamespaceURI()); where.append("'"); } if (StringUtils.isNotBlank(schemaConstraint.getSystemId())) { if (where.length() == 0) { where.append(" where "); } else { where.append(" and "); } where.append("system id='"); where.append(schemaConstraint.getSystemId()); where.append("'"); } } sb.append("\nYou must use one of the following schema 
locations"); sb.append(where.toString()); sb.append(":\n"); sb.append(validLocations); } private void fatalError(final CharBuffer sb) throws SAXException { final String message = sb.toString(); sb.clear(); final SAXParseException exception = new SAXParseException(message, locator); errorHandler.fatalError(exception); // If we aren't stepping then throw an exception to terminate early. if (!pipelineContext.isStepping()) { throw new LoggedException(message, exception); } } private void deprecatedSchema(final String uri) throws SAXException { sb.append("Schema is deprecated: "); sb.append(uri); errorHandler.warning(new SAXParseException(sb.toString(), locator)); sb.clear(); } private ValidatorHandler getValidator() throws SAXException { ValidatorHandler validatorHandler = null; if (schemaValidation && schemaPool != null) { // Put back the old schema if we need to. returnCurrentSchema(); sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); sb.append("<xsd:schema xmlns:xsd=\""); sb.append(schemaLanguage); sb.append("\""); final CharBuffer namespaces = new CharBuffer(100); final CharBuffer locations = new CharBuffer(100); for (final String prefix : prefixes.keySet()) { final String namespace = prefixes.get(prefix); final String location = schemaLocations.get(namespace); if (location != null) { namespaces.append(" xmlns"); if (prefix.length() > 0) { namespaces.append(":"); namespaces.append(prefix); } namespaces.append("=\""); namespaces.append(namespace); namespaces.append("\""); locations.append("<xsd:import namespace=\""); locations.append(namespace); locations.append("\" schemaLocation=\""); locations.append(location); locations.append("\"/>\n"); } } sb.append(namespaces.toString()); sb.append(">\n"); sb.append(locations.toString()); sb.append("</xsd:schema>"); final String data = sb.toString(); sb.clear(); // Get another schema. 
final SchemaKey schemaKey = new SchemaKey(schemaLanguage, data); poolItem = schemaPool.borrowObject(schemaKey, true); final StoredSchema storedSchema = poolItem.getValue(); // Replay errors generated when creating schema. try { storedSchema.getErrorReceiver().replay(errorReceiverProxy); } catch (final Exception e) { errorHandler.fatalError(new SAXParseException(e.getMessage(), null)); } // Create a validator handler. validatorHandler = storedSchema.getSchema().newValidatorHandler(); validatorHandler.setDocumentLocator(locator); validatorHandler.setErrorHandler(errorHandler); validatorHandler.startDocument(); for (final String prefix : prefixes.keySet()) { validatorHandler.startPrefixMapping(prefix, prefixes.get(prefix)); } } return validatorHandler; } private void returnCurrentSchema() { if (poolItem != null) { schemaPool.returnObject(poolItem, true); poolItem = null; } } /** * This method gets the calculated column number by assuming that the output * is pretty printed.. * * @return The assumed pretty printed column number. * * @see org.xml.sax.Locator#getColumnNumber() */ @Override public int getColumnNumber() { return colNo; } /** * This method gets the calculated line number by assuming that the output * is pretty printed. * * @return The assumed pretty printed line number. * * @see org.xml.sax.Locator#getLineNumber() */ @Override public int getLineNumber() { return lineNo; } /** * Not implemented. * * @return null. * * @see org.xml.sax.Locator#getPublicId() */ @Override public String getPublicId() { return null; } /** * Not implemented. * * @return null. 
* * @see org.xml.sax.Locator#getSystemId() */ @Override public String getSystemId() { return null; } private void unexpectedError(final String method, final Throwable t) { String message = ""; if (t != null) { if (t.getMessage() != null) { message = t.getMessage(); } if (message == null || message.trim().length() == 0) { message = t.getClass().getName(); } } errorReceiverProxy.log(Severity.ERROR, null, getElementId(), "Unexpected error thrown by schema validator in method " + method + ": " + message, t); } public void setSchemaLanguage(final String schemaLanguage) { this.schemaLanguage = schemaLanguage; } public void setErrorHandler(final ErrorHandler errorHandler) { this.errorHandler = errorHandler; } public void setUseOriginalLocator(final boolean useOriginalLocator) { this.useOriginalLocator = useOriginalLocator; } public void setSchemaValidation(final boolean schemaValidation) { this.schemaValidation = schemaValidation; } public void setSchemaConstraint(final FindXMLSchemaCriteria schemaConstraint) { this.schemaConstraint = schemaConstraint; } }