Java tutorial
// Copyright 2004 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.tapestry.util.xml;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tapestry.ApplicationRuntimeException;
import org.apache.tapestry.ILocation;
import org.apache.tapestry.IResourceLocation;
import org.apache.tapestry.Location;
import org.apache.tapestry.Tapestry;
import org.apache.tapestry.util.RegexpMatcher;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * A simplified version of {@link org.apache.commons.digester.Digester}.
 * This version is without as many bells and whistles but has some key features needed when parsing
 * a document (rather than a configuration file):
 * <br>
 * <ul>
 *   <li>Notifications for each bit of text</li>
 *   <li>Tracking of exact location within the document.</li>
 * </ul>
 *
 * <p>
 * Like Digester, there's an object stack and a rule stack.
 * The rules are much
 * simpler (more coding), in that there's a one-to-one relationship between
 * an element and a rule.
 *
 * <p>
 * Based on SAX2.
 *
 * <p>
 * An instance keeps per-parse state in its fields (stacks, locator, content buffer),
 * which {@link #parse(IResourceLocation)} resets when it completes.
 *
 * @author Howard Lewis Ship
 * @version $Id: RuleDirectedParser.java,v 1.9 2004/02/19 17:37:50 hlship Exp $
 * @since 3.0
 */
public class RuleDirectedParser extends DefaultHandler
{
    private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);

    private IResourceLocation _documentLocation;

    private List _ruleStack = new ArrayList();

    private List _objectStack = new ArrayList();

    private Object _documentObject;

    private Locator _locator;

    private int _line = -1;

    private int _column = -1;

    private ILocation _location;

    /**
     * Factory shared by every instance of this class; it is created lazily by
     * {@link #constructParser()} while holding the class lock.
     */
    private static SAXParserFactory _parserFactory;

    private SAXParser _parser;

    private RegexpMatcher _matcher;

    private String _uri;

    private String _localName;

    private String _qName;

    /**
     * Map of {@link IRule} keyed on the local name
     * of the element.
     */
    private Map _ruleMap = new HashMap();

    /**
     * Used to accumulate content provided by
     * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
     */
    private StringBuffer _contentBuffer = new StringBuffer();

    /**
     * Map of paths to external entities (such as the DTD) keyed on public id.
     */
    private Map _entities = new HashMap();

    /**
     * Parses the document stored at the given location and returns the document
     * object, that is, the first object pushed onto the object stack during the parse.
     * All per-parse state is reset before this method returns, whether or not
     * the parse succeeded.
     *
     * @param documentLocation the location of the document to parse
     * @return the document object (null if no rule pushed an object)
     * @throws DocumentParseException if the location has no resource URL, or
     * the resource can not be opened or parsed
     */
    public Object parse(IResourceLocation documentLocation)
    {
        if (LOG.isDebugEnabled())
            LOG.debug("Parsing: " + documentLocation);

        try
        {
            _documentLocation = documentLocation;

            URL url = documentLocation.getResourceURL();

            if (url == null)
                throw new DocumentParseException(
                    Tapestry.format("RuleDrivenParser.resource-missing", documentLocation),
                    documentLocation,
                    null,
                    null);

            return parse(url);
        }
        finally
        {
            _documentLocation = null;
            _ruleStack.clear();
            _objectStack.clear();
            _documentObject = null;

            _uri = null;
            _localName = null;
            _qName = null;

            _line = -1;
            _column = -1;
            _location = null;
            _locator = null;

            _contentBuffer.setLength(0);
        }
    }

    /**
     * Opens the URL and parses its content, using this object as the SAX handler.
     * The input stream is always closed before this method returns, and the
     * parser is discarded whenever the parse fails so that the next parse starts
     * with a fresh one.
     *
     * @param url the URL of the document to parse
     * @return the document object (null if no rule pushed an object)
     * @throws DocumentParseException if the URL can not be opened, or the parse fails
     */
    protected Object parse(URL url)
    {
        if (_parser == null)
            _parser = constructParser();

        InputStream stream = null;

        try
        {
            stream = url.openStream();
        }
        catch (IOException ex)
        {
            throw new DocumentParseException(
                Tapestry.format("RuleDrivenParser.unable-to-open-resource", url),
                _documentLocation,
                null,
                ex);
        }

        try
        {
            _parser.parse(new InputSource(stream), this);
        }
        catch (Exception ex)
        {
            // Same precaution as fatalError(): a failed parse may leave the
            // parser unusable, so don't keep it for later documents.

            _parser = null;

            throw new DocumentParseException(
                Tapestry.format("RuleDrivenParser.parse-error", url, ex.getMessage()),
                _documentLocation,
                getLocation(),
                ex);
        }
        finally
        {
            // Close on the failure path too; otherwise the stream leaks
            // whenever the parse throws.

            closeQuietly(stream);
        }

        if (LOG.isDebugEnabled())
            LOG.debug("Document parsed as: " + _documentObject);

        return _documentObject;
    }

    /**
     * Closes the stream, if non-null. A failure to close is logged (at debug level)
     * rather than thrown, so that it never hides the outcome of the parse.
     */
    private void closeQuietly(InputStream stream)
    {
        if (stream == null)
            return;

        try
        {
            stream.close();
        }
        catch (IOException ex)
        {
            if (LOG.isDebugEnabled())
                LOG.debug("Unable to close input stream: " + ex.getMessage());
        }
    }

    /**
     * Returns an {@link ILocation} representing the
     * current position within the document (depending
     * on the parser, this may be accurate to
     * column number level). The same instance is returned for as long as the
     * line and column reported by the locator do not change.
     *
     * @return the current location, or null if no {@link Locator} has been provided
     */
    public ILocation getLocation()
    {
        if (_locator == null)
            return null;

        int line = _locator.getLineNumber();
        int column = _locator.getColumnNumber();

        // The cached location is only valid for the position it was created at.

        if (_line != line || _column != column)
        {
            _location = null;
            _line = line;
            _column = column;
        }

        if (_location == null)
            _location = new Location(_documentLocation, _line, _column);

        return _location;
    }

    /**
     * Pushes an object onto the object stack. The first object
     * pushed is the "document object", the root object returned
     * by the parse.
     */
    public void push(Object object)
    {
        if (_documentObject == null)
            _documentObject = object;

        push(_objectStack, object, "object stack");
    }

    /**
     * Returns the top object on the object stack.
     */
    public Object peek()
    {
        return peek(_objectStack, 0);
    }

    /**
     * Returns an object within the object stack, at depth.
     * Depth 0 is the top object, depth 1 is the next-to-top object,
     * etc.
     */
    public Object peek(int depth)
    {
        return peek(_objectStack, depth);
    }

    /**
     * Removes and returns the top object on the object stack.
     */
    public Object pop()
    {
        return pop(_objectStack, "object stack");
    }

    /**
     * Removes and returns the last element of the list; the name
     * is used only in the debug message.
     */
    private Object pop(List list, String name)
    {
        Object result = list.remove(list.size() - 1);

        if (LOG.isDebugEnabled())
            LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");

        return result;
    }

    /**
     * Returns the element that is depth positions below the last element of the list.
     */
    private Object peek(List list, int depth)
    {
        return list.get(list.size() - 1 - depth);
    }

    /**
     * Appends the object to the list; the name is used only in the debug message.
     */
    private void push(List list, Object object, String name)
    {
        if (LOG.isDebugEnabled())
            LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");

        list.add(object);
    }

    /**
     * Pushes a new rule onto the rule stack.
     */
    protected void pushRule(IRule rule)
    {
        push(_ruleStack, rule, "rule stack");
    }

    /**
     * Returns the top rule on the stack.
     */
    protected IRule peekRule()
    {
        return (IRule) peek(_ruleStack, 0);
    }

    /**
     * Removes and returns the top rule on the stack.
     */
    protected IRule popRule()
    {
        return (IRule) pop(_ruleStack, "rule stack");
    }

    /**
     * Registers the rule to use for elements with the given name, replacing
     * any rule previously registered for that name.
     *
     * @param localElementName the element name the rule applies to
     * @param rule the rule to invoke for such elements
     */
    public void addRule(String localElementName, IRule rule)
    {
        _ruleMap.put(localElementName, rule);
    }

    /**
     * Registers
     * a public id and corresponding input source. Generally, the source
     * is a wrapper around an input stream to a package resource.
     *
     * @param publicId the public identifier to be registered, generally
     * the publicId of a DTD related to the document being parsed
     * @param entityPath the resource path of the entity, typically a DTD
     * file. Relative files names are expected to be stored in the same package
     * as the class file, otherwise a leading slash is an absolute pathname
     * within the classpath.
     *
     **/
    public void registerEntity(String publicId, String entityPath)
    {
        if (LOG.isDebugEnabled())
            LOG.debug("Registering " + publicId + " as " + entityPath);

        if (_entities == null)
            _entities = new HashMap();

        _entities.put(publicId, entityPath);
    }

    /**
     * Finds the rule registered (via {@link #addRule(String, IRule)}) for the
     * element name. This implementation does not make use of the attributes.
     *
     * @param localName the name of the element being started
     * @param attributes the attributes of the element
     * @return the matching rule, never null
     * @throws DocumentParseException if no rule is registered for the name
     */
    protected IRule selectRule(String localName, Attributes attributes)
    {
        IRule rule = (IRule) _ruleMap.get(localName);

        if (rule == null)
            throw new DocumentParseException(
                Tapestry.format("RuleDrivenParser.no-rule-for-element", localName),
                _documentLocation,
                getLocation(),
                null);

        return rule;
    }

    /**
     * Uses the {@link Locator} to track the position
     * in the document as a {@link ILocation}. This is invoked
     * once (before the initial element is parsed) and
     * the Locator is retained and queried as to
     * the current file location.
     *
     * @see #getLocation()
     */
    public void setDocumentLocator(Locator locator)
    {
        _locator = locator;
    }

    /**
     * Accumulates the content in a buffer; the concatenated content
     * is provided to the top rule just before any start or end tag.
     */
    public void characters(char[] ch, int start, int length) throws SAXException
    {
        _contentBuffer.append(ch, start, length);
    }

    /**
     * Pops the top rule off the stack and
     * invokes {@link IRule#endElement(RuleDirectedParser)}.
     * Any accumulated content is delivered to that rule first.
     */
    public void endElement(String uri, String localName, String qName) throws SAXException
    {
        fireContentRule();

        _uri = uri;
        _localName = localName;
        _qName = qName;

        popRule().endElement(this);
    }

    /**
     * Ignorable content is ignored.
     */
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
    {
    }

    /**
     * Invokes {@link #selectRule(String, Attributes)} to choose a new rule,
     * which is pushed onto the rule stack, then invokes
     * {@link IRule#startElement(RuleDirectedParser, Attributes)}.
     * Any accumulated content is first delivered to the rule of the enclosing element.
     */
    public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException
    {
        fireContentRule();

        _uri = uri;
        _localName = localName;
        _qName = qName;

        String name = extractName(uri, localName, qName);

        IRule newRule = selectRule(name, attributes);

        pushRule(newRule);

        newRule.startElement(this, attributes);
    }

    /**
     * Chooses the name used to look up the rule for an element: the local name,
     * or the qualified name when the local name is blank.
     */
    private String extractName(String uri, String localName, String qName)
    {
        return Tapestry.isBlank(localName) ? qName : localName;
    }

    /**
     * Uses {@link javax.xml.parsers.SAXParserFactory} to create an instance
     * of a validation SAX2 parser. The factory is created and configured
     * (via {@link #configureParserFactory(SAXParserFactory)}) on first use and is then
     * shared by all instances of this class.
     *
     * @throws ApplicationRuntimeException if the parser can not be created
     */
    protected synchronized SAXParser constructParser()
    {
        try
        {
            // The factory is static, so it must be guarded by a lock that is
            // shared by all instances; the lock on this instance is not enough.

            synchronized (RuleDirectedParser.class)
            {
                if (_parserFactory == null)
                {
                    SAXParserFactory factory = SAXParserFactory.newInstance();

                    configureParserFactory(factory);

                    // Publish only once configured, so that a failure above
                    // can't leave an unconfigured factory behind.

                    _parserFactory = factory;
                }

                return _parserFactory.newSAXParser();
            }
        }
        catch (SAXException ex)
        {
            throw new ApplicationRuntimeException(ex);
        }
        catch (ParserConfigurationException ex)
        {
            throw new ApplicationRuntimeException(ex);
        }
    }

    /**
     * Configures a {@link SAXParserFactory} before
     * {@link SAXParserFactory#newSAXParser()} is invoked.
     * The default implementation sets validating to true
     * and namespaceAware to false.
     */
    protected void configureParserFactory(SAXParserFactory factory)
    {
        factory.setValidating(true);
        factory.setNamespaceAware(false);
    }

    /**
     * Throws the exception.
     */
    public void error(SAXParseException ex) throws SAXException
    {
        fatalError(ex);
    }

    /**
     * Throws the exception.
     */
    public void fatalError(SAXParseException ex) throws SAXException
    {
        // Sometimes, a bad parse "corrupts" a parser so that it doesn't
        // work properly for future parses (of valid documents),
        // so discard it here.

        _parser = null;

        throw ex;
    }

    /**
     * Throws the exception.
     */
    public void warning(SAXParseException ex) throws SAXException
    {
        fatalError(ex);
    }

    /**
     * Resolves an external entity against the entities registered with
     * {@link #registerEntity(String, String)}. The registered path is loaded
     * as a resource relative to the class of this object.
     *
     * @return an input source for the registered resource, or null (leaving
     * resolution to the parser) if the public id is not registered or its
     * resource can not be found
     */
    public InputSource resolveEntity(String publicId, String systemId) throws SAXException
    {
        String entityPath = null;

        if (LOG.isDebugEnabled())
            LOG.debug(
                "Attempting to resolve entity; publicId = " + publicId + " systemId = " + systemId);

        if (_entities != null)
            entityPath = (String) _entities.get(publicId);

        if (entityPath == null)
        {
            if (LOG.isDebugEnabled())
                LOG.debug("Entity not found, using " + systemId);

            return null;
        }

        InputStream stream = getClass().getResourceAsStream(entityPath);

        // An InputSource wrapped around a null stream is useless to the parser,
        // so treat a missing resource like an unregistered entity.

        if (stream == null)
        {
            LOG.warn(
                "Resource " + entityPath + " registered for " + publicId
                    + " was not found, using " + systemId);

            return null;
        }

        InputSource result = new InputSource(stream);

        if (LOG.isDebugEnabled())
            LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");

        return result;
    }

    /**
     * Validates that the input value matches against the specified
     * Perl5 pattern. If valid, the method simply returns.
     * If not a match, then an error message is generated (using the
     * errorKey and the input value) and a
     * {@link InvalidStringException} is thrown.
     *
     **/
    public void validate(String value, String pattern, String errorKey)
        throws DocumentParseException
    {
        if (_matcher == null)
            _matcher = new RegexpMatcher();

        if (_matcher.matches(pattern, value))
            return;

        throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
    }

    /**
     * Returns the location of the document being parsed; this is null
     * when no parse is in progress.
     */
    public IResourceLocation getDocumentLocation()
    {
        return _documentLocation;
    }

    /**
     * Returns the localName for the current element.
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public String getLocalName()
    {
        return _localName;
    }

    /**
     * Returns the qualified name for the current element.
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public String getQName()
    {
        return _qName;
    }

    /**
     * Returns the URI for the current element.
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public String getUri()
    {
        return _uri;
    }

    /**
     * Empties the content buffer and, if the rule stack is not empty, passes
     * the buffered text (possibly an empty string) to the top rule.
     */
    private void fireContentRule()
    {
        String content = _contentBuffer.toString();
        _contentBuffer.setLength(0);

        if (!_ruleStack.isEmpty())
            peekRule().content(this, content);
    }
}