Java tutorial
package org.apache.commons.digester3; /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.xml.sax.Attributes; /** * <p> * Extension of {@link RulesBase} for complex schema. * </p> * <p> * This is an extension of the basic pattern matching scheme intended to improve support for mapping complex xml-schema. * It is intended to be a minimal extension of the standard rules big enough to support complex schema but without the * full generality offered by more exotic matching pattern rules. * </p> * <h4>When should you use this rather than the original?</h4> * <p> * This pattern-matching engine is complex and slower than the basic default RulesBase class, but offers more * functionality: * <ul> * <li>Universal patterns allow patterns to be specified which will match regardless of whether there are * "better matching" patterns available.</li> * <li>Parent-match patterns (eg "a/b/?") allow matching for all direct children of a specified element.</li> * <li>Ancestor-match patterns (eg "a/b/*") allow matching all elements nested within a specified element to any nesting * depth.</li> * <li>Completely-wild patterns ("*" or "!*") allow matching all elements.</li> * </ul> * </p> * <h4>Universal Match Patterns</h4> * <p> * The default RulesBase pattern-matching engine always attempts to find the "best matching pattern", and will ignore * rules associated with other patterns that match but are not "as good". As an example, if the pattern "a/b/c" is * associated with rules 1 and 2, and "*/c" is associated with rules 3 and 4 then element "a/b/c" will cause only * rules 1 and 2 to execute. Rules 3 and 4 do have matching patterns, but because the patterns are shorter and include * wildcard characters they are regarded as being "not as good" as a direct match. In general, exact patterns are better * than wildcard patterns, and among multiple patterns with wildcards, the longest is preferred. See the RulesBase class * for more information. * </p> * <p> * This feature of preferring "better" patterns can be a powerful tool. However it also means that patterns can interact * in unexpected ways. * </p> * <p> * When using the ExtendedBaseRules, any pattern prefixed with '!' bypasses the "best match" feature. Even if there is * an exact match or a longer wildcard match, patterns prefixed by '!' will still be tested to see if they match, and if * so their associated Rule objects will be included in the set of rules to be executed in the normal manner. * </p> * <ul> * <li>Pattern <code>"!*/a/b"</code> matches whenever an 'b' element is inside an 'a'.</li> * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent matching <code>"a/b"</code> (see * "Parent Match Patterns").</li> * <li>Pattern <code>"!*/a/b/?"</code> matches any child of a parent matching <code>"!*/a/b"</code> (see * "Parent Match Patterns").</li> * <li>Pattern <code>"!a/b/*"</code> matches any element whose path starts with "a" then "b" (see * "Ancestor Match Patterns").</li> * <li>Pattern <code>"!*/a/b/*"</code> matches any elements whose path contains 'a/b' (see * "Ancestor Match Patterns").</li> * </ul> * <h4>Parent Match Patterns</h4> * <p> * These will match direct child elements of a particular parent element. * <ul> * <li> * <code>"a/b/c/?"</code> matches any child whose parent matches <code>"a/b/c"</code>. Exact parent rules take * precedence over Ancestor Match patterns.</li> * <li> * <code>"*/a/b/c/?"</code> matches any child whose parent matches <code>"*/a/b/c"</code>. The longest * matching still applies to parent matches but the length excludes the '?', which effectively means that standard * wildcard matches with the same level of depth are chosen in preference.</li> * </ul> * </p> * <h4>Ancestor Match Patterns</h4> * <p> * These will match elements whose parentage includes a particular sequence of elements. * <ul> * <li> * <code>"a/b/*"</code> matches any element whose path starts with 'a' then 'b'. Exact parent and parent match rules * take precedence. The longest ancestor match will take precedence.</li> * <li> * <code>"*/a/b/*"</code> matches any elements whose path contains an element 'a' followed by an element 'b'. * The longest matching still applies but the length excludes the '*' at the end.</li> * </ul> * </p> * <h4>Completely Wild Patterns</h4> * <p> * Pattern <code>"*"</code> matches every pattern that isn't matched by any other basic rule. * </p> * <p> * Pattern <code>"!*"</code> matches every pattern. * </p> * <h4>Using The Extended Rules</h4> * <p> * By default, a Digester instance uses a {@link RulesBase} instance as its pattern matching engine. To use an * ExtendedBaseRules instance, call the Digester.setRules method before adding any Rule objects to the digester * instance: * * <pre> * Digester digester = new Digester(); * digester.setRules( new ExtendedBaseRules() ); * </pre> * * </p> * <p> * The most important thing to remember when using the extended rules is that universal and non-universal patterns are * completely independent. Universal patterns are never affected by the addition of new patterns or the removal of * existing ones. Non-universal patterns are never affected by the addition of new <em>universal</em> patterns or the * removal of existing <em>universal</em> patterns. As in the basic matching rules, non-universal (basic) patterns * <strong>can</strong> be affected by the addition of new <em>non-universal</em> patterns or the removal of existing * <em>non-universal</em> patterns, because only rules associated with the "best matching" pattern for each xml element * are executed. * <p> * This means that you can use universal patterns to build up the simple parts of your structure - for example defining * universal creation and property setting rules. More sophisticated and complex mapping will require non-universal * patterns and this might mean that some of the universal rules will need to be replaced by a series of special cases * using non-universal rules. But by using universal rules as your backbone, these additions should not break your * existing rules. * </p> */ public class ExtendedBaseRules extends RulesBase { // ----------------------------------------------------- Instance Variables /** * Counts the entry number for the rules. */ private int counter = 0; /** * The decision algorithm used (unfortunately) doesn't preserve the entry order. This map is used by a comparator * which orders the list of matches before it's returned. This map stores the entry number keyed by the rule. */ private Map<Rule, Integer> order = new HashMap<Rule, Integer>(); // --------------------------------------------------------- Public Methods /** * {@inheritDoc} */ @Override protected void registerRule(String pattern, Rule rule) { super.registerRule(pattern, rule); counter++; order.put(rule, counter); } /** * {@inheritDoc} */ @Override public List<Rule> match(String namespaceURI, String pattern, String name, Attributes attributes) { // calculate the pattern of the parent // (if the element has one) String parentPattern = ""; int lastIndex = pattern.lastIndexOf('/'); boolean hasParent = true; if (lastIndex == -1) { // element has no parent hasParent = false; } else { // calculate the pattern of the parent parentPattern = pattern.substring(0, lastIndex); } // we keep the list of universal matches separate List<Rule> universalList = new ArrayList<Rule>(counter); // Universal wildcards ('*') in the middle of the pattern-string List<Rule> recList = null; // temporary parentPattern // we don't want to change anything.... String tempParentPattern = parentPattern; int parentLastIndex = tempParentPattern.lastIndexOf('/'); // look for pattern. Here, we search the whole // parent. Not ideal, but does the thing.... while (parentLastIndex > -1 && recList == null) { recList = this.cache.get(tempParentPattern + "/*/" + pattern.substring(lastIndex + 1)); if (recList != null) { // when /*/-pattern-string is found, add method // list to universalList. // Digester will do the rest universalList.addAll(recList); } else { // if not, shorten tempParent to move /*/ one position // to the left. // as last part of patttern is always added // we make sure pattern is allowed anywhere. tempParentPattern = parentPattern.substring(0, parentLastIndex); } parentLastIndex = tempParentPattern.lastIndexOf('/'); } // Universal all wildards ('!*') // These are always matched so always add them List<Rule> tempList = this.cache.get("!*"); if (tempList != null) { universalList.addAll(tempList); } // Universal exact parent match // need to get this now since only wildcards are considered later tempList = this.cache.get("!" + parentPattern + "/?"); if (tempList != null) { universalList.addAll(tempList); } // base behaviour means that if we certain matches, we don't continue // but we just have a single combined loop and so we have to set // a variable boolean ignoreBasicMatches = false; // see if we have an exact basic pattern match List<Rule> rulesList = this.cache.get(pattern); if (rulesList != null) { // we have a match! // so ignore all basic matches from now on ignoreBasicMatches = true; } else { // see if we have an exact child match if (hasParent) { // matching children takes preference rulesList = this.cache.get(parentPattern + "/?"); if (rulesList != null) { // we have a match! // so ignore all basic matches from now on ignoreBasicMatches = true; } else { // we don't have a match yet - so try exact ancester // rulesList = findExactAncesterMatch(pattern); if (rulesList != null) { // we have a match! // so ignore all basic matches from now on ignoreBasicMatches = true; } } } } // OK - we're ready for the big loop! // Unlike the basic rules case, // we have to go through for all those universal rules in all cases. // Find the longest key, ie more discriminant int longKeyLength = 0; for (String key : this.cache.keySet()) { // find out if it's a univeral pattern // set a flag boolean isUniversal = key.startsWith("!"); if (isUniversal) { // and find the underlying key key = key.substring(1, key.length()); } // don't need to check exact matches boolean wildcardMatchStart = key.startsWith("*/"); boolean wildcardMatchEnd = key.endsWith("/*"); if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) { boolean parentMatched = false; boolean basicMatched = false; boolean ancesterMatched = false; boolean parentMatchEnd = key.endsWith("/?"); if (parentMatchEnd) { // try for a parent match parentMatched = parentMatch(key, parentPattern); } else if (wildcardMatchEnd) { // check for ancester match if (wildcardMatchStart) { String patternBody = key.substring(2, key.length() - 2); if (pattern.endsWith(patternBody)) { ancesterMatched = true; } else { ancesterMatched = (pattern.indexOf(patternBody + "/") > -1); } } else { String bodyPattern = key.substring(0, key.length() - 2); if (pattern.startsWith(bodyPattern)) { if (pattern.length() == bodyPattern.length()) { // exact match ancesterMatched = true; } else { ancesterMatched = (pattern.charAt(bodyPattern.length()) == '/'); } } else { ancesterMatched = false; } } } else { // try for a base match basicMatched = basicMatch(key, pattern); } if (parentMatched || basicMatched || ancesterMatched) { if (isUniversal) { // universal rules go straight in // (no longest matching rule) tempList = this.cache.get("!" + key); if (tempList != null) { universalList.addAll(tempList); } } else { if (!ignoreBasicMatches) { // ensure that all parent matches are SHORTER // than rules with same level of matching. // // the calculations below don't work for universal // matching, but we don't care because in that case // this if-stmt is not entered. int keyLength = key.length(); if (wildcardMatchStart) { --keyLength; } if (wildcardMatchEnd) { --keyLength; } else if (parentMatchEnd) { --keyLength; } if (keyLength > longKeyLength) { rulesList = this.cache.get(key); longKeyLength = keyLength; } } } } } } // '*' works in practice as a default matching // (this is because anything is a deeper match!) if (rulesList == null) { rulesList = this.cache.get("*"); } // if we've matched a basic pattern, then add to the universal list if (rulesList != null) { universalList.addAll(rulesList); } // don't filter if namespace is null if (namespaceURI != null) { // remove invalid namespaces Iterator<Rule> it = universalList.iterator(); while (it.hasNext()) { Rule rule = it.next(); String nsUri = rule.getNamespaceURI(); if (nsUri != null && !nsUri.equals(namespaceURI)) { it.remove(); } } } // need to make sure that the collection is sort in the order // of addition. We use a custom comparator for this Collections.sort(universalList, new Comparator<Rule>() { public int compare(Rule r1, Rule r2) { // Get the entry order from the map Integer i1 = order.get(r1); Integer i2 = order.get(r2); // and use that to perform the comparison if (i1 == null) { if (i2 == null) { return 0; } return -1; } else if (i2 == null) { return 1; } return (i1.intValue() - i2.intValue()); } }); return universalList; } /** * Checks the input parentPattern contains the input key at the end. * * @param key The key to be found * @param parentPattern The pattern where looking for the key * @return true, if {@code key} is found inside {@code parentPattern}, false otherwise */ private boolean parentMatch(String key, String parentPattern) { return parentPattern.endsWith(key.substring(1, key.length() - 2)); } /** * Standard match. Matches the end of the pattern to the key. * * @param key The key to be found * @param pattern The pattern where looking for the key * @return true, if {@code key} is found inside {@code pattern}, false otherwise */ private boolean basicMatch(String key, String pattern) { return (pattern.equals(key.substring(2)) || pattern.endsWith(key.substring(1))); } /** * Finds an exact ancester match for given pattern * * @param parentPattern The input pattern * @return A list of {@code Rule} related to the input pattern */ private List<Rule> findExactAncesterMatch(String parentPattern) { List<Rule> matchingRules = null; int lastIndex = parentPattern.length(); while (lastIndex-- > 0) { lastIndex = parentPattern.lastIndexOf('/', lastIndex); if (lastIndex > 0) { matchingRules = this.cache.get(parentPattern.substring(0, lastIndex) + "/*"); if (matchingRules != null) { return matchingRules; } } } return null; } }