Java tutorial
/*
 * Copyright 2011-2015 UnboundID Corp.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (GPLv2 only)
 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses>.
 */

package com.unboundid.scim.sdk;

import org.json.JSONObject;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Stack;

/**
 * A parser for SCIM filter expressions.
 * <p>
 * The parser walks the filter string once, keeping a cursor
 * ({@code currentPos}) and a mark ({@code markPos}) that records where the
 * most recently read word or value started. Filter components are combined
 * with the logical {@code and} / {@code or} operators using the shunting-yard
 * algorithm, so that {@code and} binds tighter than {@code or} and
 * parentheses are honoured.
 * <p>
 * An instance holds mutable cursor state and is therefore intended to parse
 * its filter string a single time via {@link #parse()}.
 */
public class FilterParser {
  /**
   * The filter to be parsed.
   */
  private final String filterString;

  /**
   * The default schema that should be assumed when parsing attributes with
   * no schema explicitly defined in the URN.
   */
  private final String defaultSchema;

  /**
   * The position one higher than the last character.
   */
  private final int endPos;

  /**
   * The current character position.
   */
  private int currentPos;

  /**
   * The position marking the first character of the previous word or value.
   */
  private int markPos;

  /**
   * Base class for expression stack nodes. The expression stack is needed to
   * employ the shunting-yard algorithm to parse the filter expression.
   */
  class Node {
    private final int pos;

    /**
     * Create a new node.
     *
     * @param pos  The position of the node in the filter string.
     */
    public Node(final int pos) {
      this.pos = pos;
    }

    /**
     * Retrieve the position of the node in the filter string.
     *
     * @return  The position of the node in the filter string.
     */
    public int getPos() {
      return pos;
    }
  }

  /**
   * A node representing a filter component.
   */
  class FilterNode extends Node {
    private final SCIMFilter filterComponent;

    /**
     * Create a new filter component node.
     *
     * @param filterComponent  The filter component.
     * @param pos              The position of the node in the filter string.
     */
    public FilterNode(final SCIMFilter filterComponent, final int pos) {
      super(pos);
      this.filterComponent = filterComponent;
    }

    /**
     * Retrieve the filter component.
     *
     * @return  The filter component.
     */
    public SCIMFilter getFilterComponent() {
      return filterComponent;
    }

    @Override
    public String toString() {
      return "FilterNode{" + "filterComponent=" + filterComponent + "} "
          + super.toString();
    }
  }

  /**
   * A node representing a logical operator.
   */
  class OperatorNode extends Node {
    private final SCIMFilterType filterType;

    /**
     * Create a new logical operator node.
     *
     * @param filterType  The type of operator, either SCIMFilterType.AND or
     *                    SCIMFilterType.OR.
     * @param pos         The position of the node in the filter string.
     */
    public OperatorNode(final SCIMFilterType filterType, final int pos) {
      super(pos);
      this.filterType = filterType;
    }

    /**
     * Retrieve the type of operator.
     *
     * @return  The type of operator, either SCIMFilterType.AND or
     *          SCIMFilterType.OR.
     */
    public SCIMFilterType getFilterType() {
      return filterType;
    }

    /**
     * Retrieve the precedence of the operator. AND has precedence 2; every
     * other operator type (in practice OR) has precedence 1.
     *
     * @return  The precedence of the operator.
     */
    public int getPrecedence() {
      switch (filterType) {
        case AND:
          return 2;
        case OR:
        default:
          return 1;
      }
    }

    @Override
    public String toString() {
      return "OperatorNode{" + "filterType=" + filterType + "} "
          + super.toString();
    }
  }

  /**
   * A node representing an opening parenthesis.
   */
  class LeftParenthesisNode extends Node {
    /**
     * Create a new opening parenthesis node.
     *
     * @param pos  The position of the parenthesis in the filter string.
     */
    public LeftParenthesisNode(final int pos) {
      super(pos);
    }
  }

  /**
   * Create a new instance of a filter parser.
   *
   * @param filterString   The filter to be parsed.
   * @param defaultSchema  The default schema that should be assumed when
   *                       parsing attributes without the schema explicitly
   *                       defined in the URN.
   */
  public FilterParser(final String filterString, final String defaultSchema) {
    this.filterString = filterString;
    this.endPos = filterString.length();
    this.currentPos = 0;
    this.markPos = 0;
    this.defaultSchema = defaultSchema;
  }

  /**
   * Parse the filter provided in the constructor.
   *
   * @return  A parsed SCIM filter.
   *
   * @throws  SCIMException  If the filter string could not be parsed. Any
   *                         exception raised while parsing is reported as a
   *                         status 400 exception whose message includes the
   *                         filter string and the underlying message.
   */
  public SCIMFilter parse() throws SCIMException {
    try {
      return readFilter();
    } catch (Exception e) {
      Debug.debugException(e);
      throw SCIMException.createException(
          400,
          MessageFormat.format("Invalid filter ''{0}'': {1}",
                               filterString, e.getMessage()));
    }
  }

  /**
   * Read a filter component at the current position. A filter component is
   * <pre>
   * attribute attribute-operator [value]
   * </pre>
   * Most attribute operators require a value but 'pr' (presence) requires
   * no value.
   *
   * @return  The parsed filter component.
   *
   * @throws  IllegalArgumentException  If the input ends prematurely, the
   *                                    attribute reference cannot be parsed,
   *                                    or the operator is not recognized.
   */
  private SCIMFilter readFilterComponent() {
    final String word = readWord();
    if (word == null) {
      final String msg = String.format(
          "End of input at position %d but expected a filter expression",
          markPos);
      throw new IllegalArgumentException(msg);
    }

    final AttributePath filterAttribute;
    try {
      filterAttribute = AttributePath.parse(word, defaultSchema);
    } catch (final Exception e) {
      Debug.debugException(e);
      final String msg = String.format(
          "Expected an attribute reference at position %d: %s",
          markPos, e.getMessage());
      throw new IllegalArgumentException(msg);
    }

    final String operator = readWord();
    if (operator == null) {
      final String msg = String.format(
          "End of input at position %d but expected an attribute operator",
          markPos);
      throw new IllegalArgumentException(msg);
    }

    // Operators are matched case-insensitively.
    final SCIMFilterType filterType;
    if (operator.equalsIgnoreCase("eq")) {
      filterType = SCIMFilterType.EQUALITY;
    } else if (operator.equalsIgnoreCase("co")) {
      filterType = SCIMFilterType.CONTAINS;
    } else if (operator.equalsIgnoreCase("sw")) {
      filterType = SCIMFilterType.STARTS_WITH;
    } else if (operator.equalsIgnoreCase("pr")) {
      filterType = SCIMFilterType.PRESENCE;
    } else if (operator.equalsIgnoreCase("gt")) {
      filterType = SCIMFilterType.GREATER_THAN;
    } else if (operator.equalsIgnoreCase("ge")) {
      filterType = SCIMFilterType.GREATER_OR_EQUAL;
    } else if (operator.equalsIgnoreCase("lt")) {
      filterType = SCIMFilterType.LESS_THAN;
    } else if (operator.equalsIgnoreCase("le")) {
      filterType = SCIMFilterType.LESS_OR_EQUAL;
    } else {
      final String msg = String.format(
          "Unrecognized attribute operator '%s' at position %d. "
              + "Expected: eq,co,sw,pr,gt,ge,lt,le",
          operator, markPos);
      throw new IllegalArgumentException(msg);
    }

    // Every operator except presence must be followed by a value.
    final String filterValueString;
    if (!filterType.equals(SCIMFilterType.PRESENCE)) {
      filterValueString = readValue();
      if (filterValueString == null) {
        final String msg = String.format(
            "End of input at position %d while expecting a value for "
                + "operator %s",
            markPos, operator);
        throw new IllegalArgumentException(msg);
      }
    } else {
      filterValueString = null;
    }

    return new SCIMFilter(filterType, filterAttribute, filterValueString,
                          (filterValueString != null), null);
  }

  /**
   * Read a filter expression: one or more filter components joined by the
   * logical operators 'and' and 'or', optionally grouped with parentheses.
   *
   * @return  The SCIM filter.
   *
   * @throws  IllegalArgumentException  If the expression is empty, has
   *                                    unbalanced parentheses, has a logical
   *                                    operator without two operands, or has
   *                                    operands not joined by an operator.
   */
  private SCIMFilter readFilter() {
    final Stack<Node> expressionStack = new Stack<Node>();

    // Employ the shunting-yard algorithm to parse into reverse polish
    // notation, where the operands are filter components and the operators
    // are the logical AND and OR operators. This algorithm ensures that
    // operator precedence and parentheses are respected.
    final List<Node> reversePolish = new ArrayList<Node>();
    for (String word = readWord(); word != null; word = readWord()) {
      if (word.equalsIgnoreCase("and") || word.equalsIgnoreCase("or")) {
        final OperatorNode currentOperator;
        if (word.equalsIgnoreCase("and")) {
          currentOperator = new OperatorNode(SCIMFilterType.AND, markPos);
        } else {
          currentOperator = new OperatorNode(SCIMFilterType.OR, markPos);
        }

        // Flush stacked operators of equal or higher precedence first.
        while (!expressionStack.empty()
            && (expressionStack.peek() instanceof OperatorNode)) {
          final OperatorNode previousOperator =
              (OperatorNode) expressionStack.peek();
          if (previousOperator.getPrecedence()
              < currentOperator.getPrecedence()) {
            break;
          }
          reversePolish.add(expressionStack.pop());
        }
        expressionStack.push(currentOperator);
      } else if (word.equals("(")) {
        expressionStack.push(new LeftParenthesisNode(markPos));
      } else if (word.equals(")")) {
        while (!expressionStack.empty()
            && !(expressionStack.peek() instanceof LeftParenthesisNode)) {
          reversePolish.add(expressionStack.pop());
        }
        if (expressionStack.empty()) {
          final String msg = String.format(
              "No opening parenthesis matching closing "
                  + "parenthesis at position %d",
              markPos);
          throw new IllegalArgumentException(msg);
        }
        // Discard the matching opening parenthesis.
        expressionStack.pop();
      } else {
        // The word starts a filter component; un-read it so that
        // readFilterComponent() sees the attribute name.
        rewind();
        final int pos = currentPos;
        final SCIMFilter filterComponent = readFilterComponent();
        reversePolish.add(new FilterNode(filterComponent, pos));
      }
    }

    while (!expressionStack.empty()) {
      final Node node = expressionStack.pop();
      if (node instanceof LeftParenthesisNode) {
        final String msg = String.format(
            "No closing parenthesis matching opening "
                + "parenthesis at position %d",
            node.getPos());
        throw new IllegalArgumentException(msg);
      }
      reversePolish.add(node);
    }

    // Evaluate the reverse polish notation to create a single complex filter.
    final Stack<FilterNode> filterStack = new Stack<FilterNode>();
    for (final Node node : reversePolish) {
      if (node instanceof OperatorNode) {
        // A logical operator needs two operands. Report a positioned error
        // rather than letting pop() fail with a message-less
        // EmptyStackException.
        if (filterStack.size() < 2) {
          final String msg = String.format(
              "Missing operand for logical operator at position %d",
              node.getPos());
          throw new IllegalArgumentException(msg);
        }

        final FilterNode rightOperand = filterStack.pop();
        final FilterNode leftOperand = filterStack.pop();
        final OperatorNode operatorNode = (OperatorNode) node;

        final SCIMFilter filter;
        if (operatorNode.getFilterType().equals(SCIMFilterType.AND)) {
          filter = SCIMFilter.createAndFilter(
              Arrays.asList(leftOperand.getFilterComponent(),
                            rightOperand.getFilterComponent()));
        } else {
          filter = SCIMFilter.createOrFilter(
              Arrays.asList(leftOperand.getFilterComponent(),
                            rightOperand.getFilterComponent()));
        }
        // The combined node inherits the position of its left operand.
        filterStack.push(new FilterNode(filter, leftOperand.getPos()));
      } else {
        filterStack.push((FilterNode) node);
      }
    }

    if (filterStack.size() == 0) {
      throw new IllegalArgumentException("Empty filter expression");
    } else if (filterStack.size() > 1) {
      // More than one operand left over means two components were not joined
      // by an operator. Report the position of the second one. The position
      // must come from filterStack: expressionStack is empty at this point.
      final String msg = String.format(
          "Unexpected characters at position %d",
          filterStack.get(1).getPos());
      throw new IllegalArgumentException(msg);
    }

    return filterStack.get(0).getFilterComponent();
  }

  /**
   * Read a word at the current position. A word is a consecutive sequence of
   * characters terminated by whitespace or a parenthesis, or a single opening
   * or closing parenthesis. Whitespace before and after the word is consumed.
   * The start of the word is saved in {@code markPos}.
   *
   * @return  The word at the current position, or {@code null} if the end of
   *          the input has been reached.
   */
  private String readWord() {
    skipWhitespace();
    markPos = currentPos;

    loop:
    while (currentPos < endPos) {
      final char c = filterString.charAt(currentPos);
      switch (c) {
        case '(':
        case ')':
          // A parenthesis is a word on its own when it comes first;
          // otherwise it terminates the word read so far.
          if (currentPos == markPos) {
            currentPos++;
          }
          break loop;

        case ' ':
          break loop;

        default:
          currentPos++;
          break;
      }
    }

    if (currentPos - markPos == 0) {
      return null;
    }

    final String word = filterString.substring(markPos, currentPos);
    skipWhitespace();
    return word;
  }

  /**
   * Rewind the current position to the start of the previous word or value.
   */
  private void rewind() {
    currentPos = markPos;
  }

  /**
   * Read a value at the current position. A value can be a number, a datetime
   * or a boolean value (the words true or false), or a string value in double
   * quotes, using the same syntax as for JSON values. Whitespace before and
   * after the value is consumed. The start of the value is saved in
   * {@code markPos}.
   * <p>
   * For a quoted string the returned text is the content between the quotes
   * with escape sequences decoded. For an unquoted value the returned text is
   * the raw token, which is rejected if {@code JSONObject.stringToValue}
   * turns it into JSON null or leaves it as a String.
   *
   * @return  A String representing the value at the current position, or
   *          {@code null} if the end of the input has already been reached.
   *
   * @throws  IllegalArgumentException  If the value is malformed or the input
   *                                    ends inside a quoted string.
   */
  public String readValue() {
    skipWhitespace();
    markPos = currentPos;

    if (currentPos == endPos) {
      return null;
    }

    if (filterString.charAt(currentPos) == '"') {
      currentPos++;

      final StringBuilder builder = new StringBuilder();
      while (currentPos < endPos) {
        final char c = filterString.charAt(currentPos);
        switch (c) {
          case '\\':
            currentPos++;
            if (endOfInput()) {
              final String msg = String.format(
                  "End of input in a string value that began at "
                      + "position %d",
                  markPos);
              throw new IllegalArgumentException(msg);
            }
            final char escapeChar = filterString.charAt(currentPos);
            currentPos++;
            switch (escapeChar) {
              case '"':
              case '/':
              case '\'':
              case '\\':
                builder.append(escapeChar);
                break;
              case 'b':
                builder.append('\b');
                break;
              case 'f':
                builder.append('\f');
                break;
              case 'n':
                builder.append('\n');
                break;
              case 'r':
                builder.append('\r');
                break;
              case 't':
                builder.append('\t');
                break;
              case 'u':
                builder.append(readUnicodeEscapeDigits());
                break;
              default:
                final String msg = String.format(
                    "Unrecognized escape sequence '\\%c' in a string value "
                        + "at position %d",
                    escapeChar, currentPos - 2);
                throw new IllegalArgumentException(msg);
            }
            break;

          case '"':
            currentPos++;
            skipWhitespace();
            return builder.toString();

          default:
            builder.append(c);
            currentPos++;
            break;
        }
      }

      // Ran off the end of the input without seeing the closing quote.
      final String msg = String.format(
          "End of input in a string value that began at "
              + "position %d",
          markPos);
      throw new IllegalArgumentException(msg);
    } else {
      loop:
      while (currentPos < endPos) {
        final char c = filterString.charAt(currentPos);
        switch (c) {
          case ' ':
          case '(':
          case ')':
            break loop;

          case '+': case '-': case '.':
          case '0': case '1': case '2': case '3': case '4':
          case '5': case '6': case '7': case '8': case '9':
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
          case 'Y': case 'Z':
          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':
          case 'y': case 'z':
            // These are all OK.
            currentPos++;
            break;

          case '/': case ':': case ';': case '<': case '=': case '>':
          case '?': case '@': case '[': case '\\': case ']': case '^':
          case '_': case '`':
            // These are not allowed, but they are explicitly called out
            // because they are included in the range of values between '-'
            // and 'z', and making sure all possible characters are included
            // can help make the switch statement more efficient. We'll fall
            // through to the default clause to reject them.
          default:
            final String msg = String.format(
                "Invalid character '%c' in a number or boolean value at "
                    + "position %d",
                c, currentPos);
            throw new IllegalArgumentException(msg);
        }
      }

      final String s = filterString.substring(markPos, currentPos);
      skipWhitespace();

      // An unquoted token is only valid if it is not JSON null and does not
      // remain a plain String after conversion.
      final Object value = JSONObject.stringToValue(s);
      if (value.equals(JSONObject.NULL) || value instanceof String) {
        final String msg = String.format(
            "Invalid filter value beginning at position %d", markPos);
        throw new IllegalArgumentException(msg);
      }

      return s;
    }
  }

  /**
   * Decode the four hexadecimal digits of a unicode escape sequence. On entry
   * {@code currentPos} must be positioned immediately after the 'u' of the
   * escape; on success it is advanced past the four digits.
   * <p>
   * Each character is checked individually so that input such as a sign
   * character, which {@code Integer.parseInt} would accept, is rejected with
   * a message that identifies the position of the escape.
   *
   * @return  The UTF-16 code unit denoted by the four hexadecimal digits.
   *
   * @throws  IllegalArgumentException  If fewer than four characters remain
   *                                    or any of them is not a hex digit.
   */
  private char readUnicodeEscapeDigits() {
    if (currentPos + 4 > endPos) {
      final String msg = String.format(
          "End of input in a string value that began at "
              + "position %d",
          markPos);
      throw new IllegalArgumentException(msg);
    }

    int codeUnit = 0;
    for (int i = 0; i < 4; i++) {
      final int digit =
          Character.digit(filterString.charAt(currentPos + i), 16);
      if (digit < 0) {
        // currentPos - 2 is the backslash that introduced the escape.
        final String msg = String.format(
            "Invalid unicode escape sequence '\\u%s' in a string value "
                + "at position %d",
            filterString.substring(currentPos, currentPos + 4),
            currentPos - 2);
        throw new IllegalArgumentException(msg);
      }
      codeUnit = (codeUnit << 4) | digit;
    }

    currentPos += 4;
    return (char) codeUnit;
  }

  /**
   * Determine if the end of the input has been reached.
   *
   * @return  {@code true} if the end of the input has been reached.
   */
  private boolean endOfInput() {
    return currentPos == endPos;
  }

  /**
   * Skip over any whitespace at the current position. Only the space
   * character is treated as whitespace.
   */
  private void skipWhitespace() {
    while (currentPos < endPos && filterString.charAt(currentPos) == ' ') {
      currentPos++;
    }
  }
}