Java tutorial
/***************************************************************************
   Copyright 2014 Emily Estes

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
***************************************************************************/
package net.metanotion.json;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.text.StringEscapeUtils;

import net.metanotion.functor.Block;
import net.metanotion.util.Pair;

/** This class is meant to solve a similar problem to "XPath". The idea is to provide a handler implementation
for a streaming parser that matches JS "expressions" to nodes in the JSON object being parsed, and then switches
to a user provided handler for processing that subtree.
For instance: "foo.bar[5]" would look for a key called "foo" whose value was an object, and then the key "bar"
inside foo's value, and assume bar's value was an array, and find the 5th element of that array, at which point
it would pass control to the handler provided with that path expression.

JsonPath "grammar":
<path> = <element> [ <subelement> ]*
<element> = <id> | '*' | <array> | '{}'
<subelement> = '.' ( <id> | '*') | <array>
<id> = [a-zA-Z_][anything]*
<array> = '[' [<int> | <string>] ']'

@param <T> The type parameter for the handler. This class always returns null from finish().
*/ public final class JsonPath<T> implements Handler<T> { private enum Token { ID, STAR, L_BRACE, R_BRACE, DOT, INT, STRING, L_CURLY, R_CURLY, EOF } private static final class Lexeme { public final Token type; public final Object value; public Lexeme(final Token type, final Object value) { this.type = type; this.value = value; } @Override public String toString() { return type.toString() + " - " + value; } } private static final Lexeme STAR = new Lexeme(Token.STAR, null); private static final Lexeme L_BRACE = new Lexeme(Token.L_BRACE, null); private static final Lexeme R_BRACE = new Lexeme(Token.R_BRACE, null); private static final Lexeme L_CURLY = new Lexeme(Token.L_CURLY, null); private static final Lexeme R_CURLY = new Lexeme(Token.R_CURLY, null); private static final Lexeme DOT = new Lexeme(Token.DOT, null); private static final Lexeme EOF = new Lexeme(Token.EOF, null); private static Lexeme lexToken(final String cs, final int[] position) { final int c = skipWhitespace(cs, position); switch (c) { case -1: return EOF; case '[': return L_BRACE; case ']': return R_BRACE; case '{': return L_CURLY; case '}': return R_CURLY; case '*': return STAR; case '.': return DOT; case '\'': return lexString(cs, position); default: if (Character.isDigit(c) || (c == (int) '-')) { // Lex integer final StringBuilder sb = new StringBuilder(new String(Character.toChars(c))); int digits = Character.isDigit(c) ? 
1 : 0; while (true) { final int i = read(cs, position); if (Character.isDigit(i)) { digits++; sb.append(Character.toChars(i)); } else { position[0]--; if (digits == 0) { throw new IllegalArgumentException("Expected at least one digit [0-9]"); } return new Lexeme(Token.INT, Long.valueOf(sb.toString())); } } } else { // Lex identifier position[0]--; final StringBuilder sb = new StringBuilder(); while (true) { final int i = read(cs, position); if (i == -1) { return new Lexeme(Token.ID, sb.toString()); } if (Character.isLetter(i) || (i == (int) '_') || Character.isDigit(i)) { sb.append(Character.toChars(i)); } else { position[0]--; return new Lexeme(Token.ID, sb.toString()); } } } } } private static int read(final String cs, final int[] position) { try { final int c = cs.codePointAt(position[0]); position[0]++; return c; } catch (IndexOutOfBoundsException ioobe) { return -1; } } private static int skipWhitespace(final String cs, final int[] position) { int c; do { c = read(cs, position); } while (Character.isWhitespace((char) c)); return c; } private static Lexeme lexString(final String cs, final int[] position) { final StringBuilder sb = new StringBuilder(); boolean escape = false; while (true) { final int c = read(cs, position); if (c == -1) { throw new IllegalArgumentException("unexpected end of expression in string"); } final String cbuf = new String(Character.toChars(c)); if (("\'".equals(cbuf)) && !escape) { return new Lexeme(Token.STRING, StringEscapeUtils.unescapeJson(sb.toString())); } else if ("\\".equals(cbuf)) { escape = true; sb.append(cbuf); } else { escape = false; sb.append(cbuf); } } } private enum Type { STAR, KEY, LIST, OBJ } private static final class Path { public final Type type; public Object value; public Path(final Type type, final Object value) { this.type = type; this.value = value; } } private static final Path P_STAR = new Path(Type.STAR, null); private static final Path P_OBJ = new Path(Type.OBJ, null); private static final Path P_KEY = 
new Path(Type.KEY, null); private static List<Path> parseElement(final String expression, final int[] pos, final List<Path> list) { Lexeme lex = lexToken(expression, pos); switch (lex.type) { case EOF: return list; case DOT: lex = lexToken(expression, pos); if (lex.type != Token.ID) { throw new IllegalArgumentException("Expected identifier after the '.', instead found: " + lex); } list.add(P_OBJ); list.add(new Path(Type.KEY, lex.value)); return parseElement(expression, pos, list); case L_BRACE: return parseIndex(expression, pos, list); case L_CURLY: return parseCurly(expression, pos, list); default: throw new IllegalArgumentException( "Expression is not a valid JSON path, expected ID, '[', or eof, found: " + lex.toString()); } } private static List<Path> parseIndex(final String expression, final int[] pos, final List<Path> list) { Lexeme lex = lexToken(expression, pos); switch (lex.type) { case STRING: list.add(P_OBJ); list.add(new Path(Type.KEY, lex.value)); break; case INT: list.add(new Path(Type.LIST, lex.value)); break; case R_BRACE: list.add(new Path(Type.LIST, null)); return parseElement(expression, pos, list); default: throw new IllegalArgumentException("Expression is not a valid JSON path, " + "expected a string or integer index in [], found: " + lex.toString()); } lex = lexToken(expression, pos); if (lex.type != Token.R_BRACE) { throw new IllegalArgumentException( "Expression is not a valid JSON path, expected a ']', found: " + lex.toString()); } return parseElement(expression, pos, list); } private static List<Path> parseCurly(final String expression, final int[] pos, final List<Path> list) { final Lexeme lex = lexToken(expression, pos); if (lex.type != Token.R_CURLY) { throw new IllegalArgumentException( "Expression is not a valid JSON Path, " + "expected }, found:" + lex.toString()); } list.add(P_OBJ); list.add(P_KEY); return parseElement(expression, pos, list); } private static List<Path> parse(final String expression) { final ArrayList<Path> list = 
new ArrayList<>(); final int[] pos = { 0 }; final Lexeme lex = lexToken(expression, pos); switch (lex.type) { case STAR: list.add(P_STAR); return parseElement(expression, pos, list); case ID: list.add(P_OBJ); list.add(new Path(Type.KEY, lex.value)); return parseElement(expression, pos, list); case L_BRACE: return parseIndex(expression, pos, list); case L_CURLY: return parseCurly(expression, pos, list); default: throw new IllegalArgumentException("Expression is not a valid JSON Path."); } } private final List<Pair<List<Path>, Block<Object, ? extends Object>>> matches = new ArrayList<>(); private final ArrayList<Path> current = new ArrayList<>(); private final List<Handler<T>> accumulators = new ArrayList<>(); private final ArrayList<String> currentKey = new ArrayList<>(); // compare the path to current. private boolean compare(final List<Path> path) { int index = 0; final int len = path.size(); for (final Path c : current) { if (index >= len) { return false; } final Path p = path.get(index); index++; if (p.type != c.type) { return false; } switch (p.type) { case LIST: if ((p.value != null) && (!c.value.equals(p.value))) { return false; } break; case KEY: if ((p.value != null) && (!c.value.equals(p.value))) { return false; } break; default: break; } } return index == len; } private static final class Accumulator<T> implements Handler<T>, Continuation<Object, T> { private final List<Handler<T>> accumulators; private final List<String> currentKey; private final List<Path> match; private final Block<Object, ? extends Object> block; public Accumulator(final List<String> currentKey, final List<Handler<T>> accumulators, final List<Path> match, final Block<Object, ? 
extends Object> block) { this.currentKey = currentKey; this.accumulators = accumulators; this.match = match; this.block = block; } @Override public Handler<T> resume(Object val) { try { final Path p = match.get(match.size() - 1); if ((p.type == Type.KEY) && (p.value == null)) { final String k = currentKey.get(currentKey.size() - 1); val = new Pair<String, Object>(k, val); } block.eval(val); } catch (final Exception e) { throw new RuntimeException(e); } accumulators.remove(this); return null; } @Override public Handler<T> start() { return this; } @Override public T finish() { return null; } @Override public Handler<T> startObject() { return new ObjectReader<T>(this); } @Override public Handler<T> key(final String key) { throw new RuntimeException("Unexpected Json token, was expecting a value, got a key."); } @Override public Handler<T> endObject() { return null; } @Override public Handler<T> startList() { return new ListReader<T>(this); } @Override public Handler<T> endList() { return null; } @Override public Handler<T> integer(final Long value) { return this.resume(value); } @Override public Handler<T> decimal(final Double value) { return this.resume(value); } @Override public Handler<T> string(final String value) { return this.resume(value); } @Override public Handler<T> bool(final boolean value) { return this.resume(value); } @Override public Handler<T> jsonNull() { return this.resume(null); } } private void findMatch() { for (final Pair<List<Path>, Block<Object, ? extends Object>> candidate : matches) { final List<Path> path = candidate.getKey(); if (compare(path)) { accumulators.add(new Accumulator<T>(currentKey, accumulators, path, candidate.getValue())); } } } /** Add a Path expression to this instance. Every time the path is matched, the iterator will be evaluated with the accumulated value of the nodes beneath the match. Only one match can be active at a time, so given two expressions that would match, the one added first "wins". 
@param expression A valid JSON path expression to match. @param iterator Block to evaluate against the accumulated child nodes(this must not be null). @return This instance of JsonPath. @throws IllegalArgumentException if the expression is not well formed. @throws NullPointerException if iterator is null. */ public JsonPath<T> add(final String expression, final Block<Object, ? extends Object> iterator) { if (iterator == null) { throw new NullPointerException("block must not be null."); } final List<Path> expr = parse(expression); matches.add(new Pair<List<Path>, Block<Object, ? extends Object>>(expr, iterator)); return this; } @Override public Handler<T> start() { current.clear(); currentKey.clear(); accumulators.clear(); return this; } @Override public T finish() { if (current.size() > 0) { throw new RuntimeException("JSON not well formed."); } return null; } @Override public Handler<T> startObject() { final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].startObject(); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } currentKey.add(null); current.add(new Path(Type.OBJ, null)); return this; } @Override public Handler<T> key(final String key) { currentKey.set(currentKey.size() - 1, key); current.add(new Path(Type.KEY, key)); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].key(key); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } private Path pop() { return current.remove(current.size() - 1); } private void closeValue() { final int len = current.size(); if (len > 0) { final Path p = current.get(len - 1); if (p.type == Type.KEY) { pop(); } else if (p.type == Type.LIST) { p.value = ((Long) p.value).longValue() + 1; } } } @Override public Handler<T> endObject() { 
currentKey.remove(currentKey.size() - 1); final Path p = pop(); if (p.type != Type.OBJ) { throw new RuntimeException("Invalid end of object"); } closeValue(); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].endObject(); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } @Override public Handler<T> startList() { current.add(new Path(Type.LIST, Long.valueOf(0))); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].startList(); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } @Override public Handler<T> endList() { final Path p = pop(); if (p.type != Type.LIST) { throw new RuntimeException("Invalid end of list"); } closeValue(); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].endList(); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } @Override public Handler<T> integer(final Long val) { closeValue(); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].integer(val); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } @Override public Handler<T> decimal(final Double val) { closeValue(); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].decimal(val); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } @Override public Handler<T> string(final String val) { closeValue(); final 
Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].string(val); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } @Override public Handler<T> bool(final boolean val) { closeValue(); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].bool(val); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } @Override public Handler<T> jsonNull() { closeValue(); final Handler[] accs = accumulators.toArray(new Handler[accumulators.size()]); for (int i = 0; i < accs.length; i++) { accs[i] = accs[i].jsonNull(); } accumulators.clear(); for (final Handler a : accs) { if (a != null) { accumulators.add(a); } } findMatch(); return this; } }