de.micromata.genome.gwiki.page.search.expr.SearchExpressionParser.java Source code

Java tutorial

Introduction

Here is the source code for de.micromata.genome.gwiki.page.search.expr.SearchExpressionParser.java

Source

//
// Copyright (C) 2010-2016 Roger Rene Kommer & Micromata GmbH
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package de.micromata.genome.gwiki.page.search.expr;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;

import de.micromata.genome.gwiki.utils.ClassUtils;
import de.micromata.genome.util.matcher.BooleanListRulesFactory;
import de.micromata.genome.util.matcher.EqualsMatcher;
import de.micromata.genome.util.matcher.InvalidMatcherGrammar;
import de.micromata.genome.util.matcher.LessThanMatcher;
import de.micromata.genome.util.matcher.LessThanOrEqualMatcher;
import de.micromata.genome.util.matcher.Matcher;
import de.micromata.genome.util.matcher.MoreThanMatcher;
import de.micromata.genome.util.matcher.MoreThanOrEqualMatcher;
import de.micromata.genome.util.matcher.NotMatcher;
import de.micromata.genome.util.matcher.string.ContainsIgnoreCaseMatcher;
import de.micromata.genome.util.matcher.string.EqualsWithBoolMatcher;
import de.micromata.genome.util.text.CharToken;
import de.micromata.genome.util.text.RegExpToken;
import de.micromata.genome.util.text.TextSplitterUtils;
import de.micromata.genome.util.text.Token;
import de.micromata.genome.util.text.TokenResult;

public class SearchExpressionParser {
    // Token type ids. Most of them are attached to a pattern in the DefaultToken table below and are
    // compared against TokenResult.getTokenType() by the consume* methods of this parser.

    /** Type the parser expects for plain text elements; no pattern in DefaultToken uses this id. */
    public static final int TK_UNMATCHED = 0;

    /** A blank separating two elements. */
    public static final int TK_SPACE = 1;

    /** Opening bracket "(". */
    public static final int TK_BO = 2;

    /** Closing bracket ")". */
    public static final int TK_BC = 3;

    /** Conjunction: "&&" or the word "and". */
    public static final int TK_AND = 4;

    /** Disjunction: "||" or the word "or". */
    public static final int TK_OR = 5;

    /** Prefix "+". */
    public static final int TK_PLUS = 6;

    /** Prefix "-". */
    public static final int TK_MINUS = 7;

    /** NOTE(review): no pattern in DefaultToken produces this type and this class never reads it. */
    public static final int TK_HASH = 8;

    /** Negation: "!" or the word "not". */
    public static final int TK_NOT = 9;

    /** The ":" that separates a command name from its argument. */
    public static final int TK_COMMAND = 10;

    /** Double quote character. */
    public static final int TK_QUOTE = 11;

    /** The "order by" keyword. */
    public static final int TK_ORDERBY = 12;

    /** Comma; the parser uses it to separate order-by criteria. */
    public static final int TK_COMMA = 13;

    /** Comparator "~". */
    public static final int TK_CONTAINING = 14;

    /** Comparator "=". */
    public static final int TK_EQUALS = 15;

    /** Comparator "!=". */
    public static final int TK_NOTEQUAL = 16;

    /** Comparator "<". */
    public static final int TK_LESS = 17;

    /** Comparator "<=". */
    public static final int TK_LESSOREQUAL = 18;

    /** Comparator ">". */
    public static final int TK_MORE = 19;

    /** Comparator ">=". */
    public static final int TK_MOREOREQUAL = 20;

    /** Comparator "like". */
    public static final int TK_LIKE = 21;

    // NOTE(review): the three *_PRIO constants are not referenced anywhere in this class;
    // presumably operator priorities kept for external users - confirm before removing.

    public static final int TK_BOBC_PRIO = 3;

    public static final int TK_COMMA_PRIO = 1;

    public static final int TK_ANDOR_PRIO = 2;

    /**
     * Regular expression fragment for the characters that have to follow a keyword operator
     * ("and", "or", "not", "order by") and "||": at least one blank, tab or opening bracket.
     * Declared final because it is only used to build the constant patterns below.
     */
    private static final String afterandOr = "[ \\(\\t]+";

    /**
     * Token table handed to the tokenizer in {@link #parse(String)}. Each entry binds one of the
     * TK_* type ids to a character or a regular expression; several entries may share one type id
     * (for example "&&" and "and" both yield TK_AND).
     */
    private static final Token[] DefaultToken = new Token[] { //

            new RegExpToken(TK_BO, "(\\()(.*)"), //
            new RegExpToken(TK_BC, "(\\))(.*)"), //
            // new RegExpToken(TK_QUOTE, "^[ \\t]*\\\"(.*?)\\\"(.*)"), //
            new CharToken(TK_QUOTE, '"'), //
            new RegExpToken(TK_CONTAINING, "(\\~)(.*)"), //
            new RegExpToken(TK_AND, "(\\&\\&)(.*)"), //
            new RegExpToken(TK_AND, "(and)(" + afterandOr + ".*)"), //
            new RegExpToken(TK_OR, "(\\|\\|)(" + afterandOr + ".*)"), //
            new RegExpToken(TK_OR, "(or)(" + afterandOr + ".*)"), //
            new RegExpToken(TK_PLUS, "(\\+)([ \\t]+.*)"), //
            new RegExpToken(TK_MINUS, "(\\-)([ \\t]+.*)"), //
            // two-character comparators are listed before their one-character prefixes
            new RegExpToken(TK_MOREOREQUAL, "(\\>\\=)(.*)"), //
            new RegExpToken(TK_LESSOREQUAL, "(\\<\\=)(.*)"), //
            new RegExpToken(TK_NOTEQUAL, "(\\!\\=)(.*)"), //
            new RegExpToken(TK_EQUALS, "(\\=)(.*)"), //
            new RegExpToken(TK_LESS, "(\\<)(.*)"), //
            new RegExpToken(TK_MORE, "(\\>)(.*)"), //
            new RegExpToken(TK_LIKE, "(like)(.*)"), //

            new RegExpToken(TK_NOT, "(\\!)([ \\t]+.*)"), //
            new RegExpToken(TK_NOT, "(not)(" + afterandOr + ".*)"), //

            new RegExpToken(TK_ORDERBY, "^+(order by)(" + afterandOr + ".*)"), //
            new RegExpToken(TK_COMMAND, "^[ \\t]*(\\:)(.*)"), //

            new RegExpToken(TK_COMMA, "^[ \\t]*(\\,)[ \\t]*(.*)"), //
            new RegExpToken(TK_SPACE, "^[ \\t]*([ ])[ \\t]*(.*)"), //

    };

    /**
     * Built-in commands: maps the lower-case command name (the text in front of the ':') to the
     * expression class that implements it. Copied into every parser instance by the constructor.
     */
    private static final Map<String, Class<? extends SearchExpressionCommand>> buildInCommandExpressions = new HashMap<String, Class<? extends SearchExpressionCommand>>();
    static {
        // keys are lower case because consumeCommand lower-cases the command name before the lookup
        buildInCommandExpressions.put("parentpageid", SearchExpressionCommandParentPageId.class);
        buildInCommandExpressions.put("childs", SearchExpressionComandChilds.class);
        buildInCommandExpressions.put("space", SearchExpressionComandWikiSpace.class);
        buildInCommandExpressions.put("pageid", SearchExpressionComandPageIdMatcher.class);
        buildInCommandExpressions.put("prop", SearchExpressionPropSelektorCommand.class);
        buildInCommandExpressions.put("lang", SearchExpressionComandLangMatcher.class);
        buildInCommandExpressions.put("keyword", SearchExpressionCommandKeywordMatcher.class);
    }

    /**
     * Cursor over a list of tokens: the token list, the index of the current token and the
     * pattern text the tokens were created for.
     */
    public static class TokenResultList {
        /** The tokens this cursor walks over. */
        public List<TokenResult> tokenResults;

        /** Index of the current token; a value at or beyond the list size means end of input. */
        public int position;

        /** The pattern text; the parser puts it into its error messages. */
        public String pattern;

        public TokenResultList(List<TokenResult> tokens, int position, String pattern) {
            this.tokenResults = tokens;
            this.position = position;
            this.pattern = pattern;
        }

        /**
         * Returns the token at the current position.
         *
         * @return the current token
         * @throws IndexOutOfBoundsException if the cursor is at end of input; check {@link #eof()} first
         */
        public TokenResult curToken() {
            return tokenResults.get(position);
        }

        /**
         * Advances the cursor by one token.
         *
         * @return the new current token or null if the end of input was reached
         */
        public TokenResult nextToken() {
            ++position;
            if (position >= tokenResults.size()) {
                return null;
            }
            return tokenResults.get(position);
        }

        /** Tells whether {@code search} is one of the token types in {@code tks}. */
        private static boolean contains(int search, int... tks) {
            for (int tk : tks) {
                if (search == tk) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Moves the cursor forward while the current token has one of the given types. The cursor
         * does not move if the current token has another type.
         *
         * @param tks token types to skip
         */
        public void skipping(int... tks) {
            while (position < tokenResults.size() && contains(tokenResults.get(position).getTokenType(), tks)) {
                ++position;
            }
        }

        /**
         * Advances the cursor by one token and then skips all tokens having one of the given types.
         *
         * @param tks token types to skip
         * @return the new current token or null if the end of input was reached
         */
        public TokenResult nextTokenSkipping(int... tks) {
            ++position;
            skipping(tks);
            if (position >= tokenResults.size()) {
                return null;
            }
            return tokenResults.get(position);
        }

        /**
         * Returns the type of the token {@code pos} positions after the current one without moving
         * the cursor.
         *
         * @param pos offset relative to the current position; 0 is the current token
         * @return the token type or -1 if that position is at or beyond the end of input
         */
        public int lookAheadTokenType(int pos) {
            if (position + pos >= tokenResults.size()) {
                return -1;
            }
            return tokenResults.get(position + pos).getTokenType();
        }

        /** @return true if the cursor is at or beyond the end of the token list */
        public boolean eof() {
            return position >= tokenResults.size();
        }

        /**
         * Concatenates the consumed text of all tokens from the current position to the end. The
         * parser uses the result as the "rest" part of its error messages.
         *
         * @return the remaining text; empty if the cursor is at end of input
         */
        public String restOfTokenString() {
            StringBuilder sb = new StringBuilder();
            // start at the cursor so only the not yet consumed tokens are reported
            for (int i = Math.max(position, 0); i < tokenResults.size(); ++i) {
                sb.append(tokenResults.get(i).getConsumed());
            }
            return sb.toString();
        }
    }

    /** Escape character passed to the tokenizer and used to unescape plain text elements. */
    private char escapeChar = '\\';

    /** Commands known to this parser instance, keyed by lower-case command name. */
    private Map<String, Class<? extends SearchExpressionCommand>> commandExpressions = new HashMap<String, Class<? extends SearchExpressionCommand>>();

    /**
     * Creates a parser that knows all built-in commands.
     */
    public SearchExpressionParser() {
        // copy, so changes to this instance's map do not touch the shared built-in table
        commandExpressions.putAll(buildInCommandExpressions);
    }

    /**
     * Creates the expression for one text element.
     *
     * @param text the already unescaped element text
     * @return a {@link SearchExpressionTextContains} for the given text
     */
    protected SearchExpression consumeElement(String text) {
        // every element, including text wrapped in "{...}", becomes a plain text-contains expression
        return new SearchExpressionTextContains(text);
    }

    /**
     * Consumes a double-quoted string. The cursor is expected to stand on the opening quote token.
     * Inside the quotes a single backslash token escapes the following token: an escaped quote is
     * taken literally and a leading n, r or t of the following text becomes newline, carriage
     * return or tab.
     * <p>
     * If no closing quote is found (including the case that the input ends right after a
     * backslash), the cursor is reset to the opening quote and the quote character itself is
     * returned as element.
     *
     * @param tokens the token cursor, positioned on the opening quote
     * @return the expression for the quoted text, or for a single quote character if the quote is
     *         not terminated
     */
    protected SearchExpression consumeQuoted(TokenResultList tokens) {
        StringBuilder quoted = new StringBuilder();
        int oldpos = tokens.position;
        TokenResult tk = tokens.nextToken();
        while (tokens.eof() == false) {

            if (tk.getConsumedLength() == 1 && tk.getConsumed().equals("\\") == true) {
                tk = tokens.nextToken();
                if (tk == null) {
                    // input ends with a backslash: nothing left to escape, treat as unterminated quote
                    break;
                }
                if (tk.getTokenType() == TK_QUOTE) {
                    quoted.append("\"");
                } else {
                    String nq = tk.getConsumed();
                    if (nq.length() > 0) {
                        char fc = nq.charAt(0);
                        switch (fc) {
                        case 'n':
                            quoted.append("\n");
                            break;
                        case 'r':
                            quoted.append("\r");
                            break;
                        case 't':
                            quoted.append("\t");
                            break;
                        default:
                            quoted.append(fc);
                            break;
                        }
                        if (nq.length() > 1) {
                            quoted.append(nq.substring(1));
                        }
                    }
                }
            } else if (tk.getTokenType() == TK_QUOTE) {
                // closing quote found
                String ret = TextSplitterUtils.unescape(quoted.toString(), '\\', '"');
                SearchExpression m = consumeElement(ret);
                tokens.nextTokenSkipping(TK_SPACE);
                return m;
            } else {
                quoted.append(tk.getConsumed());
            }
            tk = tokens.nextToken();
        }
        // unterminated quote: go back and consume only the quote character itself
        tokens.position = oldpos;
        SearchExpression m = consumeElement("\"");
        tokens.nextTokenSkipping(TK_SPACE);
        return m;
    }

    /**
     * Consumes a single element: either a quoted string or one plain text token.
     *
     * @param tokens the token cursor
     * @return the element expression, or null if the input is exhausted or the current token is
     *         neither a quote nor plain text (the cursor is not moved in that case)
     */
    protected SearchExpression consumeListElement(TokenResultList tokens) {
        if (tokens.eof()) {
            return null;
        }
        final TokenResult current = tokens.curToken();
        switch (current.getTokenType()) {
        case TK_QUOTE:
            return consumeQuoted(tokens);
        case TK_UNMATCHED:
            // plain text: remove escape characters, then step over the blanks that follow
            final String plain = TextSplitterUtils.unescape(current.getConsumed(), escapeChar);
            final SearchExpression element = consumeElement(plain);
            tokens.nextTokenSkipping(TK_SPACE);
            return element;
        default:
            // an operator token; leave it to the caller
            return null;
        }
    }

    /**
     * Consumes an element that is optionally followed by ':' to form a command.
     * <p>
     * If the name in front of the ':' is a registered command, the element after the ':' is
     * consumed as its argument and an instance of the registered class is created with
     * (command name, argument). Otherwise only the ':' is consumed and a
     * {@link SearchExpressionPropSelektorCommand} for the upper-cased name is returned.
     * <p>
     * Case conversion uses {@link Locale#ROOT}, so the lookup of command names does not depend on
     * the default locale of the JVM.
     *
     * @param tks the token cursor
     * @return the element or command expression; null if the input is exhausted
     * @throws InvalidMatcherGrammar if the ':' is not preceded by a plain text element
     */
    protected SearchExpression consumeCommand(TokenResultList tks) {
        if (tks.eof() == true) {
            return null;
        }
        SearchExpression left = consumeListElement(tks);
        if (tks.eof() == true) {
            return left;
        }
        TokenResult tk = tks.curToken();
        if (tk.getTokenType() != TK_COMMAND) {
            return left;
        }
        if ((left instanceof SearchExpressionTextContains) == false) {
            throw new InvalidMatcherGrammar(
                    "Excepting command string. Got: " + tk.getConsumed() + "; pattern: " + tks.pattern);
        }
        String command = ((SearchExpressionTextContains) left).getText().toLowerCase(Locale.ROOT);
        if (commandExpressions.containsKey(command) == false) {
            // unknown name: consume the ':' only and select the property with that name
            tks.nextToken();

            SearchExpression right = new SearchExpressionTextContains(command.toUpperCase(Locale.ROOT));
            SearchExpression ret = ClassUtils.createInstance(SearchExpressionPropSelektorCommand.class,
                    new Class[] { String.class, SearchExpression.class }, "prop", right);
            tks.skipping(TK_SPACE);
            return ret;
        }
        // known command: the next element is its argument (null if nothing follows the ':')
        tks.nextTokenSkipping(TK_SPACE);
        SearchExpression right = consumeListElement(tks);
        return ClassUtils.createInstance(commandExpressions.get(command),
                new Class[] { String.class, SearchExpression.class }, command, right);
    }

    /**
     * Consumes an optional prefix operator together with its operand.
     * <ul>
     * <li>not / !: the following list is wrapped in a {@link SearchExpressionNotIn}</li>
     * <li>-: the following command is wrapped in a {@link SearchExpressionNotIn}</li>
     * <li>+: the following command is wrapped in a {@link SearchExpressionExact}</li>
     * <li>anything else: the command is returned as is</li>
     * </ul>
     *
     * @param tokens the token cursor
     * @return the resulting expression or null if the input is exhausted
     */
    protected SearchExpression consumePlusMinus(TokenResultList tokens) {
        if (tokens.eof()) {
            return null;
        }
        switch (tokens.curToken().getTokenType()) {
        case TK_NOT:
            tokens.nextTokenSkipping(TK_SPACE);
            return new SearchExpressionNotIn(consumeList(tokens));
        case TK_MINUS:
            tokens.nextTokenSkipping(TK_SPACE);
            return new SearchExpressionNotIn(consumeCommand(tokens));
        case TK_PLUS:
            tokens.nextTokenSkipping(TK_SPACE);
            return new SearchExpressionExact(consumeCommand(tokens));
        default:
            return consumeCommand(tokens);
        }
    }

    /**
     * Consumes either a bracketed sub expression "( ... )" or, if the current token is not an
     * opening bracket, a prefixed element.
     *
     * @param tks the token cursor
     * @return the parsed expression or null if the input is exhausted
     * @throws InvalidMatcherGrammar if an opening bracket is not followed by a matching closing
     *         bracket, including the case that the input ends before the bracket is closed
     */
    protected SearchExpression consumeBracket(TokenResultList tks) {
        if (tks.eof() == true) {
            return null;
        }
        if (tks.curToken().getTokenType() != TK_BO) {
            return consumePlusMinus(tks);
        }
        tks.nextTokenSkipping(TK_SPACE);
        SearchExpression m = consume(tks);
        // check eof first: curToken() would fail with an IndexOutOfBoundsException at end of input
        if (tks.eof() == true || tks.curToken().getTokenType() != TK_BC) {
            throw new InvalidMatcherGrammar("grammar has no matching close bracket: " + tks.pattern);
        }
        tks.nextTokenSkipping(TK_SPACE);
        return m;
    }

    /**
     * Consumes a sequence of adjacent elements. Elements are collected until the input ends or
     * until {@link #consumeBracket(TokenResultList)} yields no further element.
     *
     * @param tks the token cursor
     * @return null if there is no element, the element itself if there is exactly one, otherwise
     *         a {@link SearchExpressionWeakOrList} holding all collected elements
     */
    protected SearchExpression consumeList(TokenResultList tks) {
        final SearchExpression first = consumeBracket(tks);
        if (first == null) {
            return null;
        }
        if (tks.eof()) {
            return first;
        }
        final List<SearchExpression> collected = new ArrayList<SearchExpression>();
        collected.add(first);

        for (SearchExpression next = consumeBracket(tks); next != null; next = consumeBracket(tks)) {
            collected.add(next);
            if (tks.eof()) {
                break;
            }
        }
        return collected.size() == 1 ? collected.get(0) : new SearchExpressionWeakOrList(collected);
    }

    /**
     * Creates the string matcher that belongs to a comparator token.
     *
     * @param tktype one of the comparator token types (TK_CONTAINING, TK_EQUALS, TK_NOTEQUAL,
     *        TK_LESS, TK_LESSOREQUAL, TK_MORE, TK_MOREOREQUAL, TK_LIKE)
     * @param text the right hand side of the comparison
     * @return the matcher for the given comparator and text
     * @throws InvalidMatcherGrammar if {@code tktype} is not a comparator token type
     */
    protected Matcher<String> createComparator(int tktype, String text) {
        final Matcher<String> matcher;
        switch (tktype) {
        case TK_EQUALS:
            matcher = new EqualsWithBoolMatcher(text);
            break;
        case TK_NOTEQUAL:
            matcher = new NotMatcher<String>(new EqualsMatcher<String>(text));
            break;
        case TK_LESS:
            matcher = new LessThanMatcher<String>(text);
            break;
        case TK_LESSOREQUAL:
            matcher = new LessThanOrEqualMatcher<String>(text);
            break;
        case TK_MORE:
            matcher = new MoreThanMatcher<String>(text);
            break;
        case TK_MOREOREQUAL:
            matcher = new MoreThanOrEqualMatcher<String>(text);
            break;
        case TK_CONTAINING:
            matcher = new ContainsIgnoreCaseMatcher<String>(text);
            break;
        case TK_LIKE:
            matcher = new BooleanListRulesFactory<String>().createMatcher(text);
            break;
        default:
            throw new InvalidMatcherGrammar("Unkown comparator");
        }
        return matcher;
    }

    /**
     * Consumes a list that is optionally followed by a comparator and a right hand element, for
     * example {@code <field selector> ~ text}.
     *
     * @param tks the token cursor
     * @return the list itself if no comparator follows, otherwise a
     *         {@link SearchExpressionComparator}; null if there is nothing to consume
     * @throws InvalidMatcherGrammar if the right operand is missing, the left side is not a
     *         {@link SearchExpressionFieldSelektor} or the right side is not a
     *         {@link SearchExpressionText}
     */
    protected SearchExpression consumeCompare(TokenResultList tks) {
        SearchExpression left = consumeList(tks);
        if (tks.eof() == true || left == null) {
            return left;
        }
        int cmptk = tks.curToken().getTokenType();
        switch (cmptk) {
        case TK_CONTAINING:
        case TK_EQUALS:
        case TK_LESS:
        case TK_MORE:
        case TK_MOREOREQUAL:
        case TK_LESSOREQUAL:
        case TK_NOTEQUAL:
        case TK_LIKE:
            break;
        default:
            // not a comparator; the token is left for the caller
            return left;
        }
        tks.nextTokenSkipping(TK_SPACE);
        SearchExpression rex = consumeListElement(tks);
        if (rex == null) {
            throw new InvalidMatcherGrammar("missing right comparator argument");
        }
        if ((left instanceof SearchExpressionFieldSelektor) == false) {
            throw new InvalidMatcherGrammar(
                    "expect field selector on left side of comparator. got: " + left.toString());
        }
        if ((rex instanceof SearchExpressionText) == false) {
            // the right side has to be a text expression, not a field selector
            throw new InvalidMatcherGrammar("expect text on right side of comparator. got: " + rex.toString());
        }

        Matcher<String> mt = createComparator(cmptk, ((SearchExpressionText) rex).getText());
        return new SearchExpressionComparator(mt, (SearchExpressionFieldSelektor) left,
                (SearchExpressionText) rex);
    }

    /**
     * Consumes a comparison that is optionally followed by an and/or operator and a further
     * and/or expression. The right operand is parsed recursively, so a chain like
     * {@code a and b or c} nests to the right.
     *
     * @param tks the token cursor
     * @return the parsed expression or null if there is nothing to consume
     * @throws InvalidMatcherGrammar if an operator is not followed by a right operand
     */
    protected SearchExpression consumeAndOr(TokenResultList tks) {
        final SearchExpression lhs = consumeCompare(tks);
        if (lhs == null || tks.eof()) {
            return lhs;
        }
        final int operator = tks.curToken().getTokenType();
        if (operator != TK_AND && operator != TK_OR) {
            return lhs;
        }
        tks.nextTokenSkipping(TK_SPACE);
        final SearchExpression rhs = consumeAndOr(tks);
        if (rhs == null) {
            throw new InvalidMatcherGrammar("Missing right express of <expr>[&&||\\|]<expr>: " + tks.pattern
                    + "; rest: " + tks.restOfTokenString());
        }
        if (operator == TK_AND) {
            return new SearchExpressionAnd(lhs, rhs);
        }
        return new SearchExpressionOr(lhs, rhs);
    }

    /**
     * Consumes one order criterion and creates the result comparator for it. A plain text
     * criterion is only accepted if it is "relevance" (case-insensitive); any other field
     * selector is ordered by that field.
     *
     * @param tks the token cursor
     * @return the comparator for the criterion
     * @throws InvalidMatcherGrammar if the criterion is not a field selector or is an unknown
     *         plain text criterion
     */
    protected SearchResultComparatorBase consumeThisComparator(TokenResultList tks) {
        final SearchExpression criterion = consumeCommand(tks);
        if (!(criterion instanceof SearchExpressionFieldSelektor)) {
            throw new InvalidMatcherGrammar("Need text selector. Got: " + criterion);
        }
        if (!(criterion instanceof SearchExpressionTextContains)) {
            return new SearchResultComparatorField((SearchExpressionFieldSelektor) criterion);
        }
        final String name = ((SearchExpressionTextContains) criterion).getText();
        if (!StringUtils.equalsIgnoreCase(name, "relevance")) {
            throw new InvalidMatcherGrammar("Unknown order criteria: " + name);
        }
        return new SearchResultComparatorRelevance();
    }

    /**
     * Consumes one order criterion with an optional direction, for example {@code title desc}.
     * "DESC" marks the comparator as descending, "ASC" is accepted and changes nothing; both are
     * matched case-insensitively. A following comma is not consumed.
     *
     * @param tks the token cursor
     * @param last the comparator created for the previous criterion; not used by this
     *        implementation
     * @return the comparator for the consumed criterion
     * @throws InvalidMatcherGrammar if the criterion is followed by something other than a
     *         direction, a comma or the end of input
     */
    protected SearchResultComparatorBase createComparator(TokenResultList tks, SearchResultComparatorBase last) {
        final SearchResultComparatorBase comparator = consumeThisComparator(tks);
        tks.skipping(TK_SPACE);
        if (tks.eof() || tks.curToken().getTokenType() == TK_COMMA) {
            return comparator;
        }
        final String direction = tks.curToken().getConsumed();
        final boolean descending = StringUtils.equalsIgnoreCase(direction, "DESC");
        if (!descending && !StringUtils.equalsIgnoreCase(direction, "ASC")) {
            throw new InvalidMatcherGrammar("expect ',' for the next order expression");
        }
        if (descending) {
            comparator.setDesc(true);
        }
        tks.nextTokenSkipping(TK_SPACE);
        return comparator;
    }

    /**
     * Consumes an and/or expression that is optionally followed by "order by" and a comma
     * separated list of order criteria. The first comparator is added to the order expression,
     * every further one is attached to its predecessor as next comparator.
     *
     * @param tks the token cursor
     * @return the expression itself if no "order by" follows, otherwise a
     *         {@link SearchExpressionOrderBy} wrapping it
     * @throws InvalidMatcherGrammar if "order by" has no criteria or a criterion is followed by
     *         something other than a comma or the end of input
     */
    protected SearchExpression consumeOrderBy(TokenResultList tks) {
        final SearchExpression expression = consumeAndOr(tks);
        if (tks.eof() || tks.curToken().getTokenType() != TK_ORDERBY) {
            return expression;
        }
        tks.nextTokenSkipping(TK_SPACE);
        if (tks.eof()) {
            throw new InvalidMatcherGrammar("order by need criteria");
        }

        final SearchExpressionOrderBy order = new SearchExpressionOrderBy(expression);
        SearchResultComparatorBase previous = null;

        while (!tks.eof()) {
            tks.skipping(TK_SPACE, TK_COMMA);
            if (tks.eof()) {
                break;
            }
            final SearchResultComparatorBase current = createComparator(tks, previous);
            if (current == null) {
                break;
            }
            if (previous == null) {
                order.addComparator(current);
            } else {
                previous.setNextComparator(current);
            }
            previous = current;
            if (tks.eof()) {
                break;
            }
            if (tks.lookAheadTokenType(0) != TK_COMMA) {
                throw new InvalidMatcherGrammar("expect ',' or end of expression for the next order expression");
            }
            tks.nextTokenSkipping(TK_SPACE);
        }
        return order;
    }

    /**
     * Entry point of the grammar; also used for the content of a bracket. Delegates to
     * {@link #consumeOrderBy(TokenResultList)}.
     *
     * @param tkl the token cursor
     * @return the parsed expression
     */
    protected SearchExpression consume(TokenResultList tkl) {
        final SearchExpression parsed = consumeOrderBy(tkl);
        return parsed;
    }

    /**
     * Parses a search pattern into an expression tree. The pattern is trimmed, split into tokens
     * using the default token table and then consumed completely.
     *
     * @param pattern the search pattern
     * @return the parsed expression
     * @throws InvalidMatcherGrammar if tokens are left over after parsing or the pattern violates
     *         the grammar
     */
    public SearchExpression parse(String pattern) {
        final String trimmed = StringUtils.trim(pattern);
        final List<TokenResult> tokens = TextSplitterUtils.parseStringTokens(trimmed, DefaultToken, escapeChar,
                true, true);
        final TokenResultList cursor = new TokenResultList(tokens, 0, trimmed);
        final SearchExpression parsed = consume(cursor);
        if (cursor.eof()) {
            return parsed;
        }
        throw new InvalidMatcherGrammar(
                "unconsumed tokens. pattern: " + trimmed + "; rest: " + cursor.restOfTokenString());
    }

    /**
     * @return the live map of command names to command expression classes used by this parser;
     *         it is not a copy
     */
    public Map<String, Class<? extends SearchExpressionCommand>> getCommandExpressions() {
        return this.commandExpressions;
    }

    /**
     * Replaces the command table of this parser. The given map is stored as is, not copied.
     *
     * @param commandExpressions map of lower-case command names to command expression classes
     */
    public void setCommandExpressions(
            Map<String, Class<? extends SearchExpressionCommand>> commandExpressions) {
        this.commandExpressions = commandExpressions;
    }

}