eionet.cr.util.sql.VirtuosoFullTextQuery.java Source code

Java tutorial

Introduction

Here is the source code for eionet.cr.util.sql.VirtuosoFullTextQuery.java.

Source

/*
 * The contents of this file are subject to the Mozilla Public
 *
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Content Registry 2.0.
 *
 * The Initial Owner of the Original Code is European Environment
 * Agency. Portions created by Tieto Eesti are Copyright
 * (C) European Environment Agency. All Rights Reserved.
 *
 * Contributor(s):
 * Jaanus Heinlaid, Tieto Eesti*/
package eionet.cr.util.sql;

import java.text.ParseException;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.StringTokenizer;

import org.apache.commons.lang.StringUtils;

import eionet.cr.dao.util.SearchExpression;

/**
 * Turns a free-form search string into a full-text query expression for Virtuoso.
 *
 * <p>
 * Parsing works in three steps:
 * <ol>
 * <li>Double-quoted multi-word phrases are collected and cut out of the query string.</li>
 * <li>The remaining text is split into words and boolean operators. Words shorter than
 * {@link #MIN_WORD_LENGTH} characters are dropped, every kept word is wrapped in single quotes,
 * and {@link #DEFAULT_BOOLEAN_OPERATOR} is inserted between two words that have no explicit
 * operator between them.</li>
 * <li>The collected phrases are appended, each wrapped in single quotes and joined with
 * {@code " and "}.</li>
 * </ol>
 *
 * <p>
 * Instances are created through the static {@code parse(...)} factory methods.
 *
 * @author risto
 *
 */
public final class VirtuosoFullTextQuery {

    /** Minimum length a word (or a trimmed phrase) must have to be kept in the query. */
    private static final int MIN_WORD_LENGTH = 3;

    /** Operator inserted between two adjacent words when the query itself supplies none. */
    protected static final String DEFAULT_BOOLEAN_OPERATOR = "and";

    /** Whitespace characters that separate words from each other. */
    private static final String WHITESPACE = " \t\r\n\f";

    /** Token delimiters: whitespace plus the double quote, so that quoted single words get unwrapped. */
    private static final String TOKEN_DELIMITERS = WHITESPACE + "\"";

    /**
     * Tokens recognised as boolean operators. Matching is exact (case-sensitive) set membership.
     * NOTE(review): lowercase "not" is not listed and is therefore handled as an ordinary word --
     * confirm that this is intended.
     */
    private static final HashSet<String> BOOLEAN_OPERATORS = new HashSet<String>();

    static {
        BOOLEAN_OPERATORS.add("&");
        BOOLEAN_OPERATORS.add("|");
        BOOLEAN_OPERATORS.add("!");
        BOOLEAN_OPERATORS.add("AND");
        BOOLEAN_OPERATORS.add("OR");
        BOOLEAN_OPERATORS.add("NOT");
        BOOLEAN_OPERATORS.add("and");
        BOOLEAN_OPERATORS.add("or");
    }

    /** The query being parsed; it is trimmed and rewritten while parsing proceeds. */
    private String query;

    /** The resulting full-text expression; stays empty for a null or blank query. */
    private final StringBuilder parsedQuery = new StringBuilder();

    /** Quoted multi-word phrases found in the query, kept in order of first occurrence. */
    private final HashSet<String> phrases = new LinkedHashSet<String>();

    /**
     * Creates the object and immediately parses the given query.
     *
     * @param query the raw search string, may be null
     * @throws ParseException if the query contains an unclosed double quote
     */
    private VirtuosoFullTextQuery(String query) throws ParseException {
        this.query = query;
        parse();
    }

    /**
     * Parses the given raw search string.
     *
     * @param query the raw search string; null or blank yields an empty parsed query
     * @return VirtuosoFullTextQuery holding the parsed expression and the detected phrases
     * @throws ParseException if the query contains an unclosed double quote
     */
    public static VirtuosoFullTextQuery parse(String query) throws ParseException {

        return new VirtuosoFullTextQuery(query);
    }

    /**
     * Parses the string form of the given search expression.
     *
     * @param searchExpression the expression whose {@code toString()} is parsed; null is treated as an empty query
     * @return VirtuosoFullTextQuery holding the parsed expression and the detected phrases
     * @throws ParseException if the query contains an unclosed double quote
     */
    public static VirtuosoFullTextQuery parse(SearchExpression searchExpression) throws ParseException {

        return new VirtuosoFullTextQuery(searchExpression == null ? "" : searchExpression.toString());
    }

    /**
     * Parses {@link #query} into {@link #parsedQuery} and {@link #phrases}. Does nothing for a null or blank query.
     *
     * @throws ParseException if the query contains an unclosed double quote
     */
    private void parse() throws ParseException {

        if (query == null) {
            return;
        }

        query = query.trim();
        if (query.length() == 0) {
            return;
        }

        extractPhrases();
        appendWords();
        appendPhrases();
    }

    /**
     * Collects the quoted phrases of the query and removes them (quotes included) from the query string.
     * A quoted section counts as a phrase only if its trimmed length is at least {@link #MIN_WORD_LENGTH}
     * and it contains whitespace; other quoted sections stay in the query and are tokenized as plain words.
     *
     * @throws ParseException if a double quote is opened but never closed; the error offset is the index of
     *         that quote in the trimmed query
     */
    private void extractPhrases() throws ParseException {

        int openQuote = -1;
        int len = query.length();
        for (int i = 0; i < len; i++) {
            if (query.charAt(i) != '"') {
                continue;
            }
            if (openQuote == -1) {
                openQuote = i;
                continue;
            }

            String phrase = query.substring(openQuote + 1, i);
            if (phrase.trim().length() >= MIN_WORD_LENGTH && StringUtils.containsAny(phrase, WHITESPACE)) {
                phrases.add(phrase);
            }
            openQuote = -1;
        }

        // there must be no unclosed quotes left
        if (openQuote != -1) {
            throw new ParseException("Unclosed quotes at index " + openQuote, openQuote);
        }

        // Literal replacement on purpose: the phrase is user text, not a regular expression.
        // A regex-based replaceAll would throw on input like "foo (bar" and would fail to
        // remove phrases such as "c++ code".
        for (String phrase : phrases) {
            query = query.replace("\"" + phrase + "\"", "");
        }
    }

    /**
     * Tokenizes what is left of the query after phrase removal and appends the words and operators to
     * {@link #parsedQuery}.
     *
     * <p>
     * {@code &} and {@code |} are rewritten to {@code AND} and {@code OR}. Of several consecutive operators
     * only the first one is kept (so in {@code foo AND NOT bar} the {@code NOT} is discarded). An operator
     * is written out only once the word following it is known, which keeps a trailing operator from
     * dangling at the end of the expression.
     */
    private void appendWords() {

        // remove any redundant logical operands and too short words from the beginning of query
        query = removeOperands(new StringTokenizer(query, TOKEN_DELIMITERS));

        StringTokenizer st = new StringTokenizer(query, TOKEN_DELIMITERS);
        String pendingOperator = null;
        boolean wordAppended = false;

        while (st.hasMoreTokens()) {

            String token = st.nextToken();
            if (token.equals("&")) {
                token = "AND";
            } else if (token.equals("|")) {
                token = "OR";
            }

            if (isBooleanOperator(token)) {
                // An operator is only meaningful between two words: remember the first one
                // seen after a word and ignore the rest of the run.
                if (wordAppended && pendingOperator == null) {
                    pendingOperator = token;
                }
                continue;
            }

            if (token.length() < MIN_WORD_LENGTH) {
                continue;
            }

            if (wordAppended) {
                String operator = pendingOperator != null ? pendingOperator : DEFAULT_BOOLEAN_OPERATOR;
                parsedQuery.append(" ").append(operator).append(" ");
            }
            parsedQuery.append("'").append(token).append("'");

            wordAppended = true;
            pendingOperator = null;
        }
    }

    /**
     * Appends the collected phrases to {@link #parsedQuery}, each in single quotes and joined to the
     * preceding content with {@code " and "}.
     */
    private void appendPhrases() {

        for (String phrase : phrases) {
            if (parsedQuery.length() > 0) {
                parsedQuery.append(" and ");
            }
            parsedQuery.append("'").append(phrase).append("'");
        }
    }

    /**
     * Remove any redundant logical operands from the beginning of the query and too short words from
     * anywhere in it.
     *
     * @param st tokenizer over the query, may be null
     * @return the kept tokens, each followed by a single space; empty if nothing was kept
     */
    private String removeOperands(StringTokenizer st) {

        StringBuilder ret = new StringBuilder();
        if (st == null) {
            return ret.toString();
        }

        boolean wordFound = false;
        while (st.hasMoreTokens()) {
            String token = st.nextToken();

            if (isBooleanOperator(token)) {
                // operators in front of the first word have nothing to operate on
                if (wordFound) {
                    ret.append(token).append(" ");
                }
            } else if (token.length() >= MIN_WORD_LENGTH) {
                ret.append(token).append(" ");
                wordFound = true;
            }
        }

        return ret.toString();
    }

    /**
     * Check if the string is a boolean operator.
     *
     * @param s the token to check
     * @return true if the string is in the list of boolean operators
     */
    private boolean isBooleanOperator(String s) {
        return BOOLEAN_OPERATORS.contains(s);
    }

    /*
     * (non-Javadoc)
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return new StringBuilder("parsedQuery=[").append(parsedQuery).append("], phrases=").append(phrases)
                .toString();
    }

    /**
     * @return the parsed full-text expression; an empty string if the query had no usable words or phrases
     */
    public String getParsedQuery() {
        return parsedQuery.toString();
    }

    /**
     * @return the phrases (the internal set itself, not a copy), iterated in order of first occurrence
     */
    public HashSet<String> getPhrases() {
        return phrases;
    }
}