ORG.oclc.os.SRW.Lucene.BasicLuceneQueryTranslator.java Source code

Java tutorial

Introduction

Here is the source code for ORG.oclc.os.SRW.Lucene.BasicLuceneQueryTranslator.java

Source

/* 
 * OCKHAM P2PREGISTRY Copyright 2006 Oregon State University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ORG.oclc.os.SRW.Lucene;

import ORG.oclc.os.SRW.SRWDiagnostic;
import java.util.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.z3950.zing.cql.*;

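/**
 * Translates parsed CQL query trees into Lucene {@link Query} objects by
 * rendering the tree as Lucene query-parser syntax and parsing the result.
 *
 * @author peter
 *         Date: Oct 25, 2005
 *         Time: 10:38:43 AM
 */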
public class BasicLuceneQueryTranslator implements CqlQueryTranslator {
    // Commons-logging logger shared by all instances of this translator.
    private static Log log = LogFactory.getLog(BasicLuceneQueryTranslator.class);

    // Lookup table consulted by makeLuceneQuery to replace a CQL index name with
    // another name before it is written into the Lucene query text. It is handed
    // to SRWLuceneDatabase.makeIndexInfo during init (presumably filled there --
    // confirm against that method); the "cql.serverChoice" entry supplies the
    // parser's default field.
    HashMap<String, String> indexMappings = new HashMap<String, String>();
    // Parser used by makeQuery; stays null until one of the init methods has run.
    QueryParser qp = null;
    // Value returned by getTerm(). Nothing in the active code of this class
    // assigns it, so it remains null unless set from elsewhere in the package.
    Term tterm = null;

    /**
     * Instantiates a Lucene {@link Analyzer} by class name.
     *
     * <p>A name without a package is resolved implicitly: names starting with
     * {@code "Standard"} are looked up in {@code org.apache.lucene.analysis.standard},
     * everything else in {@code org.apache.lucene.analysis}.
     *
     * <p>The analyzer is created through its {@code (Version)} constructor when it
     * has one (passing {@link Version#LUCENE_35}, the version used elsewhere in
     * this class); otherwise the no-argument constructor is used.
     *
     * @param name simple or fully-qualified analyzer class name; must not be null
     * @return a new analyzer instance
     * @throws InstantiationException if the class cannot be loaded, is not an
     *         {@code Analyzer}, or cannot be constructed; the underlying failure
     *         is attached as the cause
     */
    private Analyzer getAnalyzer(String name) throws InstantiationException {
        if (name.indexOf('.') == -1) { // implicit package name
            if (name.startsWith("Standard")) {
                name = "org.apache.lucene.analysis.standard." + name;
            } else {
                name = "org.apache.lucene.analysis." + name;
            }
        }
        try {
            log.debug("creating instance of Analyzer class " + name);
            Class<? extends Analyzer> analyzerClass = Class.forName(name).asSubclass(Analyzer.class);
            try {
                // Prefer the Version-taking constructor; not every analyzer has a no-arg one.
                return analyzerClass.getConstructor(Version.class).newInstance(Version.LUCENE_35);
            } catch (NoSuchMethodException noVersionConstructor) {
                // Analyzer without a (Version) constructor: fall back to the no-arg one.
                return analyzerClass.newInstance();
            }
        } catch (Exception e) {
            // Constructor.newInstance wraps constructor failures; report the real one.
            Throwable cause = e;
            if (e instanceof java.lang.reflect.InvocationTargetException && e.getCause() != null) {
                cause = e.getCause();
            }
            String message = "Unable to create analyzer \"" + name + "\": " + cause.getMessage();
            log.error(message, cause);
            // InstantiationException has no cause-taking constructor, so attach it afterwards.
            InstantiationException failure = new InstantiationException(message);
            failure.initCause(cause);
            throw failure;
        }
    }

    /**
     * Returns the current value of the {@code tterm} field.
     *
     * @return the stored term, or {@code null} if none has been set
     */
    // Not legal until JDK 6    @Override
    public Term getTerm() {
        return this.tterm;
    }

    /**
     * Prepares this translator for use by building its {@link QueryParser}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>passes {@code properties}, {@code searcher} and the index-mapping table to
     *       {@code SRWLuceneDatabase.makeIndexInfo};</li>
     *   <li>takes the parser's default field from the {@code "cql.serverChoice"} mapping;</li>
     *   <li>creates the default analyzer named by {@code analyzer.default}, or a
     *       {@link WhitespaceAnalyzer} when that property is missing or empty;</li>
     *   <li>for every indexed field, creates the analyzer named by
     *       {@code analyzer.<field>} when that property is non-empty.</li>
     * </ol>
     *
     * @param properties configuration holding the {@code analyzer.*} settings
     * @param searcher   searcher whose index reader supplies the indexed field names
     * @throws InstantiationException if a configured analyzer cannot be created
     */
    public void init(Properties properties, IndexSearcher searcher) throws InstantiationException {
        SRWLuceneDatabase.makeIndexInfo(properties, searcher, indexMappings);

        // The parser needs a default search field and the analyzers to apply.
        final String serverChoiceField = indexMappings.get("cql.serverChoice");
        final String fallbackAnalyzerName = (String) properties.get("analyzer.default");
        final Analyzer fallbackAnalyzer =
                (fallbackAnalyzerName != null && fallbackAnalyzerName.length() > 0)
                        ? getAnalyzer(fallbackAnalyzerName)
                        : new WhitespaceAnalyzer(Version.LUCENE_35);

        // Collect per-field analyzer overrides for the indexed fields.
        final Collection<String> indexedFields =
                searcher.getIndexReader().getFieldNames(IndexReader.FieldOption.INDEXED);
        final Map<String, Analyzer> overrides = new HashMap<String, Analyzer>();
        for (String fieldName : indexedFields) {
            final String configured = (String) properties.get("analyzer." + fieldName);
            if (configured == null || configured.length() == 0) {
                continue; // no override for this field
            }
            overrides.put(fieldName, getAnalyzer(configured));
        }

        final PerFieldAnalyzerWrapper combined = new PerFieldAnalyzerWrapper(fallbackAnalyzer, overrides);
        qp = new QueryParser(Version.LUCENE_35, serverChoiceField, combined);
    }

    /**
     * Initializes this translator from a database object by delegating to
     * {@link #init(Properties, IndexSearcher)} with the database's {@code searcher}.
     *
     * @param properties configuration passed through unchanged
     * @param ldb        database whose {@code searcher} field is used
     * @throws InstantiationException if a configured analyzer cannot be created
     */
    // Not legal until JDK 6    @Override
    public void init(Properties properties, SRWLuceneDatabase ldb) throws InstantiationException {
        final IndexSearcher databaseSearcher = ldb.searcher;
        init(properties, databaseSearcher);
    }

    /**
     * Converts a CQL query tree into a Lucene {@link Query}.
     *
     * <p>The tree is first rendered as Lucene query-parser text, which is then
     * parsed by the {@link QueryParser} created in {@code init}.
     *
     * @param node root of the CQL query tree
     * @return the parsed Lucene query
     * @throws SRWDiagnostic with code {@code QuerySyntaxError} when the generated
     *         text is rejected by the parser
     */
    // Not legal until JDK 6    @Override
    public Query makeQuery(CQLNode node) throws SRWDiagnostic {
        final StringBuffer luceneSyntax = new StringBuffer();
        makeLuceneQuery(node, luceneSyntax);
        final String queryText = luceneSyntax.toString();

        final Query parsed;
        try {
            parsed = qp.parse(queryText);
        } catch (ParseException parseFailure) {
            log.error(parseFailure, parseFailure);
            throw new SRWDiagnostic(SRWDiagnostic.QuerySyntaxError, parseFailure.getMessage());
        }
        return parsed;
    }

    /**
     * Appends the Lucene query-parser syntax for a CQL (sub)tree to {@code sb}.
     *
     * <ul>
     *   <li>Boolean nodes become {@code (left AND|NOT|OR right)}.</li>
     *   <li>Term nodes become {@code field:value}; the field prefix is omitted when
     *       the (mapped) index name is empty. The index name is replaced by its
     *       entry in {@code indexMappings} when one exists.</li>
     *   <li>For a term containing a space: {@code =} and {@code scr} emit a quoted
     *       phrase, {@code any} emits a parenthesised word list, and {@code all}
     *       emits the words joined with {@code AND}.</li>
     * </ul>
     *
     * <p>The term text is appended verbatim; no Lucene escaping is applied.
     *
     * <p>Constructs that cannot be translated are reported as diagnostics rather
     * than being written into the query text, where the parser would treat the
     * message as ordinary search terms.
     *
     * @param node CQL node to translate
     * @param sb   buffer receiving the Lucene syntax
     * @throws SRWDiagnostic code 37 for an unrecognised boolean operator, code 19
     *         for an unsupported relation, code 47 for an unrecognised node type
     */
    private void makeLuceneQuery(CQLNode node, StringBuffer sb) throws SRWDiagnostic {
        if (node instanceof CQLBooleanNode) {
            CQLBooleanNode cbn = (CQLBooleanNode) node;
            final String operator;
            if (node instanceof CQLAndNode) {
                operator = " AND ";
            } else if (node instanceof CQLNotNode) {
                operator = " NOT ";
            } else if (node instanceof CQLOrNode) {
                operator = " OR ";
            } else {
                // SRU diagnostic 37: unsupported boolean operator
                throw new SRWDiagnostic(37, String.valueOf(cbn));
            }
            sb.append("(");
            makeLuceneQuery(cbn.left, sb);
            sb.append(operator);
            makeLuceneQuery(cbn.right, sb);
            sb.append(")");
        } else if (node instanceof CQLTermNode) {
            CQLTermNode ctn = (CQLTermNode) node;
            String index = ctn.getIndex();
            String mappedIndex = indexMappings.get(index);
            if (mappedIndex != null) {
                index = mappedIndex;
            }

            final String relation = ctn.getRelation().getBase();
            final boolean phraseRelation = relation.equals("=") || relation.equals("scr");
            final boolean anyRelation = relation.equals("any");
            final boolean allRelation = relation.equals("all");
            if (!phraseRelation && !anyRelation && !allRelation) {
                // SRU diagnostic 19: unsupported relation
                throw new SRWDiagnostic(19, relation);
            }

            if (!"".equals(index)) {
                sb.append(index).append(":");
            }

            final String term = ctn.getTerm();
            if (term.indexOf(' ') < 0) {
                // Single word: identical output for every supported relation.
                sb.append(term);
            } else if (phraseRelation) {
                sb.append('"').append(term).append('"');
            } else if (anyRelation) {
                sb.append('(').append(term).append(')');
            } else {
                // "all": every word is required.
                sb.append('(');
                StringTokenizer words = new StringTokenizer(term);
                while (words.hasMoreTokens()) {
                    sb.append(words.nextToken());
                    if (words.hasMoreTokens()) {
                        sb.append(" AND ");
                    }
                }
                sb.append(')');
            }
        } else {
            // SRU diagnostic 47: cannot process query
            throw new SRWDiagnostic(47, "UnknownCQLNode(" + node + ")");
        }
    }

    /**
     * Writes an in-order dump of a CQL query tree to the debug log: left subtree,
     * the boolean operator, then right subtree; term nodes are logged with their
     * index, relation and term. Does nothing when debug logging is disabled.
     *
     * @param node root of the (sub)tree to dump
     */
    private void dumpQueryTree(CQLNode node) {
        if (!log.isDebugEnabled()) {
            return; // nothing below has any effect other than debug output
        }
        if (node instanceof CQLBooleanNode) {
            CQLBooleanNode cbn = (CQLBooleanNode) node;
            dumpQueryTree(cbn.left);
            // Braces are required here: without them each "else" attaches to the
            // nearest debug-enabled check and the NOT/OR/unknown cases are never logged.
            if (node instanceof CQLAndNode) {
                log.debug(" AND ");
            } else if (node instanceof CQLNotNode) {
                log.debug(" NOT ");
            } else if (node instanceof CQLOrNode) {
                log.debug(" OR ");
            } else {
                log.debug(" UnknownBoolean(" + cbn + ") ");
            }
            dumpQueryTree(cbn.right);
        } else if (node instanceof CQLTermNode) {
            CQLTermNode ctn = (CQLTermNode) node;
            log.debug("term(qualifier=\"" + ctn.getIndex() + "\" relation=\"" + ctn.getRelation().getBase()
                    + "\" term=\"" + ctn.getTerm() + "\")");
        } else {
            log.debug("UnknownCQLNode(" + node + ")");
        }
    }

    //    public Query makeQuery(CQLNode node) throws SRWDiagnostic{
    //        return makeQuery(node, null);
    //    }
    //
    //    Query makeQuery(CQLNode node, Query leftQuery) throws SRWDiagnostic{
    //        Query query = null;
    //
    //        if(node instanceof CQLBooleanNode) {
    //            CQLBooleanNode cbn=(CQLBooleanNode)node;
    //
    //            Query left = makeQuery(cbn.left);
    //            Query right = makeQuery(cbn.right, left);
    //
    //            if(node instanceof CQLAndNode) {
    //                if (left instanceof BooleanQuery) {
    //                    query = left;
    //                    log.info("  Anding left and right");
    //                    AndQuery((BooleanQuery) left, right);
    //                } else {
    //                    query = new BooleanQuery();
    //                    log.info("  Anding left and right in new query");
    //                    AndQuery((BooleanQuery) query, left);
    //                    AndQuery((BooleanQuery) query, right);
    //                }
    //
    //            } else if(node instanceof CQLNotNode) {
    //
    //                if (left instanceof BooleanQuery) {
    //                    log.debug("  Notting left and right");
    //                    query = left;
    //                    NotQuery((BooleanQuery) left, right);
    //                } else {
    //                    query = new BooleanQuery();
    //                    log.debug("  Notting left and right in new query");
    //                    AndQuery((BooleanQuery) query, left);
    //                    NotQuery((BooleanQuery) query, right);
    //                }
    //
    //            } else if(node instanceof CQLOrNode) {
    //                if (left instanceof BooleanQuery) {
    //                    log.debug("  Or'ing left and right");
    //                    query = left;
    //                    OrQuery((BooleanQuery) left, right);
    //                } else {
    //                    log.debug("  Or'ing left and right in new query");
    //                    query = new BooleanQuery();
    //                    OrQuery((BooleanQuery) query, left);
    //                    OrQuery((BooleanQuery) query, right);
    //                }
    //            } else {
    //                throw new RuntimeException("Unknown boolean");
    //            }
    //
    //        } else if(node instanceof CQLTermNode) {
    //            CQLTermNode ctn=(CQLTermNode)node;
    //
    //            String relation = ctn.getRelation().getBase();
    //            String index=ctn.getQualifier();
    //
    //            if (!index.equals("")) {
    //                if(relation.equals("=") || relation.equals("scr")) {
    //                    query = createTermQuery(index, ctn.getTerm(), relation);
    //                } else if (relation.equals("<")) {
    //                    term = new Term(index, ctn.getTerm());
    //                    //term is upperbound, exclusive
    //                    query = new RangeQuery(null,term,false);
    //                } else if (relation.equals(">")) {
    //                    term = new Term(index, ctn.getTerm());
    //                    //term is lowerbound, exclusive
    //                    query = new RangeQuery(term,null,false);
    //                } else if (relation.equals("<=")) {
    //                    term = new Term(index, ctn.getTerm());
    //                    //term is upperbound, inclusive
    //                    query = new RangeQuery(null,term,true);
    //                } else if (relation.equals(">=")) {
    //                    term = new Term(index, ctn.getTerm());
    //                    //term is lowerbound, inclusive
    //                    query = new RangeQuery(term,null,true);
    //
    //                } else if (relation.equals("<>")) {
    //                    /**
    //                     * <> is an implicit NOT.
    //                     *
    //                     * For example the following statements are identical results:
    //                     *   foo=bar and zoo<>xar
    //                     *   foo=bar not zoo=xar
    //                     */
    //
    //                    if (leftQuery == null) {
    //                        // first term in query create an empty Boolean query to NOT
    //                        query = new BooleanQuery();
    //                    } else {
    //                        if (leftQuery instanceof BooleanQuery) {
    //                            // left query is already a BooleanQuery use it
    //                            query = leftQuery;
    //                        } else {
    //                            // left query was not a boolean, create a boolean query
    //                            // and AND the left query to it
    //                            query = new BooleanQuery();
    //                            AndQuery((BooleanQuery)query, leftQuery);
    //                        }
    //                    }
    //                    //create a term query for the term then NOT it to the boolean query
    //                    Query termQuery = createTermQuery(index,ctn.getTerm(), relation);
    //                    NotQuery((BooleanQuery) query, termQuery);
    //
    //                } else if (relation.equals("any")) {
    //                    //implicit or
    //                    query = createTermQuery(index,ctn.getTerm(), relation);
    //
    //                } else if (relation.equals("all")) {
    //                    //implicit and
    //                    query = createTermQuery(index,ctn.getTerm(), relation);
    //                } else if (relation.equals("exact")) {
    //                    /**
    //                     * implicit and.  this query will only return accurate
    //                     * results for indexes that have been indexed using
    //                     * a non-tokenizing analyzer
    //                     */
    //                    query = createTermQuery(index,ctn.getTerm(), relation);
    //                } else {
    //                    //anything else is unsupported
    //                    throw new SRWDiagnostic(19, ctn.getRelation().getBase());
    //                }
    //
    //            }
    //        } else {
    //            throw new SRWDiagnostic(47, "UnknownCQLNode: "+node+")");
    //        }
    //        if (query != null) {
    //            log.info("Query : " + query.toString());
    //        }
    //        return query;
    //    }
    //
    //    Query createTermQuery(String cqlIndexName, String value, String relation) throws SRWDiagnostic {
    //
    //        Query termQuery = null;
    //
    //        // map the cqlIndexName to a lucene index
    //        String index=(String)indexMappings.get(cqlIndexName);
    //        if(index==null)
    //            throw new SRWDiagnostic(SRWDiagnostic.UnsupportedIndex, cqlIndexName);
    //
    //        /**
    //         * check to see if there are any spaces.  If there are spaces each
    //         * word must be broken into a single term search and then all queries
    //         * must be combined using an and.
    //         */
    //        if (value.indexOf(" ") == -1) {
    //            // no space found, just create a single term search
    //            //todo case insensitivity?
    //            term = new Term(index, value);
    //            if (value.indexOf("?") != -1 || value.indexOf("*")!=-1 ){
    //                termQuery = new WildcardQuery(term);
    //            } else {
    //                termQuery = new TermQuery(term);
    //            }
    //
    //        } else {
    //            // space found, iterate through the terms to create a multiterm search
    //
    //            if (relation == null || relation.equals("=") || relation.equals("<>") || relation.equals("exact")) {
    //                /**
    //                 * default is =, all terms must be next to each other.
    //                 * <> uses = as its term query.
    //                 * exact is a phrase query
    //                 */
    //                PhraseQuery phraseQuery = new PhraseQuery();
    //                StringTokenizer tokenizer = new StringTokenizer(value, " ");
    //                while (tokenizer.hasMoreTokens()) {
    //                    String curValue = tokenizer.nextToken();
    //                    phraseQuery.add(new Term(index, curValue));
    //                }
    //                termQuery = phraseQuery;
    //
    //            } else if(relation.equals("any")) {
    //                /**
    //                 * any is an implicit OR
    //                 */
    //                termQuery = new BooleanQuery();
    //                StringTokenizer tokenizer = new StringTokenizer(value, " ");
    //                while (tokenizer.hasMoreTokens()) {
    //                    String curValue = tokenizer.nextToken();
    //                    Query subSubQuery = createTermQuery(cqlIndexName, curValue, relation);
    //                    OrQuery((BooleanQuery) termQuery, subSubQuery);
    //                }
    //
    //            } else if (relation.equals("all")) {
    //                /**
    //                 * all is an implicit AND
    //                 */
    //                termQuery = new BooleanQuery();
    //                StringTokenizer tokenizer = new StringTokenizer(value, " ");
    //                while (tokenizer.hasMoreTokens()) {
    //                    String curValue = tokenizer.nextToken();
    //                    Query subSubQuery = createTermQuery(cqlIndexName, curValue, relation);
    //                    AndQuery((BooleanQuery) termQuery, subSubQuery);
    //                }
    //            }
    //
    //        }
    //
    //        return termQuery;
    //    }
    //
    //    /**
    //     * Join the two queries together with boolean AND
    //     * @param query
    //     * @param query2
    //     */
    //    void AndQuery(BooleanQuery query, Query query2) {
    //        /**
    //         * required = true (must match sub query)
    //         * prohibited = false (does not need to NOT match sub query)
    //         */
    //        query.add(query2, BooleanClause.Occur.MUST);
    //    }
    //
    //    void OrQuery(BooleanQuery query, Query query2) {
    //        /**
    //         * required = false (does not need to match sub query)
    //         * prohibited = false (does not need to NOT match sub query)
    //         */
    //        query.add(query2, BooleanClause.Occur.SHOULD);
    //    }
    //
    //    void NotQuery(BooleanQuery query, Query query2) {
    //        /**
    //         * required = false (does not need to match sub query)
    //         * prohibited = true (must not match sub query)
    //         */
    //        query.add(query2, BooleanClause.Occur.MUST_NOT);
    //    }
    //
    //    void dumpQueryTree(CQLNode node) {
    //        if(node instanceof CQLBooleanNode) {
    //            CQLBooleanNode cbn=(CQLBooleanNode)node;
    //            dumpQueryTree(cbn.left);
    //            if(node instanceof CQLAndNode)
    //                log.info(" AND ");
    //            else if(node instanceof CQLNotNode)
    //                log.info(" NOT ");
    //            else if(node instanceof CQLOrNode)
    //                log.info(" OR ");
    //            else log.info(" UnknownBoolean("+cbn+") ");
    //            dumpQueryTree(cbn.right);
    //        }
    //        else if(node instanceof CQLTermNode) {
    //            CQLTermNode ctn=(CQLTermNode)node;
    //            log.info("term(qualifier=\""+ctn.getQualifier()+"\" relation=\""+
    //                ctn.getRelation().getBase()+"\" term=\""+ctn.getTerm()+"\")");
    //        }
    //        else log.info("UnknownCQLNode("+node+")");
    //    }
}