org.openrdf.sail.lucene.LuceneQueryIterator.java Source code

Java tutorial

Introduction

Here is the source code for org.openrdf.sail.lucene.LuceneQueryIterator.java

Source

/*
 * Copyright Aduna (http://www.aduna-software.com/) (c) 2007.
 *
 * Licensed under the Aduna BSD-style license.
 */
package org.openrdf.sail.lucene;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.openrdf.cursor.Cursor;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Value;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.query.BindingSet;
import org.openrdf.query.algebra.QueryModel;
import org.openrdf.query.algebra.SingletonSet;
import org.openrdf.query.algebra.StatementPattern;
import org.openrdf.query.algebra.TupleExpr;
import org.openrdf.query.algebra.evaluation.QueryBindingSet;
import org.openrdf.sail.SailConnection;
import org.openrdf.store.StoreException;

/**
 * Iterates over all permutations of the hits of each Lucene query and binds the respective
 * variables (match, score, snippet) to the respective values. The query tree, with the Lucene
 * specific statement patterns removed, is then evaluated against the wrapped sail connection
 * once per permutation, and every solution of that evaluation is extended with the bindings
 * derived from the Lucene hits.
 * <p>
 * {@link #next()} returns <code>null</code> once no more results are available.
 * This class contains code that was removed from the LuceneSailConnection class.
 *
 * @author Enrico Minack
 */
public class LuceneQueryIterator implements Cursor<BindingSet> {
    private final Logger log = LoggerFactory.getLogger(this.getClass());

    private final LuceneIndex index; // the lucene index
    private final SailConnection sailConn; // the wrapped sail connection
    private final Set<QuerySpec> queries; // the lucene queries

    private final QueryModel query; // the query tree
    // the initial binding set (as coming from the query)
    // NOTE(review): currently only stored; evaluation is done with the derived bindings alone
    private final BindingSet bindings;
    private final boolean includeInferred; // the include-inferred statements flag

    private final Permutation permutations = new Permutation(); // provides all permutations of the hits of all lucene queries

    private Cursor<? extends BindingSet> nextBindingSets; // the results iterator from the underlying sail
    private BindingSet nextBindingSet = null; // the next result from the results iterator
    private QueryBindingSet derivedBindings = null; // the bindingset build up by this layer, taken to extend solutions from lower layer

    private final Map<QuerySpec, TopDocs> hits = new HashMap<QuerySpec, TopDocs>(); // maps the lucene query to its hits

    private final Formatter formatter = new SimpleHTMLFormatter();
    private final Map<QuerySpec, Highlighter> highlighters = new HashMap<QuerySpec, Highlighter>();

    /**
     * Creates the iterator and immediately evaluates all given Lucene queries against the index.
     * As part of that, the statement patterns of each evaluated Lucene query are replaced in the
     * given query tree.
     *
     * @param index the lucene index to search
     * @param sailConn the wrapped sail connection used to evaluate the remaining query tree
     * @param queries the lucene queries extracted from the query tree
     * @param query the query tree
     * @param bindings the initial binding set
     * @param includeInferred whether inferred statements are to be included in the evaluation
     * @throws StoreException declared for callers; not thrown by the code in this constructor itself
     */
    public LuceneQueryIterator(LuceneIndex index, SailConnection sailConn, Set<QuerySpec> queries, QueryModel query,
            BindingSet bindings, boolean includeInferred) throws StoreException {
        this.index = index;
        this.sailConn = sailConn;
        this.queries = queries;

        this.query = query;
        this.bindings = bindings;
        this.includeInferred = includeInferred;

        // initiate the evaluation
        evaluateLuceneQueries();
    }

    /**
     * Checks whether another result is available, fetching it if necessary.
     *
     * @return true if a subsequent call of {@link #next()} returns a binding set
     */
    public boolean hasNext() {
        findNextBindingSet();
        return nextBindingSet != null;
    }

    /**
     * Returns the next result.
     *
     * @return the next binding set, or null if there are no more results
     */
    public BindingSet next() {
        if (!hasNext()) {
            // no exception here: null signals the end of the results
            return null;
        }

        BindingSet result = nextBindingSet;
        this.nextBindingSet = null;
        return result;
    }

    /**
     * Does nothing; removing results is not supported and silently ignored.
     */
    public void remove() {
        // intentionally empty
    }

    /**
     * Releases the resources held by this iterator: the currently open results iterator of the
     * underlying sail (if any), the lucene hits and the highlighters.
     */
    public void close() {
        closeNextBindingSets();
        hits.clear();
        highlighters.clear();
    }

    /**
     * Evaluates every lucene query, registers the number of retrievable hits as one digit of the
     * permutation and removes the statement patterns of the query from the query tree.
     */
    private void evaluateLuceneQueries() {
        // TODO: optimize lucene queries here
        // - if they refer to the same subject, merge them into one lucene query
        // - multiple different property constraints can be put into the lucene query string (escape colons here)
        for (QuerySpec query : this.queries) {
            if (this.hits.containsKey(query)) {
                log.warn("there are multiple lucene queries bound to the same resource!");
                log.warn("These queries should be merged here!");
                log.warn("This is not implemented yet!");
                log.warn("So this lucene query is ignored!");
                continue;
            }

            // evaluate the lucene query and put the hits into the map
            TopDocs topDocs = evaluate(query);
            this.hits.put(query, topDocs);

            // evaluate() yields null if the query could not be evaluated; this is treated like a
            // query without hits. The digit is the number of hits that can actually be retrieved
            // (scoreDocs.length), which may be smaller than TopDocs.totalHits.
            int availableHits = 0;
            if ((topDocs != null) && (topDocs.scoreDocs != null)) {
                availableHits = topDocs.scoreDocs.length;
            }
            this.permutations.addDigit(availableHits);

            // finally remove the evaluated lucene query from the query tree
            removePatterns(query, this.query);
        }
    }

    /**
     * Evaluates one Lucene Query. It distinguishes between two cases,
     * the one where no subject is given and the one were it is given.
     * If the query requests a snippet, a highlighter for the query is registered as well.
     *
     * @param query the lucene query to evaluate
     * @return the lucene hits, or null if the evaluation failed (the failure is logged)
     */
    private TopDocs evaluate(QuerySpec query) {
        // get the subject of the query
        Resource subject = query.getSubject();

        try {
            // parse the query string to a lucene query
            Query lucenequery = this.index.parseQuery(query.getQueryString(), query.getPropertyURI());

            // if the query requests for the snippet, create a highlighter using this query
            if (query.getSnippetVariableName() != null) {
                Highlighter highlighter = new Highlighter(formatter, new QueryScorer(lucenequery));
                this.highlighters.put(query, highlighter);
            }

            // distinguish the two cases of subject == null
            if (subject == null) {
                return this.index.search(lucenequery);
            } else {
                return this.index.search(subject, lucenequery);
            }
        } catch (Exception e) {
            log.error("There was a problem evaluating query '" + query.getQueryString() + "' for property '"
                    + query.getPropertyURI() + "!", e);
        }

        return null;
    }

    /**
     * Tries to find the next binding set (result). If no results iterator is existing,
     * it tries to get one by calling {@link #findNextBindingSets()}. If it succeeds,
     * then this.nextBindingSet is not null, otherwise it is null.
     */
    private void findNextBindingSet() {
        if (this.nextBindingSet != null) {
            return;
        }

        while (true) {
            if (this.nextBindingSets == null) {
                // fill nextBindingSets; give up if there is nothing left to evaluate
                if (!findNextBindingSets()) {
                    return;
                }
            } else {
                // get next BindingSet (resets nextBindingSets when it is exhausted)
                this.nextBindingSet = getNextBindingSet();
            }

            if (this.nextBindingSet != null) {
                return;
            }
        }
    }

    /**
     * Tries to find the next Bindings Set (results iterator) if there is none currently.
     * It prepares the next permutation of hits, binds the respective variables and
     * evaluates the query tree against the underlying sail. The results iterator is then
     * stored as this.nextBindingSets. If the permutation binds the same match variable to
     * two different resources, the permutation is skipped: no results iterator is created,
     * but the method still returns true so that the next permutation is tried.
     *
     * @return true if the permutation was processed, false if no further results can be provided
     */
    private boolean findNextBindingSets() {
        // if there is still a next bindings set, we can safely return
        if (this.nextBindingSets != null) {
            return true;
        }

        // check if more permutations are available
        if (this.permutations.isInvalid()) {
            return false;
        }

        // get the current permutation and the queries
        Vector<Integer> permutation = this.permutations.val();
        Iterator<QuerySpec> queries = this.queries.iterator();

        // this takes the new bindings
        derivedBindings = new QueryBindingSet();

        // for each digit ...
        for (Integer id : permutation) {
            // get the respective query (the query this digit stands for)
            if (!queries.hasNext()) {
                log.warn("There are more permutation digits then there are query specs!");
                return false; // TODO: do we want to return true or false here?
            }
            QuerySpec query = queries.next();

            // if no hits are available, this binding set failed
            if (id <= 0) {
                return false;
            }

            // get the hit indicated by the digit value (digits are 1-based, hits are 0-based)
            Document doc = getDoc(query, id - 1);
            if (doc == null) {
                return false; // TODO: do we want to return true or false here?
            }

            // get the score of the hit
            float score = getScore(query, id - 1);

            // bind the respective variables
            String matchVar = query.getMatchesVariableName();
            if (matchVar != null) {
                Resource resource = this.index.getResource(doc);
                Value existing = derivedBindings.getValue(matchVar);
                if (existing == null) {
                    derivedBindings.addBinding(matchVar, resource);
                } else if (!existing.stringValue().equals(resource.stringValue())) {
                    // the existing binding contradicts the current binding, so we can safely
                    // skip this permutation: invalidate the binding and exit the loop
                    derivedBindings = null;
                    break;
                }
                // otherwise the variable is already bound to the same resource, nothing to do
            }

            if ((query.getScoreVariableName() != null) && (score > 0.0f)) {
                derivedBindings.addBinding(query.getScoreVariableName(), scoreToLiteral(score));
            }

            if (query.getSnippetVariableName() != null) {
                // get the highlighter of this query
                Highlighter highlighter = this.highlighters.get(query);
                if (highlighter != null) {
                    String snippet = buildSnippet(query, doc, highlighter);
                    derivedBindings.addBinding(query.getSnippetVariableName(), new LiteralImpl(snippet));
                } else {
                    log.warn(
                            "Lucene Query requests snippet, but no highlighter was generated for it, no snippets will be generated!\n{}",
                            query);
                }
            }
        }

        // finally, evaluate the derived bindings against the underlying store; the derived bindings
        // are also used to extend the results of this evaluation (see getNextBindingSet()).
        // A contradicting permutation (derivedBindings == null) is not evaluated at all.
        // NOTE(review): the initial bindings (this.bindings) are not passed to the evaluation;
        // confirm whether they were meant to be merged in here.
        if (derivedBindings != null) {
            try {
                this.nextBindingSets = this.sailConn.evaluate(query, derivedBindings, includeInferred);
            } catch (Exception e) {
                log.error("Provided sail connection could not evaluate tuple expression!", e);
                return false; // TODO: do we want to return true or false here?
            }
        }

        // go to the next permutation, if this was the last one,
        // invalidate the permutation instance, which will be check
        // at the beginning of the next call of the findNextBindingSets method
        if (this.permutations.next()) {
            this.permutations.invalidate();
        }

        // we succeeded
        return true;
    }

    /**
     * Extracts the snippets for the given hit from the fields the query was evaluated on and
     * joins them with "...". Fields whose snippet cannot be computed are logged and skipped.
     *
     * @param query the lucene query requesting the snippet
     * @param doc the hit to extract the snippets from
     * @param highlighter the highlighter created for the query
     * @return the joined snippets, possibly an empty string
     */
    private String buildSnippet(QuerySpec query, Document doc, Highlighter highlighter) {
        StringBuilder result = new StringBuilder();

        // limit to the queried field, if there was one
        String fieldname = LuceneIndex.TEXT_FIELD_NAME;
        if (query.getPropertyURI() != null) {
            fieldname = query.getPropertyURI().toString();
        }

        Field[] fields = doc.getFields(fieldname);
        for (Field field : fields) {
            String text = field.stringValue();
            TokenStream tokenStream = this.index.getAnalyzer().tokenStream(LuceneIndex.TEXT_FIELD_NAME,
                    new StringReader(text));
            String next;
            try {
                next = highlighter.getBestFragments(tokenStream, text, 2, "...");
            } catch (IOException e) {
                log.error("IOException while getting snippet for filed " + field.name() + " for query\n"
                        + query, e);
                continue;
            } catch (InvalidTokenOffsetsException e) {
                log.error("InvalidTokenOffsetsException while getting snippet for filed " + field.name()
                        + " for query\n" + query, e);
                continue;
            }

            if ((next != null) && (next.length() > 0)) {
                // separate the snippets of different fields
                if (result.length() > 0) {
                    result.append("...");
                }
                result.append(next);
            }
        }

        return result.toString();
    }

    /**
     * Provides the next binding set (result) of the current results iterator, extended with the
     * derived bindings. If the results iterator is exhausted or fails, it is closed and reset, so
     * that the next permutation gets evaluated.
     *
     * @return a binding set, or null if the current results iterator has no (more) results
     */
    private BindingSet getNextBindingSet() {
        try {
            // fetch the next binding set given by the underlying sail and extend it with the derived bindings
            BindingSet possibleSolution = this.nextBindingSets.next();
            if (possibleSolution != null) {
                QueryBindingSet solution = new QueryBindingSet(possibleSolution);
                solution.addAll(derivedBindings);
                return solution;
            }
        } catch (StoreException e) {
            log.error("Evaluation failed:", e);
        }

        // exhausted or broken: drop the iterator, otherwise a persistently failing iterator
        // would be polled forever by findNextBindingSet()
        closeNextBindingSets();
        return null;
    }

    /**
     * Closes and resets the current results iterator of the underlying sail, if there is one.
     * Problems while closing are logged and otherwise ignored.
     */
    private void closeNextBindingSets() {
        Cursor<? extends BindingSet> cursor = this.nextBindingSets;
        this.nextBindingSets = null;
        if (cursor == null) {
            return;
        }

        try {
            cursor.close();
        } catch (Exception e) {
            // best effort clean-up: there is nothing more we can do with this iterator
            log.error("Could not close the results iterator of the underlying sail:", e);
        }
    }

    /**
     * Returns the lucene hit with the given id of the respective lucene query
     *
     * @param query the lucene query
     * @param id the (0-based) id of the hit to return
     * @return the requested hit, or null if it fails
     */
    private Document getDoc(QuerySpec query, int id) {
        TopDocs topDocs = this.hits.get(query);
        if ((topDocs == null) || (topDocs.scoreDocs == null) || (id < 0) || (id >= topDocs.scoreDocs.length)) {
            log.warn("There is no hit with id {} for query\n{}", id, query);
            return null;
        }

        try {
            return index.getIndexReader().document(topDocs.scoreDocs[id].doc);
        } catch (CorruptIndexException e) {
            log.error("The index seems to be corrupted:", e);
            return null;
        } catch (IOException e) {
            log.error("Could not read from index:", e);
            return null;
        }
    }

    /**
     * Provides the score of the hit with the given id for the given lucene query.
     *
     * @param query the lucene query
     * @param id the (0-based) id of the score to return
     * @return the requested score, or 0.0f if it fails
     */
    private float getScore(QuerySpec query, int id) {
        TopDocs topDocs = this.hits.get(query);
        if ((topDocs == null) || (topDocs.scoreDocs == null) || (id < 0) || (id >= topDocs.scoreDocs.length)) {
            return 0.0f;
        }
        return topDocs.scoreDocs[id].score;
    }

    /**
     * Returns a score value encoded as a Literal.
     *
     * @param score the float score to convert
     * @return the score as a literal of type xsd:float
     */
    private Literal scoreToLiteral(float score) {
        return new LiteralImpl(String.valueOf(score), XMLSchema.FLOAT);
    }

    /**
     * Removes the given pattern from the query tree it is part of by
     * replacing it with a singleton set. A null pattern is ignored.
     *
     * @param pattern the pattern to remove, may be null
     * @param tupleExpr the tuple expression in which the pattern is to be removed
     *        (currently unused, the pattern replaces itself)
     */
    private void remove(StatementPattern pattern, TupleExpr tupleExpr) {
        if (pattern != null) {
            pattern.replaceWith(new SingletonSet());
        }
    }

    /**
     * Removes all StatementPatterns occurring in the given query from the tuple expression
     * of the given query model.
     *
     * @param query the query to remove from the tuple expression
     * @param query2 the query model in whose tuple expression the query is to be removed
     */
    private void removePatterns(QuerySpec query, QueryModel query2) {
        TupleExpr tupleExpr = query2.getTupleExpr();

        remove(query.getMatchesPattern(), tupleExpr);
        remove(query.getQueryPattern(), tupleExpr);
        remove(query.getScorePattern(), tupleExpr);
        remove(query.getPropertyPattern(), tupleExpr);
        remove(query.getSnippetPattern(), tupleExpr);
        remove(query.getTypePattern(), tupleExpr);
        // GEO extensions kreuzverweis
        remove(query.getFromPattern(), tupleExpr);
        remove(query.getToPattern(), tupleExpr);
        remove(query.getGeoTolerancePattern(), tupleExpr);
        remove(query.getGeoLatPattern(), tupleExpr);
        remove(query.getGeoLongPattern(), tupleExpr);
        // GEO extensions kreuzverweis
    }

}