psidev.psi.mi.search.engine.impl.AbstractSearchEngine.java Source code

Java tutorial

Introduction

Here is the source code for psidev.psi.mi.search.engine.impl.AbstractSearchEngine.java

Source

/**
 * Copyright 2007 The European Bioinformatics Institute, and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package psidev.psi.mi.search.engine.impl;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import psidev.psi.mi.search.SearchResult;
import psidev.psi.mi.search.engine.SearchEngine;
import psidev.psi.mi.search.engine.SearchEngineException;
import psidev.psi.mi.search.util.DefaultDocumentBuilder;
import psidev.psi.mi.search.util.DocumentBuilder;
import psidev.psi.mi.tab.model.BinaryInteraction;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * A Search Engine based on lucene
 *
 * @author Bruno Aranda (baranda@ebi.ac.uk)
 * @version $Id$
 */
public abstract class AbstractSearchEngine<T extends BinaryInteraction> implements SearchEngine<T> {
    private static final Log log = LogFactory.getLog(AbstractSearchEngine.class);

    private static final String WILDCARD = "*";

    protected Directory indexDirectory;

    private static int MAX_TOP_RESULTS = 200;

    /**
     * IndexSearcher is thread-safe, and the api recommends to open only one
     * and use it for all searches.
     * @see <a href="http://lucene.apache.org/java/2_9_0/api/all/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a>
     *
     */
    protected IndexSearcher indexSearcher;

    public AbstractSearchEngine(Directory indexDirectory) throws IOException {
        this(indexDirectory, null);
    }

    public AbstractSearchEngine(String indexDirectory) throws IOException {
        this(indexDirectory, null);
    }

    public AbstractSearchEngine(File indexDirectory) throws IOException {
        this(indexDirectory, null);
    }

    public AbstractSearchEngine(Directory indexDirectory, IndexWriter indexWriter) throws IOException {
        if (indexDirectory == null) {
            throw new NullPointerException("indexDirectory cannot be null");
        }

        this.indexDirectory = indexDirectory;

        try {
            IndexReader reader = IndexReader.open(indexDirectory);
            this.indexSearcher = new IndexSearcher(reader);
        }
        // directory is empty, needs to create the segment files manually because of a bug in lucene 3.6
        catch (IndexNotFoundException e) {
            if (indexWriter == null) {
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_30,
                        new StandardAnalyzer(Version.LUCENE_30));
                config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
                indexWriter = new IndexWriter(indexDirectory, config);
            }

            indexWriter.commit();

            try {
                IndexReader reader = IndexReader.open(indexDirectory);
                this.indexSearcher = new IndexSearcher(reader);
            } catch (Exception e2) {
                throw new ExceptionInInitializerError(e);
            }
        } catch (Exception e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    public AbstractSearchEngine(String indexDirectory, IndexWriter indexWriter) throws IOException {
        this(FSDirectory.open(new File(indexDirectory)), indexWriter);
    }

    public AbstractSearchEngine(File indexDirectory, IndexWriter indexWriter) throws IOException {
        this(FSDirectory.open(indexDirectory), indexWriter);
    }

    public void close() {
        closeIndexSearcher();
    }

    protected void closeIndexSearcher() {
        if (indexSearcher == null) {
            return;
        }
        try {
            indexSearcher.close();
        } catch (IOException e) {
            throw new SearchEngineException(e);
        }
    }

    protected void closeIndexReader(IndexReader indexReader) {
        if (indexReader == null) {
            return;
        }
        try {
            indexReader.close();
        } catch (IOException e) {
            throw new SearchEngineException(e);
        }
    }

    public SearchResult<T> search(String searchQuery, Integer firstResult, Integer maxResults)
            throws SearchEngineException {
        return search(searchQuery, firstResult, maxResults, null);
    }

    public SearchResult<T> search(Query searchQuery, Integer firstResult, Integer maxResults)
            throws SearchEngineException {
        return search(searchQuery, firstResult, maxResults, null);
    }

    public Query createQueryFor(String query) {
        if (query == null) {
            throw new NullPointerException("searchQuery cannot be null");
        }

        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, getSearchFields(), analyzer);
        Query queryResult = null;
        try {
            queryResult = parser.parse(query);
        } catch (ParseException e) {
            throw new SearchEngineException("Problem creating lucene query from string: " + query, e);
        }

        return queryResult;
    }

    public SearchResult<T> search(String searchQuery, Integer firstResult, Integer maxResults, Sort sort)
            throws SearchEngineException {
        if (searchQuery == null) {
            throw new NullPointerException("searchQuery cannot be null");
        }

        if (searchQuery.trim().equals(WILDCARD)) {
            return searchAll(firstResult, maxResults);
        }

        Query query = createQueryFor(searchQuery);

        return search(query, firstResult, maxResults, sort);
    }

    public SearchResult<T> search(Query query, Integer firstResult, Integer maxResults, Sort sort)
            throws SearchEngineException {
        if (log.isDebugEnabled())
            log.debug("Searching=\"" + query + "\" (first=" + firstResult + "/max=" + maxResults + ")");

        if (firstResult == null)
            firstResult = 0;
        if (maxResults == null)
            maxResults = MAX_TOP_RESULTS;

        long startTime = System.currentTimeMillis();

        TopDocs hits = null;

        int totalCount = 0;

        try {
            if (maxResults == 0) {
                TotalHitCountCollector countCollector = new TotalHitCountCollector();
                indexSearcher.search(query, countCollector);
                totalCount = countCollector.getTotalHits();
            } else if (sort != null) {
                hits = indexSearcher.search(query, maxResults + firstResult, sort);
            } else {
                hits = indexSearcher.search(query, maxResults + firstResult);
            }

            if (log.isDebugEnabled())
                log.debug("\tTime: " + (System.currentTimeMillis() - startTime) + "ms");
        } catch (Exception e) {
            throw new SearchEngineException(e);
        }

        if (hits != null) {
            totalCount = hits.totalHits;

            if (totalCount < firstResult) {
                if (log.isDebugEnabled())
                    log.debug("\tNo hits. No results returned");

                return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults, query);
            }

            int maxIndex = Math.min(totalCount, firstResult + maxResults);

            if (log.isDebugEnabled())
                log.debug("\tHits: " + hits.totalHits + ". Will return from " + firstResult + " to " + maxIndex);

            List<T> dataObjects = new ArrayList<T>();

            ScoreDoc[] scoreDocs = hits.scoreDocs;
            for (int i = firstResult; i < maxIndex; i++) {
                try {
                    Document doc = indexSearcher.doc(scoreDocs[i].doc);
                    T data = (T) createDocumentBuilder().createData(doc);
                    dataObjects.add(data);
                } catch (Exception e) {
                    throw new SearchEngineException(e);
                }
            }
            return new SearchResult<T>(dataObjects, totalCount, firstResult, maxResults, query);
        } else {
            return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults, query);
        }
    }

    protected DocumentBuilder createDocumentBuilder() {
        return new DefaultDocumentBuilder();
    }

    public SearchResult<T> searchAll(Integer firstResult, Integer maxResults) throws SearchEngineException {
        if (firstResult == null)
            firstResult = 0;
        if (maxResults == null)
            maxResults = MAX_TOP_RESULTS;

        IndexReader reader = indexSearcher.getIndexReader();

        int totalCount = reader.maxDoc();

        if (maxResults == 0) {
            return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults,
                    new WildcardQuery(new Term("", "*")));
        }

        // this is a hack to ignore any header introduced in the index by mistake (first development versions)
        if (reader.isDeleted(0)) {
            firstResult++;
            totalCount--;
        }

        if (firstResult > totalCount) {
            //            closeIndexReader(reader);
            return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults,
                    new WildcardQuery(new Term("", "*")));
        }

        int maxIndex = Math.min(totalCount, firstResult + maxResults);

        List<T> dataObjects = new ArrayList<T>();

        for (int i = firstResult; i < maxIndex; i++) {
            try {
                Document doc = reader.document(i);
                T data = (T) createDocumentBuilder().createData(doc);
                dataObjects.add(data);
            } catch (Exception e) {
                //                closeIndexReader(reader);
                throw new SearchEngineException(e);
            }
        }

        //        closeIndexReader(reader);
        return new SearchResult(dataObjects, totalCount, firstResult, maxResults,
                new WildcardQuery(new Term("", "*")));
    }

}