org.intermine.api.lucene.KeywordSearch.java Source code

Java tutorial

Introduction

Here is the source code for org.intermine.api.lucene.KeywordSearch.java

Source

package org.intermine.api.lucene;

/*
 * Copyright (C) 2002-2016 FlyMine
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  See the LICENSE file for more
 * information or http://www.gnu.org/copyleft/lesser.html.
 *
 */

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.Vector;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.QueryParser.Operator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermsFilter;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.intermine.api.InterMineAPI;
import org.intermine.api.data.Objects;
import org.intermine.metadata.ClassDescriptor;
import org.intermine.metadata.FieldDescriptor;
import org.intermine.model.InterMineObject;
import org.intermine.modelproduction.MetadataManager;
import org.intermine.modelproduction.MetadataManager.LargeObjectOutputStream;
import org.intermine.objectstore.ObjectStore;
import org.intermine.objectstore.ObjectStoreException;
import org.intermine.objectstore.intermine.ObjectStoreInterMineImpl;
import org.intermine.sql.Database;
import org.intermine.util.ObjectPipe;

import com.browseengine.bobo.api.BoboBrowser;
import com.browseengine.bobo.api.BoboIndexReader;
import com.browseengine.bobo.api.Browsable;
import com.browseengine.bobo.api.BrowseException;
import com.browseengine.bobo.api.BrowseHit;
import com.browseengine.bobo.api.BrowseRequest;
import com.browseengine.bobo.api.BrowseResult;
import com.browseengine.bobo.api.BrowseSelection;
import com.browseengine.bobo.api.FacetAccessible;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.facets.FacetHandler;
import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
import com.browseengine.bobo.facets.impl.PathFacetHandler;
import com.browseengine.bobo.facets.impl.SimpleFacetHandler;

/**
 * Allows for full-text searches over all metadata using the apache lucene
 * engine.
 *
 * Main entry point: contains methods for creating indices, restoring saved indices and
 * running searches over them.
 *
 * @author nils
 */

public final class KeywordSearch {
    // NOTE(review): not referenced in the visible part of this file; presumably the name of
    // the on-disk index directory - confirm against the index-creation code.
    private static final String LUCENE_INDEX_DIR = "keyword_search_index";

    /**
     * maximum number of hits returned by {@link #runLuceneSearch(String)}
     */
    public static final int MAX_HITS = 500;

    /**
     * maximum number of items to be displayed on a page; used as the result count of a
     * paginated browse search
     */
    public static final int PER_PAGE = 100;

    private static final Logger LOG = Logger.getLogger(KeywordSearch.class);

    // Lucene reader over the index directory; opened lazily by initKeywordSearch().
    private static IndexReader reader = null;
    // Faceting reader wrapping 'reader'; created lazily by initKeywordSearch().
    private static BoboIndexReader boboIndexReader = null;
    // NOTE(review): not used in the visible part of this file; presumably a hand-over queue
    // of documents for the indexing code - confirm.
    private static ObjectPipe<Document> indexingQueue = new ObjectPipe<Document>(100000);
    // The search index container; null until it has been loaded from the database
    // (initKeywordSearch) or created (saveIndexToDatabase).
    private static LuceneIndexContainer index = null;

    // Everything below is configuration filled in by parseProperties().

    // Contents of keyword_search.properties; null until the file has been loaded.
    private static Properties properties = null;
    // Value of 'index.temp.directory'.
    private static String tempDirectory = null;
    // Class -> reference names from 'index.references.<Class>' (null array if value is blank).
    private static Map<Class<? extends InterMineObject>, String[]> specialReferences;
    // Classes listed in 'index.ignore'.
    private static Set<Class<? extends InterMineObject>> ignoredClasses;
    // Class -> field names listed in 'index.ignore.fields'.
    private static Map<Class<? extends InterMineObject>, Set<String>> ignoredFields;
    // Class -> boost factor from 'index.boost.<Class>'.
    private static Map<ClassDescriptor, Float> classBoost;
    // Facet definitions from 'index.facet.single.*', 'index.facet.multi.*', 'index.facet.path.*'.
    private static Vector<KeywordSearchFacetData> facets;
    // Enables extra query logging and bobo explanations; set from 'search.debug'.
    private static boolean debugOutput;
    // 'Class.attribute' -> prefix from 'index.prefix.*'; stays null if none are configured.
    private static Map<String, String> attributePrefixes = null;

    /**
     * Private constructor: the members visible in this class are all static, so there is
     * no reason to create an instance.
     */
    private KeywordSearch() {
        //don't
    }

    /**
     * Loads {@code keyword_search.properties} from the classpath and fills the static
     * configuration used for indexing and searching.
     *
     * Recognised keys: {@code index.ignore}, {@code index.ignore.fields},
     * {@code index.references.<Class>}, {@code index.facet.single.<name>},
     * {@code index.facet.multi.<name>}, {@code index.facet.path.<name>},
     * {@code index.boost.<Class>}, {@code index.prefix.<Class.attribute>},
     * {@code search.debug} and {@code index.temp.directory}.
     *
     * Returns immediately if the properties were already loaded. Invalid entries are logged
     * and skipped; they do not abort parsing of the remaining entries.
     *
     * @param os object store whose model is used to resolve the configured class names
     */
    @SuppressWarnings("unchecked")
    private static synchronized void parseProperties(ObjectStore os) {
        if (properties != null) {
            return;
        }

        specialReferences = new HashMap<Class<? extends InterMineObject>, String[]>();
        ignoredClasses = new HashSet<Class<? extends InterMineObject>>();
        classBoost = new HashMap<ClassDescriptor, Float>();
        ignoredFields = new HashMap<Class<? extends InterMineObject>, Set<String>>();
        facets = new Vector<KeywordSearchFacetData>();
        debugOutput = true;

        // load config file to figure out special classes
        String configFileName = "keyword_search.properties";
        ClassLoader classLoader = KeywordSearch.class.getClassLoader();
        InputStream configStream = classLoader.getResourceAsStream(configFileName);
        if (configStream != null) {
            properties = new Properties();
            try {
                properties.load(configStream);

                for (Map.Entry<Object, Object> entry : properties.entrySet()) {
                    String key = (String) entry.getKey();
                    String value = ((String) entry.getValue()).trim();

                    if ("index.ignore".equals(key) && !StringUtils.isBlank(value)) {
                        String[] ignoreClassNames = value.split("\\s+");

                        for (String className : ignoreClassNames) {
                            ClassDescriptor cld = os.getModel().getClassDescriptorByName(className);

                            if (cld == null) {
                                LOG.error("Unknown class in config file: " + className);
                            } else {
                                addCldToIgnored(ignoredClasses, cld);
                            }
                        }
                    } else if ("index.ignore.fields".equals(key) && !StringUtils.isBlank(value)) {
                        String[] ignoredPaths = value.split("\\s+");

                        for (String ignoredPath : ignoredPaths) {
                            if (StringUtils.countMatches(ignoredPath, ".") != 1) {
                                LOG.error("Fields to ignore specified by 'index.ignore.fields'"
                                        + " should contain Class.field, e.g. Company.name");
                            } else {
                                // A positive limit keeps a trailing empty part, so "Class."
                                // yields an empty field name instead of a one-element array.
                                String[] parts = ignoredPath.split("\\.", 2);
                                String clsName = parts[0];
                                String fieldName = parts[1];

                                ClassDescriptor cld = os.getModel().getClassDescriptorByName(clsName);
                                if (cld != null) {
                                    FieldDescriptor fld = cld.getFieldDescriptorByName(fieldName);
                                    if (fld != null) {
                                        addToIgnoredFields(ignoredFields, cld, fieldName);
                                    } else {
                                        LOG.error("Field name '" + fieldName + "' not found for" + " class '"
                                                + clsName + "' specified in" + "'index.ignore.fields'");
                                    }
                                } else {
                                    LOG.error("Class name specified in 'index.ignore.fields'" + " not found: "
                                            + clsName);
                                }
                            }
                        }
                    } else if (key.startsWith("index.references.")) {
                        String classToIndex = key.substring("index.references.".length());
                        ClassDescriptor cld = os.getModel().getClassDescriptorByName(classToIndex);
                        if (cld != null) {
                            Class<? extends InterMineObject> cls = (Class<? extends InterMineObject>) cld.getType();

                            // special fields (references to follow) come as a
                            // space-separated list; a blank value is stored as null
                            String[] specialFields = null;
                            if (!StringUtils.isBlank(value)) {
                                specialFields = value.split("\\s+");
                            }

                            specialReferences.put(cls, specialFields);
                        } else {
                            LOG.error("keyword_search.properties: classDescriptor for '" + classToIndex
                                    + "' not found!");
                        }
                    } else if (key.startsWith("index.facet.single.")) {
                        String facetName = key.substring("index.facet.single.".length());
                        facets.add(new KeywordSearchFacetData(value, facetName, KeywordSearchFacetType.SINGLE));
                    } else if (key.startsWith("index.facet.multi.")) {
                        String facetName = key.substring("index.facet.multi.".length());
                        facets.add(new KeywordSearchFacetData(value, facetName, KeywordSearchFacetType.MULTI));
                    } else if (key.startsWith("index.facet.path.")) {
                        String facetName = key.substring("index.facet.path.".length());
                        // split on any whitespace run, like every other list in this file
                        String[] facetFields = value.split("\\s+");
                        facets.add(new KeywordSearchFacetData(facetFields, facetName, KeywordSearchFacetType.PATH));
                    } else if (key.startsWith("index.boost.")) {
                        String classToBoost = key.substring("index.boost.".length());
                        ClassDescriptor cld = os.getModel().getClassDescriptorByName(classToBoost);
                        if (cld != null) {
                            try {
                                classBoost.put(cld, Float.valueOf(value));
                            } catch (NumberFormatException e) {
                                LOG.error("keyword_search.properties: invalid boost '" + value
                                        + "' for class '" + classToBoost + "'", e);
                            }
                        } else {
                            LOG.error("keyword_search.properties: classDescriptor for '" + classToBoost
                                    + "' not found!");
                        }
                    } else if (key.startsWith("index.prefix.")) {
                        // match the full "index.prefix." so the substring below is always in range
                        String classAndAttribute = key.substring("index.prefix.".length());
                        addAttributePrefix(classAndAttribute, value);
                    } else if ("search.debug".equals(key) && !StringUtils.isBlank(value)) {
                        debugOutput = "1".equals(value) || "true".equalsIgnoreCase(value)
                                || "on".equalsIgnoreCase(value);
                    }
                }

                // read once, after the loop, so it is also set for an empty properties file
                tempDirectory = properties.getProperty("index.temp.directory", "");
            } catch (IOException e) {
                LOG.error("keyword_search.properties: error while loading file '" + configFileName + "'", e);
            } finally {
                try {
                    configStream.close();
                } catch (IOException e) {
                    LOG.warn("keyword_search.properties: could not close file '" + configFileName + "'", e);
                }
            }
        } else {
            LOG.error("keyword_search.properties: file '" + configFileName + "' not found!");
        }

        LOG.debug("Indexing - Ignored classes:");
        for (Class<? extends InterMineObject> class1 : ignoredClasses) {
            LOG.debug("- " + class1.getSimpleName());
        }

        LOG.debug("Indexing - Special References:");
        for (Entry<Class<? extends InterMineObject>, String[]> specialReference : specialReferences.entrySet()) {
            LOG.debug("- " + specialReference.getKey() + " = " + Arrays.toString(specialReference.getValue()));
        }

        LOG.debug("Indexing - Facets:");
        for (KeywordSearchFacetData facet : facets) {
            LOG.debug("- field = " + facet.getField() + ", name = " + facet.getName() + ", type = "
                    + facet.getType().toString());
        }

        LOG.debug("Indexing with and without attribute prefixes:");
        if (attributePrefixes != null) {
            for (Entry<String, String> prefixEntry : attributePrefixes.entrySet()) {
                LOG.debug("- class and attribute: " + prefixEntry.getKey() + " with prefix: "
                        + prefixEntry.getValue());
            }
        }

        LOG.info("Search - Debug mode: " + debugOutput);
        LOG.info("Indexing - Temp Dir: " + tempDirectory);
    }

    /**
     * Records the prefix configured for a class attribute via an
     * {@code index.prefix.<Class.attribute>} property.
     *
     * The entry is rejected with a warning if either argument is blank or if
     * {@code classAndAttribute} contains no '.'. The prefix map is created on first use.
     *
     * @param classAndAttribute the part of the property key after {@code index.prefix.},
     *            expected to look like {@code ClassName.attributeName}
     * @param prefix the configured prefix value
     */
    private static void addAttributePrefix(String classAndAttribute, String prefix) {
        boolean valid = !StringUtils.isBlank(classAndAttribute)
                && classAndAttribute.indexOf('.') != -1
                && !StringUtils.isBlank(prefix);
        if (!valid) {
            // the property family is 'index.prefix.*', so name it correctly in the warning
            LOG.warn("Invalid index.prefix configuration: '" + classAndAttribute + "' = '" + prefix
                    + "'. Should be className.attributeName = prefix.");
            return;
        }

        if (attributePrefixes == null) {
            attributePrefixes = new HashMap<String, String>();
        }
        attributePrefixes.put(classAndAttribute, prefix);
    }

    /**
     * Loads the lucene index and prepares the readers used for searching.
     *
     * Each piece of state (index, properties, lucene reader, bobo reader) is only set up if
     * it is still missing, so repeated calls are cheap. If no index can be loaded from the
     * database an error is logged and nothing else is initialised. I/O failures are logged
     * with their stack trace and not rethrown.
     *
     * @param im API for accessing object store
     * @param path path to store the fsdirectory in
     */
    public static synchronized void initKeywordSearch(InterMineAPI im, String path) {
        try {

            if (index == null) {
                // try to load index from database first
                index = loadIndexFromDatabase(im.getObjectStore(), path);
            }

            if (index == null) {
                LOG.error("lucene index missing!");
                return;
            }

            if (properties == null) {
                parseProperties(im.getObjectStore());
            }

            if (reader == null) {
                reader = IndexReader.open(index.getDirectory(), true);
            }

            if (boboIndexReader == null) {
                // prepare faceting: one handler per configured facet plus the fixed "Category"
                HashSet<FacetHandler<?>> facetHandlers = new HashSet<FacetHandler<?>>();
                facetHandlers.add(new SimpleFacetHandler("Category"));
                for (KeywordSearchFacetData facet : facets) {
                    if (facet.getType().equals(KeywordSearchFacetType.MULTI)) {
                        facetHandlers.add(new MultiValueFacetHandler(facet.getField()));
                    } else if (facet.getType().equals(KeywordSearchFacetType.PATH)) {
                        facetHandlers.add(new PathFacetHandler("path_" + facet.getName().toLowerCase()));
                    } else {
                        facetHandlers.add(new SimpleFacetHandler(facet.getField()));
                    }
                }

                boboIndexReader = BoboIndexReader.getInstance(reader, facetHandlers);

                LOG.debug("Fields:" + Arrays.toString(boboIndexReader.getFieldNames(FieldOption.ALL).toArray()));
                LOG.debug("Indexed fields:"
                        + Arrays.toString(boboIndexReader.getFieldNames(FieldOption.INDEXED).toArray()));
            }
        } catch (CorruptIndexException e) {
            // pass the exception as the throwable argument so the stack trace is logged
            LOG.error("Keyword search index is corrupt, search not initialised", e);
        } catch (IOException e) {
            LOG.error("I/O error while initialising keyword search", e);
        }
    }

    /**
     * Serializes an object, gzips it and stores it as a large binary under the given key.
     *
     * @param os object store; must be an ObjectStoreInterMineImpl, its database is used
     * @param key key the large binary is stored under
     * @param object the object to serialize
     * @throws IOException if serializing or writing fails
     * @throws SQLException if the large binary cannot be opened in the database
     */
    private static void writeObjectToDB(ObjectStore os, String key, Object object)
            throws IOException, SQLException {
        LOG.debug("Saving stream to database...");
        Database db = ((ObjectStoreInterMineImpl) os).getDatabase();

        LargeObjectOutputStream streamOut = null;
        GZIPOutputStream gzipStream = null;
        ObjectOutputStream objectStream = null;

        try {
            streamOut = MetadataManager.storeLargeBinary(db, key);
            gzipStream = new GZIPOutputStream(new BufferedOutputStream(streamOut));
            objectStream = new ObjectOutputStream(gzipStream);

            LOG.debug("GZipping and serializing object...");
            objectStream.writeObject(object);
        } finally {
            // Close only the outermost stream that was actually created: closing it flushes,
            // finishes the gzip data and closes every stream it wraps, so the inner streams
            // must not be finished/flushed/closed a second time.
            if (objectStream != null) {
                objectStream.close();
            } else if (gzipStream != null) {
                gzipStream.close();
            } else if (streamOut != null) {
                streamOut.close();
            }
        }

    }

    /**
     * Writes the index and its associated directory to the database using the
     * MetadataManager, creating the index first if there is none yet.
     *
     * The index container is stored as a gzipped, serialized object. The directory is stored
     * separately: an FSDirectory is zipped file by file, a RAMDirectory is serialized.
     * Any previously stored blobs are deleted first.
     *
     * An I/O error while zipping an FSDirectory is logged but not rethrown; other I/O or
     * SQL errors are rethrown wrapped in a RuntimeException.
     *
     * @param os intermine objectstore
     * @param classKeys map of classname to key field descriptors (from InterMineAPI)
     */
    public static void saveIndexToDatabase(ObjectStore os, Map<String, List<FieldDescriptor>> classKeys) {
        try {
            if (index == null) {
                createIndex(os, classKeys);
            }

            LOG.debug("Deleting previous search index blob from db...");
            long startTime = System.currentTimeMillis();
            Database db = ((ObjectStoreInterMineImpl) os).getDatabase();
            boolean blobExisted = MetadataManager.deleteLargeBinary(db, MetadataManager.SEARCH_INDEX);
            if (blobExisted) {
                LOG.debug("Deleting previous search index blob from db took: "
                        + (System.currentTimeMillis() - startTime) + ".");
            } else {
                LOG.debug("No previous search index blob found in db");
            }

            LOG.debug("Saving search index information to database...");
            writeObjectToDB(os, MetadataManager.SEARCH_INDEX, index);
            LOG.debug("Successfully saved search index information to database.");

            // if we have a FSDirectory we need to zip and save that separately
            if ("FSDirectory".equals(index.getDirectoryType())) {
                ZipOutputStream zipOut = null;
                final int bufferSize = 2048;

                try {
                    LOG.debug("Zipping up FSDirectory...");

                    LOG.debug("Deleting previous search index directory blob from db...");
                    startTime = System.currentTimeMillis();
                    blobExisted = MetadataManager.deleteLargeBinary(db, MetadataManager.SEARCH_INDEX_DIRECTORY);
                    if (blobExisted) {
                        LOG.debug("Deleting previous search index directory blob from db took: "
                                + (System.currentTimeMillis() - startTime) + ".");
                    } else {
                        LOG.debug("No previous search index directory blob found in db");
                    }
                    LargeObjectOutputStream streamOut = MetadataManager.storeLargeBinary(db,
                            MetadataManager.SEARCH_INDEX_DIRECTORY);

                    zipOut = new ZipOutputStream(streamOut);

                    byte[] data = new byte[bufferSize];

                    // get a list of files from current directory
                    File dir = ((FSDirectory) index.getDirectory()).getFile();
                    String[] files = dir.list();
                    if (files == null) {
                        // File.list() returns null when the path is not a readable directory
                        throw new IOException("Cannot list files in index directory: "
                                + dir.getAbsolutePath());
                    }

                    for (String fileName : files) {
                        File file = new File(dir, fileName);
                        LOG.debug("Getting length of file: " + file.getName());
                        long fileLength = file.length();
                        LOG.debug("Zipping file: " + file.getName() + " (" + fileLength / 1024 / 1024 + " MB)");

                        BufferedInputStream fileInput =
                                new BufferedInputStream(new FileInputStream(file), bufferSize);

                        try {
                            zipOut.putNextEntry(new ZipEntry(fileName));

                            // only used for progress logging
                            long total = fileLength / bufferSize;
                            long progress = 0;

                            int count;
                            while ((count = fileInput.read(data, 0, bufferSize)) != -1) {
                                zipOut.write(data, 0, count);
                                progress++;
                                if (progress % 1000 == 0) {
                                    LOG.debug("Written " + progress + " of " + total + " batches for file: "
                                            + file.getName());
                                }
                            }
                        } finally {
                            LOG.debug("Closing file: " + file.getName() + "...");
                            fileInput.close();
                        }
                        LOG.debug("Finished storing file: " + file.getName());
                    }
                } catch (IOException e) {
                    LOG.error("Error storing index", e);
                } finally {
                    if (zipOut != null) {
                        zipOut.close();
                    }
                }
            } else if ("RAMDirectory".equals(index.getDirectoryType())) {
                LOG.debug("Saving RAM directory to database...");
                writeObjectToDB(os, MetadataManager.SEARCH_INDEX_DIRECTORY, index.getDirectory());
                LOG.debug("Successfully saved RAM directory to database.");
            }
        } catch (IOException e) {
            LOG.error("I/O error while saving search index to database", e);
            throw new RuntimeException("Index creation failed: ", e);
        } catch (SQLException e) {
            LOG.error("Database error while saving search index to database", e);
            throw new RuntimeException("Index creation failed: ", e);
        }
    }

    /**
     * Performs a keyword search over all document metadata fields with lucene.
     *
     * Returns an empty map if the search index has not been initialised, if the query
     * cannot be parsed or if an I/O error occurs. Hits whose stored "id" is not a valid
     * integer are logged and skipped.
     *
     * @param searchString
     *            string to search for
     * @return map of document IDs with their respective scores, in hit order, containing
     *         at most {@link #MAX_HITS} entries
     * @deprecated Use runBrowseSearch instead.
     */
    @Deprecated
    public static Map<Integer, Float> runLuceneSearch(String searchString) {
        LinkedHashMap<Integer, Float> matches = new LinkedHashMap<Integer, Float>();

        // same guard as runBrowseSearch: without an initialised index there is nothing to search
        if (index == null || reader == null) {
            LOG.warn("Keyword search index not initialised, returning no results");
            return matches;
        }

        String queryString = parseQueryString(searchString);

        long time = System.currentTimeMillis();

        IndexSearcher searcher = null;
        try {
            searcher = new IndexSearcher(reader);

            Analyzer analyzer = new WhitespaceAnalyzer();
            org.apache.lucene.search.Query query;

            // pass entire list of field names to the multi-field parser
            // => search through all fields
            String[] fieldNamesArray = new String[index.getFieldNames().size()];
            index.getFieldNames().toArray(fieldNamesArray);
            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, fieldNamesArray, analyzer,
                    index.getFieldBoosts());
            query = queryParser.parse(queryString);

            // required to expand search terms
            query = query.rewrite(reader);
            LOG.debug("Actual query: " + query);

            TopDocs topDocs = searcher.search(query, MAX_HITS);

            LOG.debug("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "'");

            // scoreDocs holds only the hits actually returned, totalHits may be larger
            for (int i = 0; i < MAX_HITS && i < topDocs.scoreDocs.length; i++) {
                Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
                try {
                    Integer id = Integer.valueOf(doc.get("id"));
                    matches.put(id, Float.valueOf(topDocs.scoreDocs[i].score));
                } catch (NumberFormatException e) {
                    LOG.info("Invalid id '" + doc.get("id") + "' in search hit, skipping", e);
                }
            }
        } catch (ParseException e) {
            // just return an empty list
            LOG.info("Exception caught, returning no results", e);
        } catch (IOException e) {
            // just return an empty list
            LOG.info("Exception caught, returning no results", e);
        } finally {
            try {
                if (searcher != null) {
                    searcher.close();
                }
            } catch (IOException e) {
                LOG.warn("Error closing searcher", e);
            }
        }

        LOG.info("Lucene search finished in " + (System.currentTimeMillis() - time) + " ms");

        return matches;
    }

    /**
     * Builds the facet objects for a search result.
     *
     * A facet is only included if the browse result's facet map has an entry for its field.
     *
     * @param result search result
     * @param facetVector facets for search results
     * @param facetValues values for facets, keyed by facet field
     * @return the facets found in the search result, in the order of {@code facetVector}
     */
    public static Vector<KeywordSearchFacet> parseFacets(BrowseResult result,
            Vector<KeywordSearchFacetData> facetVector, Map<String, String> facetValues) {
        final long startedAt = System.currentTimeMillis();
        final Vector<KeywordSearchFacet> parsed = new Vector<KeywordSearchFacet>();

        for (KeywordSearchFacetData facetData : facetVector) {
            final FacetAccessible accessible = result.getFacetMap().get(facetData.getField());
            if (accessible == null) {
                // the result carries no counts for this facet
                continue;
            }
            final KeywordSearchFacet parsedFacet = new KeywordSearchFacet(facetData.getField(),
                    facetData.getName(), facetValues.get(facetData.getField()), accessible.getFacets());
            parsed.add(parsedFacet);
        }

        final long elapsed = System.currentTimeMillis() - startedAt;
        LOG.debug("Parsing " + parsed.size() + " facets took " + elapsed + " ms");
        return parsed;
    }

    /**
     * Converts raw browse hits into search hits, pairing each with its object.
     *
     * Hits without stored fields are logged as errors and skipped. Hits whose stored "id"
     * is not a valid integer are logged and skipped. The object of a hit is null if
     * {@code objMap} has no entry for its id.
     *
     * @param browseHits search results
     * @param objMap map from object id to object
     * @return the search hits, in the order of {@code browseHits}
     */
    public static Vector<KeywordSearchHit> getSearchHits(BrowseHit[] browseHits,
            Map<Integer, InterMineObject> objMap) {
        long time = System.currentTimeMillis();
        Vector<KeywordSearchHit> searchHits = new Vector<KeywordSearchHit>();
        for (BrowseHit browseHit : browseHits) {
            try {
                Document doc = browseHit.getStoredFields();
                if (doc == null) {
                    LOG.error("doc is null for browseHit " + browseHit);
                } else {
                    Integer id = Integer.valueOf(doc.getFieldable("id").stringValue());
                    InterMineObject obj = objMap.get(id);
                    searchHits.add(new KeywordSearchHit(browseHit.getScore(), doc, obj));
                }
            } catch (NumberFormatException e) {
                // skip the hit, but leave a trace like getObjectIds() does
                LOG.info("Invalid id '" + browseHit.getField("id") + "' for hit '" + browseHit + "'", e);
            }
        }
        LOG.debug("Creating list of search hits took " + (System.currentTimeMillis() - time) + " ms");
        return searchHits;
    }

    /**
     * Collects the object ids stored in the "id" field of the given hits.
     *
     * Hits without stored fields are skipped; hits whose id is not a valid integer are
     * logged and skipped.
     *
     * @param browseHits the query results.
     *
     * @return set of IDs found in the search results
     */
    public static Set<Integer> getObjectIds(BrowseHit[] browseHits) {
        final long startedAt = System.currentTimeMillis();
        final Set<Integer> ids = new HashSet<Integer>();

        for (int i = 0; i < browseHits.length; i++) {
            final BrowseHit hit = browseHits[i];
            try {
                final Document stored = hit.getStoredFields();
                if (stored == null) {
                    continue;
                }
                final String rawId = stored.getFieldable("id").stringValue();
                ids.add(Integer.valueOf(rawId));
            } catch (NumberFormatException e) {
                LOG.info("Invalid id '" + hit.getField("id") + "' for hit '" + hit + "'", e);
            }
        }

        final long elapsed = System.currentTimeMillis() - startedAt;
        LOG.debug("Getting IDs took " + elapsed + " ms");
        return ids;
    }

    /**
     * Run a browse search and get back both search results and facet information.
     *
     * If the underlying browse search returns null, the result has no hits, no facets and
     * a total of 0.
     *
     * @param im The InterMine state object.
     * @param searchString The search input.
     * @param offset An offset.
     * @param facetValues The facets selected.
     * @param ids A collection of objects to restrict the search to.
     * @return An object which provides access to hits and facets.
     * @throws ObjectStoreException If we can't fetch objects.
     */
    public static ResultsWithFacets runBrowseWithFacets(InterMineAPI im, String searchString, int offset,
            Map<String, String> facetValues, List<Integer> ids) throws ObjectStoreException {
        // last parameter used only when creating lists
        BrowseResult results = runBrowseSearch(searchString, offset, facetValues, ids, 0);
        Collection<KeywordSearchFacet> searchResultsFacets = Collections.emptySet();
        Collection<KeywordSearchHit> searchHits = Collections.emptySet();
        int totalHits = 0;
        if (results != null) {
            try {
                totalHits = results.getNumHits();
                LOG.debug("Browse found " + totalHits + " hits");
                BrowseHit[] browseHits = results.getHits();
                Set<Integer> objectIds = getObjectIds(browseHits);
                Map<Integer, InterMineObject> objMap = Objects.getObjects(im, objectIds);
                searchHits = getSearchHits(browseHits, objMap);
                searchResultsFacets = parseFacets(results, facets, facetValues);
            } finally {
                // release the browse result even if fetching the objects fails
                results.close();
            }
        }
        return new ResultsWithFacets(searchHits, searchResultsFacets, totalHits);
    }

    /**
     * Performs a paginated keyword search using bobo-browse for faceting and pagination.
     *
     * Delegates to the six-argument overload with pagination switched on and a list size
     * of 0; the {@code listSize} argument of this overload is not forwarded.
     *
     * @param searchString string to search for
     * @param offset display offset
     * @param facetValues map of 'facet field name' to 'value to restrict field to' (optional)
     * @param ids ids to restrict the search to (for search in list)
     * @param listSize size of the list (used only when creating one)
     * @return bobo browse result or null if failed
     */
    public static BrowseResult runBrowseSearch(String searchString, int offset, Map<String, String> facetValues,
            List<Integer> ids, int listSize) {
        final boolean paginate = true;
        final int noListSize = 0;
        return runBrowseSearch(searchString, offset, facetValues, ids, paginate, noListSize);
    }

    /**
     * Performs a keyword search using bobo-browse for faceting and pagination.
     * <p>
     * The search string is normalised by {@code parseQueryString}, parsed against every field
     * name known to the index (terms are AND-ed by default, leading wildcards are allowed) and
     * rewritten against the reader before being handed to bobo-browse. Parse, I/O and browse
     * failures are logged and result in {@code null} being returned.
     *
     * @param searchString string to search for
     * @param offset display offset
     * @param facetValues map of 'facet field name' to 'value to restrict field to'; may be
     *        {@code null} or empty when no facet restriction is wanted
     * @param ids ids to restrict the search to (for search in list); {@code null} or empty
     *        means no id restriction
     * @param pagination if TRUE only one page ({@code PER_PAGE} hits) is requested
     * @param listSize number of hits to request when {@code pagination} is FALSE; 0 falls back
     *        to 10000
     * @return bobo browse result, or null if no index is loaded or the search failed
     */
    public static BrowseResult runBrowseSearch(String searchString, int offset, Map<String, String> facetValues,
            List<Integer> ids, boolean pagination, int listSize) {
        BrowseResult result = null;
        if (index == null) {
            return result;
        }
        long time = System.currentTimeMillis();
        String queryString = parseQueryString(searchString);

        try {
            Analyzer analyzer = new WhitespaceAnalyzer();

            // pass entire list of field names to the multi-field parser
            // => search through all fields
            String[] fieldNamesArray = new String[index.getFieldNames().size()];

            index.getFieldNames().toArray(fieldNamesArray);
            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, fieldNamesArray, analyzer);
            queryParser.setDefaultOperator(Operator.AND);
            queryParser.setAllowLeadingWildcard(true);
            org.apache.lucene.search.Query query = queryParser.parse(queryString);

            // required to expand search terms
            query = query.rewrite(reader);

            if (debugOutput) {
                LOG.debug("Rewritten query: " + query);
            }

            // initialize request
            BrowseRequest browseRequest = new BrowseRequest();
            if (debugOutput) {
                browseRequest.setShowExplanation(true);
            }
            browseRequest.setQuery(query);
            browseRequest.setFetchStoredFields(true);

            if (ids != null && !ids.isEmpty()) {
                TermsFilter idFilter = new TermsFilter(); //we may want fieldcachetermsfilter

                for (int id : ids) {
                    idFilter.addTerm(new Term("id", Integer.toString(id)));
                }

                browseRequest.setFilter(idFilter);
            }

            // pagination
            browseRequest.setOffset(offset);
            if (pagination) {
                // used on keywordsearch results page
                browseRequest.setCount(PER_PAGE);
            } else {
                // when creating lists from results
                // the 0 check should not be necessary; it reproduces previous behaviour
                int hitCount = (listSize == 0) ? 10000 : listSize;
                browseRequest.setCount(hitCount);
            }

            // add faceting selections; the map is optional, so tolerate null
            if (facetValues != null) {
                for (Entry<String, String> facetValue : facetValues.entrySet()) {
                    BrowseSelection browseSelection = new BrowseSelection(facetValue.getKey());
                    browseSelection.addValue(facetValue.getValue());
                    browseRequest.addSelection(browseSelection);
                }
            }

            // order faceting results by hits
            FacetSpec orderByHitsSpec = new FacetSpec();
            orderByHitsSpec.setOrderBy(FacetSortSpec.OrderHitsDesc);
            browseRequest.setFacetSpec("Category", orderByHitsSpec);
            for (KeywordSearchFacetData facet : facets) {
                browseRequest.setFacetSpec(facet.getField(), orderByHitsSpec);
            }

            LOG.debug("Prepared browserequest in " + (System.currentTimeMillis() - time) + " ms");
            time = System.currentTimeMillis();

            // execute query and return result
            Browsable browser = null;
            try {
                browser = new BoboBrowser(boboIndexReader);
                result = browser.browse(browseRequest);
            } finally {
                if (browser != null) {
                    browser.close();
                }
            }

            if (debugOutput) {
                // explain the score of the first few hits only
                for (int i = 0; i < result.getHits().length && i < 5; i++) {
                    Explanation expl = result.getHits()[i].getExplanation();
                    if (expl != null) {
                        LOG.debug(result.getHits()[i].getStoredFields().getFieldable("id")
                                + " - score explanation: " + expl.toString());
                    }
                }
            }
        } catch (ParseException e) {
            // just return an empty list
            LOG.info("Exception caught, returning no results", e);
        } catch (IOException e) {
            // just return an empty list
            LOG.info("Exception caught, returning no results", e);
        } catch (BrowseException e) {
            // just return an empty list
            LOG.info("Exception caught, returning no results", e);
        }

        LOG.debug("Bobo browse finished in " + (System.currentTimeMillis() - time) + " ms");

        return result;
    }

    /**
     * Normalises raw user input into a string that is handed to the Lucene query parser.
     * <ol>
     * <li>a '+' surrounded by whitespace between two words is turned into {@code AND}</li>
     * <li>a single-quoted phrase containing a space is re-quoted with double quotes</li>
     * <li>every remaining occurrence of a listed query-syntax token is replaced by '*'</li>
     * <li>the result is lower-cased, with boolean operators kept upper-case</li>
     * </ol>
     *
     * @param qs the search string as entered
     * @return the normalised query string
     */
    private static String parseQueryString(String qs) {
        // "a + b" => "a AND b"
        String normalised = qs.replaceAll("\\b(\\s+)\\+(\\s+)\\b", "$1AND$2");
        // 'two words' => "two words" so the phrase stays together
        normalised = normalised.replaceAll("(^|\\s+)'(\\b[^']+ [^']+\\b)'(\\s+|$)", "$1\"$2\"$3");
        // escape special characters, see http://lucene.apache.org/java/2_9_0/queryparsersyntax.html
        final String[] specialCharacters = { "+", "-", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^", "~", "?",
                ":", "\\" };
        for (int i = 0; i < specialCharacters.length; i++) {
            // String.replace returns the input unchanged when the token does not occur
            normalised = normalised.replace(specialCharacters[i], "*");
        }
        return toLowerCase(normalised);
    }

    /**
     * Lower-cases a query string word by word while forcing the boolean operators
     * {@code OR}, {@code AND} and {@code NOT} (matched case-insensitively) to upper case.
     * <p>
     * The input is split on single spaces, so consecutive spaces inside the string are kept;
     * leading and trailing whitespace is trimmed from the result. Case conversion uses a fixed
     * locale so that the outcome does not depend on the JVM default locale (e.g. under a
     * Turkish default locale "I" would otherwise become a dotless "ı").
     *
     * @param s the string to convert
     * @return the converted string
     */
    private static String toLowerCase(String s) {
        StringBuilder sb = new StringBuilder();
        for (String bit : s.split(" ")) {
            // booleans have to stay UPPER
            if ("OR".equalsIgnoreCase(bit) || "AND".equalsIgnoreCase(bit) || "NOT".equalsIgnoreCase(bit)) {
                sb.append(bit.toUpperCase(java.util.Locale.ENGLISH));
            } else {
                sb.append(bit.toLowerCase(java.util.Locale.ENGLISH));
            }
            sb.append(' ');
        }
        return sb.toString().trim();
    }

    /**
     * Restores the search index container and its Lucene directory from the database behind
     * the given object store.
     *
     * @param os object store; must be an {@code ObjectStoreInterMineImpl}, otherwise an error
     *        is logged and null is returned
     * @param path base path passed on to the directory restore
     * @return the restored container with its directory set, or null if the index or its
     *         directory could not be loaded (failures are logged)
     */
    private static LuceneIndexContainer loadIndexFromDatabase(ObjectStore os, String path) {
        LOG.debug("Attempting to restore search index from database...");
        if (!(os instanceof ObjectStoreInterMineImpl)) {
            LOG.error("ObjectStore is of wrong type!");
            return null;
        }

        Database db = ((ObjectStoreInterMineImpl) os).getDatabase();
        try {
            LuceneIndexContainer container = restoreIndex(db);
            if (container == null) {
                return null;
            }

            // the container records which kind of directory was stored alongside it
            Directory restoredDir = restoreSearchDirectory(container.getDirectoryType(), path, db);
            if (restoredDir == null) {
                LOG.error("Could not load directory");
                return null;
            }
            container.setDirectory(restoredDir);
            return container;
        } catch (ClassNotFoundException e) {
            LOG.error("Could not load search index", e);
        } catch (SQLException e) {
            LOG.error("Could not load search index", e);
        } catch (IOException e) {
            LOG.error("Could not load search index", e);
        }
        return null;
    }

    /**
     * Reads the stored search directory blob from the database and restores it as the
     * requested kind of Lucene directory.
     *
     * @param dirType "FSDirectory" or "RAMDirectory"; any other value is logged and yields null
     * @param path base path used when restoring an FSDirectory
     * @param db database holding the stored directory
     * @return the restored directory, or null if nothing is stored or the type is unknown
     * @throws SQLException if reading from the database fails
     * @throws IOException if the stored data cannot be read or written
     * @throws FileNotFoundException if an index file cannot be created
     * @throws ClassNotFoundException if a stored RAMDirectory cannot be deserialised
     */
    private static Directory restoreSearchDirectory(String dirType, String path, Database db)
            throws SQLException, IOException, FileNotFoundException, ClassNotFoundException {
        LOG.debug("Attempting to restore search directory from database...");
        InputStream stored = MetadataManager.readLargeBinary(db, MetadataManager.SEARCH_INDEX_DIRECTORY);

        if (stored == null) {
            LOG.warn("Could not find search directory!");
            return null;
        }

        try {
            if ("FSDirectory".equals(dirType)) {
                return readFSDirectory(path, stored);
            }
            if ("RAMDirectory".equals(dirType)) {
                return readRAMDirectory(stored);
            }
            LOG.warn("Unknown directory type specified: " + dirType);
            return null;
        } finally {
            // always release the database stream, whichever branch was taken
            stored.close();
        }
    }

    /**
     * Unpacks a zipped index from the given stream into {@code path/LUCENE_INDEX_DIR} and
     * opens that folder as an FSDirectory.
     * <p>
     * Files already present in the target folder are deleted first; if the folder does not
     * exist it is created. Each zip entry is written as a file named after the entry. Entries
     * whose name would resolve outside the target folder are rejected.
     *
     * @param path base path under which the index folder lives
     * @param is stream delivering the zipped index; it is closed by this method
     * @return the opened directory
     * @throws IOException if the folder cannot be created, an entry is rejected, or reading
     *         or writing fails
     * @throws FileNotFoundException if an index file cannot be created
     */
    private static FSDirectory readFSDirectory(String path, InputStream is)
            throws IOException, FileNotFoundException {
        long time = System.currentTimeMillis();
        final int bufferSize = 2048;
        File directoryPath = new File(path + File.separator + LUCENE_INDEX_DIR);
        LOG.debug("Directory path: " + directoryPath);

        // make sure we start with a new index
        if (directoryPath.exists()) {
            // list() returns null when the path is not a directory or cannot be read
            String[] files = directoryPath.list();
            if (files != null) {
                for (String name : files) {
                    LOG.info("Deleting old file: " + name);
                    if (!new File(directoryPath, name).delete()) {
                        LOG.warn("Could not delete old file: " + name);
                    }
                }
            }
        } else if (!directoryPath.mkdir() && !directoryPath.isDirectory()) {
            throw new IOException("Could not create index directory: " + directoryPath);
        }

        // prefix every extracted file must have once its path is canonicalised
        String canonicalRoot = directoryPath.getCanonicalPath() + File.separator;
        byte[] data = new byte[bufferSize];

        ZipInputStream zis = new ZipInputStream(is);
        ZipEntry entry;
        try {
            while ((entry = zis.getNextEntry()) != null) {
                // getSize() reports the uncompressed size in bytes (-1 if unknown)
                LOG.info("Extracting: " + entry.getName() + " (" + entry.getSize() + " bytes)");

                File target = new File(directoryPath, entry.getName());
                // guard against entry names such as "../x" escaping the index folder
                if (!target.getCanonicalPath().startsWith(canonicalRoot)) {
                    throw new IOException("Zip entry outside of index directory: " + entry.getName());
                }

                BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(target), bufferSize);
                try {
                    int count;
                    while ((count = zis.read(data, 0, bufferSize)) != -1) {
                        bos.write(data, 0, count);
                    }
                } finally {
                    // close() flushes the buffer before releasing the file
                    bos.close();
                }
            }
        } finally {
            zis.close();
        }

        FSDirectory directory = FSDirectory.open(directoryPath);

        LOG.info("Successfully restored FS directory from database in " + (System.currentTimeMillis() - time)
                + " ms");
        return directory;
    }

    /**
     * Restores a RAMDirectory from a gzipped, Java-serialised object stream.
     *
     * @param is stream delivering the gzipped serialised directory; closed by this method
     * @return the restored directory, or null if the stored object is not a RAMDirectory
     * @throws IOException if the stream cannot be read
     * @throws ClassNotFoundException if the class of the stored object cannot be found
     */
    private static RAMDirectory readRAMDirectory(InputStream is) throws IOException, ClassNotFoundException {
        long time = System.currentTimeMillis();
        GZIPInputStream gzipInput = new GZIPInputStream(is);

        try {
            ObjectInputStream objectInput = new ObjectInputStream(gzipInput);
            try {
                // NOTE(review): native deserialisation; the blob is assumed to come from the
                // mine's own database and to be trusted
                Object object = objectInput.readObject();

                // the type checked here must be the type we cast to
                if (object instanceof RAMDirectory) {
                    RAMDirectory directory = (RAMDirectory) object;

                    time = System.currentTimeMillis() - time;
                    LOG.info("Successfully restored RAM directory" + " from database in " + time + " ms");
                    return directory;
                }
                LOG.warn("Object from DB has wrong class:"
                        + (object == null ? "null" : object.getClass().getName()));
            } finally {
                objectInput.close();
            }
        } finally {
            // also releases the inflater if the ObjectInputStream could not be created
            gzipInput.close();
        }
        return null;
    }

    /**
     * Reads the gzipped, Java-serialised index container stored in the database.
     *
     * @param db database holding the stored search index
     * @return the restored container, or null if nothing is stored or the stored object is
     *         not a LuceneIndexContainer
     * @throws IOException if the stored data cannot be read
     * @throws ClassNotFoundException if the class of the stored object cannot be found
     * @throws SQLException if reading from the database fails
     */
    private static LuceneIndexContainer restoreIndex(Database db)
            throws IOException, ClassNotFoundException, SQLException {

        long time = System.currentTimeMillis();
        InputStream is = MetadataManager.readLargeBinary(db, MetadataManager.SEARCH_INDEX);

        if (is == null) {
            LOG.warn("No search index stored in this DB.");
            return null;
        }

        // nested try/finally so every stream that was opened is closed, even when one of the
        // wrapping constructors throws
        try {
            GZIPInputStream gzipInput = new GZIPInputStream(is);
            try {
                ObjectInputStream objectInput = new ObjectInputStream(gzipInput);
                try {
                    Object object = objectInput.readObject();

                    if (object instanceof LuceneIndexContainer) {
                        LOG.info("Successfully restored search index information" + " from database in "
                                + (System.currentTimeMillis() - time) + " ms");
                        // log what was just restored, not the (possibly stale) static field
                        LOG.debug("Index: " + object);
                        return (LuceneIndexContainer) object;
                    }
                    LOG.warn("Object from DB has wrong class:"
                            + (object == null ? "null" : object.getClass().getName()));
                    return null;
                } finally {
                    objectInput.close();
                }
            } finally {
                gzipInput.close();
            }
        } finally {
            is.close();
        }
    }

    /**
     * Builds a fresh Lucene index from the objects delivered by a fetcher thread.
     * <p>
     * Starts an {@code InterMineObjectFetcher}, creates a new index container backed by a
     * temp folder (falling back to {@code java.io.tmpdir} if the configured temp directory
     * cannot be used) and adds every document taken from the indexing queue to an
     * IndexWriter. A document that fails to be added is logged and skipped. When the queue is
     * drained the index is optimised and the writer closed.
     *
     * @param os object store to index
     * @param classKeys class keys passed on to the fetcher
     * @return the folder holding the index files
     * @throws IOException if no temp folder could be created or the writer cannot be opened
     * @throws RuntimeException if the fetcher thread reported an exception
     */
    private static File createIndex(ObjectStore os, Map<String, List<FieldDescriptor>> classKeys)
            throws IOException {
        long time = System.currentTimeMillis();
        File tempFile = null;
        LOG.debug("Creating keyword search index...");

        parseProperties(os);

        LOG.info("Starting fetcher thread...");
        InterMineObjectFetcher fetchThread = new InterMineObjectFetcher(os, classKeys, indexingQueue,
                ignoredClasses, ignoredFields, specialReferences, classBoost, facets, attributePrefixes);
        fetchThread.start();

        // index the docs queued by the fetchers
        LOG.info("Preparing indexer...");
        index = new LuceneIndexContainer();
        try {
            tempFile = makeTempFile(tempDirectory);
        } catch (IOException e) {
            String tmpDir = System.getProperty("java.io.tmpdir");
            LOG.warn("Failed to create temp directory " + tempDirectory + " trying " + tmpDir + " instead", e);
            try {
                tempFile = makeTempFile(tmpDir);
            } catch (IOException ee) {
                LOG.warn("Failed to create temp directory in " + tmpDir, ee);
                throw ee;
            }
        }

        LOG.info("Index directory: " + tempFile.getAbsolutePath());

        IndexWriter writer;
        writer = new IndexWriter(index.getDirectory(), new WhitespaceAnalyzer(), true,
                IndexWriter.MaxFieldLength.UNLIMITED); //autocommit = false?
        writer.setMergeFactor(10); //10 default, higher values = more parts
        writer.setRAMBufferSizeMB(64); //flush to disk when docs take up X MB

        int indexed = 0;

        // loop and index while we still have fetchers running
        LOG.debug("Starting to index...");
        while (indexingQueue.hasNext()) {
            Document doc = indexingQueue.next();

            // nothing in the queue?
            if (doc != null) {
                try {
                    writer.addDocument(doc);
                    indexed++;

                    // report progress only when the counter actually advanced, so a run of
                    // failing documents does not repeat the same line
                    if (indexed % 10000 == 1) {
                        long elapsed = System.currentTimeMillis() - time;
                        LOG.info("docs indexed=" + indexed + "; thread state=" + fetchThread.getState()
                                + "; docs/ms=" + indexed * 1.0F / elapsed + "; memory="
                                + Runtime.getRuntime().freeMemory() / 1024 + "k/"
                                + Runtime.getRuntime().maxMemory() / 1024 + "k" + "; time="
                                + elapsed + "ms");
                    }
                } catch (IOException e) {
                    LOG.error("Failed to submit #" + doc.getFieldable("id") + " to the index", e);
                }
            }
        }
        if (fetchThread.getException() != null) {
            try {
                writer.close();
            } catch (Exception e) {
                LOG.error("Error closing writer while handling exception.", e);
            }
            throw new RuntimeException("Indexing failed.", fetchThread.getException());
        }
        index.getFieldNames().addAll(fetchThread.getFieldNames());
        LOG.debug("Indexing done, optimizing index files...");
        try {
            try {
                writer.optimize();
            } finally {
                // close even if optimize() failed, otherwise the writer stays open
                writer.close();
            }
        } catch (IOException e) {
            LOG.error("IOException while optimizing and closing IndexWriter", e);
        }

        time = System.currentTimeMillis() - time;
        int seconds = (int) (time / 1000);
        LOG.info("Indexing of " + indexed + " documents finished in "
                + String.format("%02d:%02d.%03d", seconds / 60, seconds % 60, time % 1000)
                + " minutes");
        return tempFile;
    }

    /**
     * Reserves a uniquely named folder for the index inside the given directory and points
     * the static index container at it.
     * <p>
     * A temp file is created to obtain a unique name and immediately deleted; the same path
     * is then opened as an FSDirectory and registered on {@code index}. Afterwards the path is
     * emptied if it exists, or created as a folder if it does not.
     *
     * @param tempDir directory in which the index folder is created
     * @return the index folder
     * @throws IOException if the temp file cannot be created or deleted, or the directory
     *         cannot be opened
     */
    private static File makeTempFile(String tempDir) throws IOException {
        LOG.debug("Creating search index tmp dir: " + tempDir);
        File indexDir = File.createTempFile("search_index", "", new File(tempDir));
        boolean placeholderRemoved = indexDir.delete();
        if (!placeholderRemoved) {
            throw new IOException("Could not delete temp file");
        }

        index.setDirectory(FSDirectory.open(indexDir));
        index.setDirectoryType("FSDirectory");

        // make sure we start with a new index
        if (!indexDir.exists()) {
            indexDir.mkdir();
            return indexDir;
        }

        for (String leftover : indexDir.list()) {
            LOG.info("Deleting old file: " + leftover);
            File stale = new File(indexDir.getAbsolutePath() + File.separator + leftover);
            stale.delete();
        }
        return indexDir;
    }

    /**
     * Adds the class of the given descriptor to the ignored set and then does the same,
     * recursively, for each of its sub-descriptors.
     * <p>
     * A null descriptor, or one whose type is not an InterMineObject, is logged as an error
     * and not processed further.
     *
     * @param ignoredClassMap
     *            set of classes to add to
     * @param cld
     *            super class descriptor
     */
    @SuppressWarnings("unchecked")
    private static void addCldToIgnored(Set<Class<? extends InterMineObject>> ignoredClassMap,
            ClassDescriptor cld) {
        if (cld == null) {
            LOG.error("cld is null!");
            return;
        }
        if (!InterMineObject.class.isAssignableFrom(cld.getType())) {
            LOG.error("cld " + cld + " is not IMO!");
            return;
        }

        ignoredClassMap.add((Class<? extends InterMineObject>) cld.getType());
        for (ClassDescriptor child : cld.getSubDescriptors()) {
            addCldToIgnored(ignoredClassMap, child);
        }
    }

    /**
     * Registers a field name as ignored for the class of the given descriptor and for the
     * classes of the descriptors returned by its {@code getSubDescriptors()}.
     * <p>
     * A null descriptor, or one whose type is not an InterMineObject, is logged as an error
     * and nothing is registered.
     *
     * @param ignoredFieldMap map from class to the set of ignored field names; updated in place
     * @param cld class descriptor the field belongs to
     * @param fieldName name of the field to ignore
     */
    private static void addToIgnoredFields(Map<Class<? extends InterMineObject>, Set<String>> ignoredFieldMap,
            ClassDescriptor cld, String fieldName) {
        if (cld == null) {
            LOG.error("ClassDesriptor was null when attempting to add an ignored field.");
            return;
        }
        if (!InterMineObject.class.isAssignableFrom(cld.getType())) {
            LOG.error("cld " + cld + " is not IMO!");
            return;
        }

        // the descriptor itself plus its sub-descriptors
        Set<ClassDescriptor> affected = new HashSet<ClassDescriptor>(cld.getSubDescriptors());
        affected.add(cld);

        for (ClassDescriptor target : affected) {
            @SuppressWarnings("unchecked")
            Class<? extends InterMineObject> cls = (Class<? extends InterMineObject>) target.getType();
            Set<String> names = ignoredFieldMap.get(cls);
            if (names == null) {
                names = new HashSet<String>();
                ignoredFieldMap.put(cls, names);
            }
            names.add(fieldName);
        }
    }

    /**
     * Returns the facets currently held by this class.
     * <p>
     * The static {@code facets} vector itself is returned, not a copy; it is null after
     * {@link #close()} has been called.
     *
     * @return the facet definitions as a vector of KeywordSearchFacetData
     */
    public static Vector<KeywordSearchFacetData> getFacets() {
        return facets;
    }

    /**
     * Deletes the folder used for the index (used in postprocessing) and forgets the index.
     * <p>
     * Only acts when an index is loaded and its directory type is "FSDirectory". Every file
     * in the folder is deleted, then the folder itself; files or folders that cannot be
     * removed are reported with a warning. The static {@code index} is set to null afterwards.
     */
    public static void deleteIndexDirectory() {
        if (index != null && "FSDirectory".equals(index.getDirectoryType())) {
            File tempFile = ((FSDirectory) index.getDirectory()).getFile();
            LOG.info("Deleting index directory: " + tempFile.getAbsolutePath());

            if (tempFile.exists()) {
                // list() returns null when the path is not a directory or cannot be read
                String[] files = tempFile.list();
                if (files != null) {
                    for (String name : files) {
                        LOG.debug("Deleting index file: " + name);
                        if (!new File(tempFile, name).delete()) {
                            LOG.warn("Could not delete index file: " + name);
                        }
                    }
                }
                // only claim success when the folder is really gone
                if (tempFile.delete()) {
                    LOG.warn("Deleted index directory!");
                } else {
                    LOG.warn("Could not delete index directory: " + tempFile.getAbsolutePath());
                }
            } else {
                LOG.warn("Index directory does not exist!");
            }

            index = null;
        }
    }

    /**
     * Closes the Lucene and bobo index readers (if open) and sets all static state of this
     * class to null.
     * <p>
     * A failure to close a reader is logged together with its exception and does not stop
     * the remaining cleanup.
     */
    public static void close() {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                // pass the exception to the logger instead of printing to stderr
                LOG.error("Not able to free Lucene index file.", e);
            }
        }
        reader = null;
        if (boboIndexReader != null) {
            try {
                boboIndexReader.close();
            } catch (IOException e) {
                LOG.error("Not able to close bobo Index Reader (Lucene).", e);
            }
        }
        boboIndexReader = null;
        indexingQueue = null;
        index = null;
        properties = null;
        tempDirectory = null;
        specialReferences = null;
        ignoredClasses = null;
        ignoredFields = null;
        classBoost = null;
        facets = null;
        attributePrefixes = null;
    }
}