ubic.gemma.search.SearchServiceImpl.java Source code

Introduction

Here is the source code for ubic.gemma.search.SearchServiceImpl.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2006 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.gemma.search;

import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheException;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
import net.sf.ehcache.config.CacheConfiguration;
import net.sf.ehcache.config.NonstopConfiguration;
import net.sf.ehcache.config.TerracottaConfiguration;
import net.sf.ehcache.config.TimeoutBehaviorConfiguration;
import net.sf.ehcache.config.TimeoutBehaviorConfiguration.TimeoutBehaviorType;
import net.sf.ehcache.store.MemoryStoreEvictionPolicy;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.compass.core.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.basecode.ontology.model.OntologyIndividual;
import ubic.basecode.ontology.model.OntologyTerm;
import ubic.gemma.annotation.reference.BibliographicReferenceService;
import ubic.gemma.association.phenotype.PhenotypeAssociationManagerService;
import ubic.gemma.expression.experiment.service.ExpressionExperimentService;
import ubic.gemma.expression.experiment.service.ExpressionExperimentSetService;
import ubic.gemma.genome.gene.service.GeneSearchService;
import ubic.gemma.genome.gene.service.GeneService;
import ubic.gemma.genome.gene.service.GeneSetService;
import ubic.gemma.model.analysis.expression.ExpressionExperimentSet;
import ubic.gemma.model.common.Auditable;
import ubic.gemma.model.common.auditAndSecurity.AuditAction;
import ubic.gemma.model.common.auditAndSecurity.AuditEvent;
import ubic.gemma.model.common.auditAndSecurity.AuditTrailService;
import ubic.gemma.model.common.auditAndSecurity.UserQuery;
import ubic.gemma.model.common.description.*;
import ubic.gemma.model.common.search.SearchSettings;
import ubic.gemma.model.common.search.SearchSettingsImpl;
import ubic.gemma.model.common.search.SearchSettingsValueObject;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.biomaterial.Treatment;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.designElement.CompositeSequenceService;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.FactorValue;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.genome.TaxonDao;
import ubic.gemma.model.genome.biosequence.BioSequence;
import ubic.gemma.model.genome.biosequence.BioSequenceService;
import ubic.gemma.model.genome.gene.GeneProductService;
import ubic.gemma.model.genome.gene.GeneSet;
import ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject;
import ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject;
import ubic.gemma.ontology.OntologyService;
import ubic.gemma.util.ConfigUtils;
import ubic.gemma.util.EntityUtils;
import ubic.gemma.util.ReflectionUtil;

import javax.annotation.PostConstruct;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * This service is used for performing searches using free text or exact matches to items in the database. <h2>
 * Implementation notes</h2>
 * <p>
 * Internally, there are generally two kinds of searches performed: precise database searches looking for exact
 * matches in the database, and compass/lucene searches which look for matches in the stored index.
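 * <p>
 * A typical invocation might look roughly like the following (an illustrative sketch only; the
 * {@code searchService} reference is assumed to be an injected instance of this service):
 * 
 * <pre>
 * SearchSettings settings = SearchSettingsImpl.expressionExperimentSearch("hippocampus");
 * Map&lt;Class&lt;?&gt;, List&lt;SearchResult&gt;&gt; hits = searchService.search(settings);
 * List&lt;SearchResult&gt; experimentHits = hits.get(ExpressionExperiment.class);
 * </pre>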
 * <p>
 * To add more dependencies to this Service edit the applicationContext-search.xml
 * 
 * @author klc
 * @author paul
 * @author keshav
 * @version $Id: SearchServiceImpl.java,v 1.50 2013/04/30 23:35:52 paul Exp $
 */
@Component
public class SearchServiceImpl implements SearchService {

    private static final String ONTOLOGY_CHILDREN_CACHE_NAME = "OntologyChildrenCache";

    /**
     * Penalty applied to all 'index' hits
     */
    private static final double COMPASS_HIT_SCORE_PENALTY_FACTOR = 0.9;

    /**
     * Key for internal in-memory on-the-fly indexes
     */
    private static final String INDEX_KEY = "content";

    /**
     * Penalty applied to scores on hits for entities that derive from an association. For example, if a hit to an EE
     * came from text associated with one of its biomaterials, the score is penalized by this amount.
     */
    private static final double INDIRECT_DB_HIT_PENALTY = 0.8;

    private static Log log = LogFactory.getLog(SearchServiceImpl.class.getName());

    /**
     * Maximum number of hits to retrieve from an in-memory (on-the-fly) index.
     */
    private static final int MAX_IN_MEMORY_INDEX_HITS = 1000;

    private static final int MINIMUM_EE_QUERY_LENGTH = 3;

    private static final int MINIMUM_STRING_LENGTH_FOR_FREE_TEXT_SEARCH = 2;

    private static final String NCBI_GENE = "ncbi_gene";

    /**
     * How long (in seconds) after creation before an object is evicted.
     */
    private static final int ONTOLOGY_CACHE_TIME_TO_DIE = 2000;

    /**
     * How long (in seconds) an item in the cache lasts when it is not accessed.
     */
    private static final int ONTOLOGY_CACHE_TIME_TO_IDLE = 600;

    /**
     * How many term children can stay in memory
     */
    private static final int ONTOLOGY_INFO_CACHE_SIZE = 15000;

    /**
     * If fewer than this number of experiments are returned from a search of experiment characteristics, then search
     * for experiments indirectly as well (e.g. by finding bioMaterials tagged with the characteristics and getting
     * the experiments associated with them).
     */
    private static final int SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS = 100;

    Analyzer analyzer = new StandardAnalyzer();
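    // (The analyzer above is presumably used to tokenize text for the on-the-fly in-memory RAMDirectory/IndexWriter
    // indexes referenced by INDEX_KEY and MAX_IN_MEMORY_INDEX_HITS elsewhere in this class.)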

    @Autowired
    private ArrayDesignService arrayDesignService;

    @Autowired
    private BibliographicReferenceService bibliographicReferenceService;

    @Autowired
    private BioSequenceService bioSequenceService;

    @Autowired
    private CacheManager cacheManager;

    @Autowired
    private CharacteristicService characteristicService;

    private Cache childTermCache;

    @Autowired
    private Compass compassArray;

    @Autowired
    private Compass compassBibliographic;

    @Autowired
    private Compass compassBiosequence;

    @Autowired
    private Compass compassExperimentSet;

    @Autowired
    private Compass compassExpression;

    @Autowired
    private Compass compassGene;

    @Autowired
    private Compass compassGeneSet;

    @Autowired
    private Compass compassProbe;

    @Autowired
    private CompositeSequenceService compositeSequenceService;

    @Autowired
    private ExpressionExperimentSetService experimentSetService;

    @Autowired
    private ExpressionExperimentService expressionExperimentService;

    @Autowired
    private GeneSearchService geneSearchService;

    @Autowired
    private GeneProductService geneProductService;

    @Autowired
    private GeneService geneService;

    @Autowired
    private GeneSetService geneSetService;

    @Autowired
    private OntologyService ontologyService;

    @Autowired
    private PhenotypeAssociationManagerService phenotypeAssociationManagerService;

    @Autowired
    private TaxonDao taxonDao;

    @Autowired
    private AuditTrailService auditTrailService;

    private static final int MAX_LUCENE_HITS = 750;

    private HashMap<String, Taxon> nameToTaxonMap = new LinkedHashMap<String, Taxon>();

    /*
     * (non-Javadoc)
     * 
     * @see org.springframework.beans.factory.InitializingBean#afterPropertiesSet()
     */
    @PostConstruct
    void initializeSearchService() throws Exception {
        try {

            if (cacheManager.cacheExists(ONTOLOGY_CHILDREN_CACHE_NAME)) {
                return;
            }
            boolean terracottaEnabled = ConfigUtils.getBoolean("gemma.cache.clustered", false);
            int diskExpiryThreadIntervalSeconds = 600;
            int maxElementsOnDisk = 10000;
            boolean terracottaCoherentReads = false;
            boolean clearOnFlush = false;

            if (terracottaEnabled) {

                CacheConfiguration config = new CacheConfiguration(ONTOLOGY_CHILDREN_CACHE_NAME,
                        ONTOLOGY_INFO_CACHE_SIZE);
                config.setStatistics(false);
                config.setMemoryStoreEvictionPolicy(MemoryStoreEvictionPolicy.LRU.toString());
                config.setOverflowToDisk(false);
                config.setEternal(true);
                config.setTimeToIdleSeconds(ONTOLOGY_CACHE_TIME_TO_IDLE);
                config.setMaxElementsOnDisk(maxElementsOnDisk);
                config.addTerracotta(new TerracottaConfiguration());
                config.getTerracottaConfiguration().setCoherentReads(terracottaCoherentReads);
                config.clearOnFlush(clearOnFlush);
                config.setTimeToLiveSeconds(ONTOLOGY_CACHE_TIME_TO_DIE);
                config.getTerracottaConfiguration().setClustered(true);
                config.getTerracottaConfiguration().setValueMode("SERIALIZATION");
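                // Nonstop behaviour with NOOP timeouts: if the clustered (Terracotta) store cannot be reached in
                // time, cache operations silently do nothing (gets return null) instead of blocking or throwing.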
                NonstopConfiguration nonstopConfiguration = new NonstopConfiguration();
                TimeoutBehaviorConfiguration tobc = new TimeoutBehaviorConfiguration();
                tobc.setType(TimeoutBehaviorType.NOOP.getTypeName());
                nonstopConfiguration.addTimeoutBehavior(tobc);
                config.getTerracottaConfiguration().addNonstop(nonstopConfiguration);
                childTermCache = new Cache(config);

                // childTermCache = new Cache( "OntologyChildrenCache", ONTOLOGY_INFO_CACHE_SIZE,
                // MemoryStoreEvictionPolicy.LFU, false, null, false, ONTOLOGY_CACHE_TIME_TO_DIE,
                // ONTOLOGY_CACHE_TIME_TO_IDLE, false, diskExpiryThreadIntervalSeconds, null, null,
                // maxElementsOnDisk, 10, clearOnFlush, terracottaEnabled, "SERIALIZATION",
                // terracottaCoherentReads );
            } else {
                childTermCache = new Cache(ONTOLOGY_CHILDREN_CACHE_NAME, ONTOLOGY_INFO_CACHE_SIZE,
                        MemoryStoreEvictionPolicy.LFU, false, null, false, ONTOLOGY_CACHE_TIME_TO_DIE,
                        ONTOLOGY_CACHE_TIME_TO_IDLE, false, diskExpiryThreadIntervalSeconds, null);
            }
            cacheManager.addCache(childTermCache);
            childTermCache = cacheManager.getCache(ONTOLOGY_CHILDREN_CACHE_NAME);

        } catch (CacheException e) {
            throw new RuntimeException(e);
        }

        initializeNameToTaxonMap();

    }

    private void initializeNameToTaxonMap() {

        Collection<Taxon> taxonCollection = (Collection<Taxon>) taxonDao.loadAll();

        for (Taxon taxon : taxonCollection) {
            if (taxon.getScientificName() != null)
                nameToTaxonMap.put(taxon.getScientificName().trim().toLowerCase(), taxon);
            if (taxon.getCommonName() != null)
                nameToTaxonMap.put(taxon.getCommonName().trim().toLowerCase(), taxon);
            if (taxon.getAbbreviation() != null)
                nameToTaxonMap.put(taxon.getAbbreviation().trim().toLowerCase(), taxon);
        }

        // Loop through again, breaking up multi-word taxon database names and handling some special cases (e.g.
        // "salmon" and "rainbow" are common to multiple taxa).
        // This is done in a separate loop so that these names take lower precedence when matching than the full terms
        // in the generated keySet.
        // Some of the special cases handled below may be unnecessary, or more may need to be added.
        for (Taxon taxon : taxonCollection) {

            String[] terms;
            if (taxon.getScientificName() != null) {
                terms = taxon.getScientificName().split("\\s+");
                if (terms.length > 1) {
                    for (String s : terms) {

                        if (!s.equalsIgnoreCase("Oncorhynchus")) {
                            nameToTaxonMap.put(s.toLowerCase(), taxon);
                        }
                    }
                }
            }
            if (StringUtils.isNotBlank(taxon.getCommonName())) {
                if (taxon.getCommonName().equalsIgnoreCase("salmonid")) {
                    nameToTaxonMap.put("salmon", taxon);
                }

                terms = taxon.getCommonName().split("\\s+");
                if (terms.length > 1) {
                    for (String s : terms) {
                        if (!s.equalsIgnoreCase("salmon") && !s.equalsIgnoreCase("pink")
                                && !s.equalsIgnoreCase("rainbow")) {
                            nameToTaxonMap.put(s.toLowerCase(), taxon);
                        }
                    }
                }
            }

        }

    }

    @Override
    public Map<Class<?>, List<SearchResult>> ajaxSearch(SearchSettingsValueObject settingsValueObject) {
        SearchSettings settings = SearchSettingsValueObject.Converter.toEntity(settingsValueObject);
        return this.search(settings);
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.search.SearchService#search(ubic.gemma.search.SearchSettings)
     */
    @Override
    public Map<Class<?>, List<SearchResult>> search(SearchSettings settings) {
        Map<Class<?>, List<SearchResult>> searchResults = new HashMap<Class<?>, List<SearchResult>>();
        try {
            searchResults = this.search(settings, true, false);

        } catch (org.compass.core.engine.SearchEngineQueryParseException qpe) {
            log.error("Query parse Error: " + settings + "; message=" + qpe.getMessage(), qpe);

        } catch (Exception e) {
            log.error("Search error on settings: " + settings + "; message=" + e.getMessage(), e);
        }

        return searchResults;
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.search.SearchService#search(ubic.gemma.search.SearchSettings)
     */
    @Override
    public Map<Class<?>, List<SearchResult>> speedSearch(SearchSettings settings) {
        Map<Class<?>, List<SearchResult>> searchResults = new HashMap<Class<?>, List<SearchResult>>();
        try {
            searchResults = this.search(settings, true, true);

        } catch (org.compass.core.engine.SearchEngineQueryParseException qpe) {
            log.error("Query parse Error: " + settings + "; message=" + qpe.getMessage(), qpe);

        } catch (Exception e) {
            log.error("Search error on settings: " + settings + "; message=" + e.getMessage(), e);
        }

        return searchResults;
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.search.SearchService#search(ubic.gemma.search.SearchSettings)
     */
    @Override
    public List<?> search(SearchSettings settings, Class<?> resultClass) {
        Map<Class<?>, List<SearchResult>> searchResults = this.search(settings);
        List<Object> resultObjects = new ArrayList<Object>();

        List<SearchResult> searchResultObjects = searchResults.get(resultClass);
        for (SearchResult sr : searchResultObjects) {
            resultObjects.add(sr.getResultObject());
        }

        return resultObjects;
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.search.SearchService#search(ubic.gemma.search.SearchSettings, boolean)
     */
    @Override
    public Map<Class<?>, List<SearchResult>> search(SearchSettings settings, boolean fillObjects,
            boolean webSpeedSearch) {

        if (StringUtils.isBlank(settings.getTermUri()) && !settings.getQuery().startsWith("http://")) {
            return generalSearch(settings, fillObjects, webSpeedSearch);
        }

        // we only attempt an ontology search if the uri looks remotely like a url.
        return ontologyUriSearch(settings);

    }

    /**
     * @param settings
     * @return results, if the settings.termUri is populated. This includes gene uris.
     */
    private Map<Class<?>, List<SearchResult>> ontologyUriSearch(SearchSettings settings) {
        Map<Class<?>, List<SearchResult>> results = new HashMap<Class<?>, List<SearchResult>>();

        // 1st check to see if the query is a URI (from an ontology).
        // Do this by seeing if we can find it in the loaded ontologies.
        // Escape with general utilities because we might not be doing a lucene-backed search (just a hibernate one).
        String termUri = settings.getTermUri();

        if (StringUtils.isBlank(termUri)) {
            termUri = settings.getQuery();
        }

        if (!termUri.startsWith("http://")) {
            return results;
        }

        OntologyTerm matchingTerm = null;
        String uriString = null;

        uriString = StringEscapeUtils.escapeJava(StringUtils.strip(termUri));

        if (StringUtils.containsIgnoreCase(uriString, NCBI_GENE)) {
            // Perhaps it is a valid gene URL. We want to search for the gene in Gemma.
            // 1st get objects tagged with the given gene identifier
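            // Illustrative assumption about the URI shape (not spelled out in this file): the gene URI is expected to
            // end with the NCBI gene id, e.g. ".../ncbi_gene/12345", which is why the text after the last "/" is
            // parsed as an integer further below.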
            Collection<Class<?>> classesToFilterOn = new HashSet<Class<?>>();
            classesToFilterOn.add(ExpressionExperiment.class);

            Collection<Characteristic> foundCharacteristics = characteristicService.findByUri(classesToFilterOn,
                    uriString);
            Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToFilterOn,
                    foundCharacteristics);

            Collection<SearchResult> characteristicOwnerResults = filterCharacteristicOwnersByClass(
                    classesToFilterOn, parentMap);

            if (!characteristicOwnerResults.isEmpty()) {
                results.put(ExpressionExperiment.class, new ArrayList<SearchResult>());
                results.get(ExpressionExperiment.class).addAll(characteristicOwnerResults);
            }

            if (settings.getSearchGenes()) {
                // Get the gene
                String ncbiAccessionFromUri = StringUtils.substringAfterLast(uriString, "/");
                Gene g = null;

                try {
                    g = geneService.findByNCBIId(Integer.parseInt(ncbiAccessionFromUri));
                } catch (NumberFormatException e) {
                    // ok
                }

                if (g != null) {
                    results.put(Gene.class, new ArrayList<SearchResult>());
                    results.get(Gene.class).add(new SearchResult(g));
                }
            }
            return results;
        }

        /*
         * Not searching for a gene.
         */
        Collection<SearchResult> matchingResults;
        Collection<Class<?>> classesToSearch = new HashSet<Class<?>>();
        if (settings.getSearchExperiments()) {
            classesToSearch.add(ExpressionExperiment.class); // not sure ...
            classesToSearch.add(BioMaterial.class);
            classesToSearch.add(FactorValue.class);
        }

        // this doesn't seem to be implemented yet, LiteratureEvidence and GenericEvidence aren't handled in the
        // fillValueObjects method downstream
        /*
         * if ( settings.getSearchPhenotypes() ) { classesToSearch.add( PhenotypeAssociationImpl.class ); }
         */
        matchingTerm = this.ontologyService.getTerm(uriString);
        if (matchingTerm == null || matchingTerm.getUri() == null) {
            /*
             * Maybe the ontology isn't loaded. Look anyway.
             */

            Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToSearch,
                    characteristicService.findByUri(classesToSearch, uriString));
            matchingResults = filterCharacteristicOwnersByClass(classesToSearch, parentMap);

        } else {

            log.info("Found ontology term: " + matchingTerm);

            // It was a URI from a loaded ontology, so get the children.
            Collection<OntologyTerm> terms2Search4 = matchingTerm.getChildren(true);
            terms2Search4.add(matchingTerm);

            matchingResults = this.databaseCharacteristicExactUriSearchForOwners(classesToSearch, terms2Search4);
        }

        for (SearchResult searchR : matchingResults) {
            if (results.containsKey(searchR.getResultClass())) {
                results.get(searchR.getResultClass()).add(searchR);
            } else {
                List<SearchResult> rs = new ArrayList<SearchResult>();
                rs.add(searchR);
                results.put(searchR.getResultClass(), rs);
            }
        }

        return results;
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.search.SearchService#searchExpressionExperiments(java.lang.String, java.lang.Long)
     */
    @Override
    public Collection<Long> searchExpressionExperiments(String query, Long taxonId) {
        Taxon taxon = taxonDao.load(taxonId);
        Collection<Long> eeIds = new HashSet<Long>();
        if (StringUtils.isNotBlank(query)) {

            if (query.length() < MINIMUM_EE_QUERY_LENGTH)
                return eeIds;

            // Initial list
            List<SearchResult> results = this
                    .search(SearchSettingsImpl.expressionExperimentSearch(query), false, false)
                    .get(ExpressionExperiment.class);
            for (SearchResult result : results) {
                eeIds.add(result.getId());
            }

            // Filter by taxon
            if (taxon != null) {
                Collection<Long> eeIdsToKeep = new HashSet<Long>();
                Collection<ExpressionExperiment> ees = expressionExperimentService.findByTaxon(taxon);
                for (ExpressionExperiment ee : ees) {
                    if (eeIds.contains(ee.getId()))
                        eeIdsToKeep.add(ee.getId());
                }
                eeIds.retainAll(eeIdsToKeep);
            }
        } else {
            Collection<ExpressionExperiment> ees = (taxon != null) ? expressionExperimentService.findByTaxon(taxon)
                    : expressionExperimentService.loadAll();
            for (ExpressionExperiment ee : ees) {
                eeIds.add(ee.getId());
            }
        }
        return eeIds;
    }

    /**
     * Add results.
     * 
     * @param rawResults To add to
     * @param newResults To be added
     */
    private void accreteResults(List<SearchResult> rawResults, Collection<SearchResult> newResults) {
        for (SearchResult sr : newResults) {
            if (!rawResults.contains(sr)) {
                /*
                 * We do this because we don't want to clobber results, when the same object comes up more than once in
                 * different searches. FIXME - perhaps check if the score of the existing one is lower?
                 */
                rawResults.add(sr);
            }
        }
    }

    /**
     * Add the URIs of the given term's child terms (cached where possible) to the collection.
     * 
     * @param characteristicUris collection to which child-term URIs are added
     * @param term the ontology term whose children are looked up
     */
    private void addChildTerms(Collection<String> characteristicUris, OntologyTerm term) {
        String uri = term.getUri();
        /*
         * getChildren can be very slow for 'high-level' classes like "neoplasm", so we use a cache.
         */
        Collection<OntologyTerm> children = null;
        if (StringUtils.isBlank(uri)) {
            // shouldn't happen, but just in case
            if (log.isDebugEnabled())
                log.debug("Blank uri for " + term);
        }

        Element cachedChildren = this.childTermCache.get(uri);
        // log.debug("Getting children of " + term);
        if (cachedChildren == null) {
            try {
                children = term.getChildren(false);
                childTermCache.put(new Element(uri, children));
            } catch (com.hp.hpl.jena.ontology.ConversionException ce) {
                log.warn("getting children for term: " + term
                        + " caused com.hp.hpl.jena.ontology.ConversionException. " + ce.getMessage());
            }
        } else {
            children = (Collection<OntologyTerm>) cachedChildren.getValue();
        }

        if (children != null) { // will happen if there's a com.hp.hpl.jena.ontology.ConversionException
            for (OntologyTerm child : children) {
                characteristicUris.add(child.getUri());
            }
        }

    }

    /**
     * A general search for array designs.
     * <p>
     * This search does both a database search and a compass search. It also contains an underlying
     * {@link CompositeSequence} search, returning the {@link ArrayDesign} collection for the given composite sequence
     * search string (the returned collection of array designs does not contain duplicates).
     * 
     * @param settings
     * @param probeResults Collection of results from a previous CompositeSequence search. Can be null; otherwise used
     *        to avoid a second search for probes. The array designs for the probes are added to the final results.
     * @return
     */
    private Collection<SearchResult> arrayDesignSearch(SearchSettings settings,
            Collection<SearchResult> probeResults) {

        StopWatch watch = startTiming();
        String searchString = settings.getQuery();
        Collection<SearchResult> results = new HashSet<SearchResult>();

        ArrayDesign shortNameResult = arrayDesignService.findByShortName(searchString);
        if (shortNameResult != null) {
            results.add(new SearchResult(shortNameResult, 1.0));
        } else {
            Collection<ArrayDesign> nameResult = arrayDesignService.findByName(searchString);
            if (nameResult != null)
                for (ArrayDesign ad : nameResult) {
                    results.add(new SearchResult(ad, 1.0));
                }
        }

        Collection<ArrayDesign> altNameResults = arrayDesignService.findByAlternateName(searchString);
        for (ArrayDesign arrayDesign : altNameResults) {
            results.add(new SearchResult(arrayDesign, 0.9));
        }

        Collection<ArrayDesign> manufacturerResults = arrayDesignService.findByManufacturer(searchString);
        for (ArrayDesign arrayDesign : manufacturerResults) {
            results.add(new SearchResult(arrayDesign, 0.9));
        }

        results.addAll(compassArrayDesignSearch(settings));
        results.addAll(databaseArrayDesignSearch(settings));

        Collection<SearchResult> probes = null;
        if (probeResults == null) {
            probes = compassCompositeSequenceSearch(settings);
        } else {
            probes = probeResults;
        }

        for (SearchResult r : probes) {
            CompositeSequence cs = (CompositeSequence) r.getResultObject();
            if (cs.getArrayDesign() == null) // This might happen as compass
                // might not have indexed the AD
                // for the CS
                continue;
            results.add(r);
        }

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("Array Design search for '" + settings + "' took " + watch.getTime() + " ms");

        return results;
    }

    /**
     * A combined biosequence search: runs both a compass search and a database search.
     * 
     * @param settings
     * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences
     *        for the genes are added to the final results.
     * @return
     */
    private Collection<SearchResult> bioSequenceSearch(SearchSettings settings,
            Collection<SearchResult> previousGeneSearchResults) {
        StopWatch watch = startTiming();

        Collection<SearchResult> searchResults = new HashSet<SearchResult>();
        searchResults.addAll(compassBioSequenceSearch(settings, previousGeneSearchResults));
        searchResults.addAll(databaseBioSequenceSearch(settings));

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("Biosequence search for '" + settings + "' took " + watch.getTime() + " ms "
                    + searchResults.size() + " results.");

        return searchResults;
    }

    /**
     * @param settings
     */
    private Collection<SearchResult> characteristicExpressionExperimentSearch(final SearchSettings settings) {

        Collection<SearchResult> results = new HashSet<SearchResult>();

        Collection<Class<?>> classToSearch = new ArrayList<Class<?>>(1);
        Queue<Class<?>> orderedClassesToSearch = new LinkedList<Class<?>>();
        orderedClassesToSearch.add(ExpressionExperiment.class);
        orderedClassesToSearch.add(FactorValue.class);
        orderedClassesToSearch.add(BioMaterial.class);
        orderedClassesToSearch.add(Treatment.class);

        Collection<SearchResult> characterSearchResults = new HashSet<SearchResult>();

        while (characterSearchResults.size() < SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS
                && !orderedClassesToSearch.isEmpty()) {
            classToSearch.clear();
            classToSearch.add(orderedClassesToSearch.poll());
            Collection<SearchResult> classResults = ontologySearchAnnotatedObject(classToSearch, settings);
            characterSearchResults.addAll(classResults);

            String msg = "Found " + classResults.size() + " " + classToSearch.iterator().next().getSimpleName()
                    + " results from characteristic search.";
            if (characterSearchResults.size() >= SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS) {
                msg += " Total found > " + SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS
                        + ", will not search for more entities.";
            }
            log.info(msg);
        }

        StopWatch watch = new StopWatch();
        watch.start();

        // filter and get parents...
        int numEEs = 0;
        Collection<BioMaterial> biomaterials = new HashSet<BioMaterial>();
        Collection<FactorValue> factorValues = new HashSet<FactorValue>();
        Collection<Treatment> treatments = new HashSet<Treatment>();

        for (SearchResult sr : characterSearchResults) {
            Class<?> resultClass = sr.getResultClass();
            if (ExpressionExperiment.class.isAssignableFrom(resultClass)) {
                sr.setHighlightedText(sr.getHighlightedText() + " (characteristic)");
                results.add(sr);
                numEEs++;
            } else if (BioMaterial.class.isAssignableFrom(resultClass)) {
                biomaterials.add((BioMaterial) sr.getResultObject());
            } else if (FactorValue.class.isAssignableFrom(resultClass)) {
                factorValues.add((FactorValue) sr.getResultObject());
            } else if (Treatment.class.isAssignableFrom(resultClass)) {
                treatments.add((Treatment) sr.getResultObject());
            }
        }

        /*
         * Much faster to batch it...
         */
        if (biomaterials.size() > 0) {
            Collection<ExpressionExperiment> ees = expressionExperimentService.findByBioMaterials(biomaterials);
            for (ExpressionExperiment ee : ees) {
                results.add(new SearchResult(ee, INDIRECT_DB_HIT_PENALTY, "BioMaterial characteristic"));
            }
        }

        if (factorValues.size() > 0) {
            Collection<ExpressionExperiment> ees = expressionExperimentService.findByFactorValues(factorValues);
            for (ExpressionExperiment ee : ees) {
                if (log.isDebugEnabled())
                    log.debug(ee);
                results.add(new SearchResult(ee, INDIRECT_DB_HIT_PENALTY, "Factor characteristic"));
            }
        }

        if (treatments.size() > 0) {
            log.info("Not processing treatments, but hits were found");
            // Collection<ExpressionExperiment> ees = expressionExperimentService.findByTreatments( treatments );
            // for ( ExpressionExperiment ee : ees ) {
            // if ( !results.contains( ee ) ) {
            // results.add( new SearchResult( ee, INDIRECT_DB_HIT_PENALTY, "Treatment" ) );
            // }
            // }
        }

        if (log.isDebugEnabled()) {
            log.debug(
                    "ExpressionExperiment search: " + settings + " -> " + results.size() + " characteristic hits");
        }

        if (watch.getTime() > 1000) {
            log.info("Retrieving " + results.size() + " experiments from " + characterSearchResults.size()
                    + " retrieved characteristics took " + watch.getTime() + " ms");
            log.info("Breakdown: " + numEEs + " via direct association with EE; " + biomaterials.size()
                    + " via association with Biomaterial; " + factorValues.size() + " via experimental design");
        }

        return results;
    }

    /**
     * Search for the query in ontologies, including items that are associated with children of matching query terms.
     * That is, 'brain' should return entities tagged as 'hippocampus'.
     * 
     * @param classes Classes of characteristic-bound entities. For example, to get matching characteristics of
     *        ExpressionExperiments, pass ExpressionExperiment.class in this collection parameter.
     * @param settings
     * @return SearchResults of Characteristic objects. Typically to be useful one needs to retrieve the 'parents'
     *         (entities which have been 'tagged' with the term) of those Characteristics
     */
    private Collection<SearchResult> characteristicSearchWithChildren(Collection<Class<?>> classes,
            SearchSettings settings) {

        String query = settings.getQuery();

        Set<String> rawTerms = extractTerms(query);

        Collection<SearchResult> allResults = new HashSet<SearchResult>();
        Map<SearchResult, String> matchMap = new HashMap<SearchResult, String>();

        for (String rawTerm : rawTerms) {
            if (StringUtils.isBlank(rawTerm)) {
                continue;
            }
            log.info("Ontology search term:" + rawTerm);
            allResults.addAll(characteristicSearchWord(classes, matchMap, rawTerm));
        }

        return postProcessCharacteristicResults(query, allResults, matchMap);

    }

    /**
     * @param classes
     * @param matches
     * @param query
     * @return
     */
    private Collection<SearchResult> characteristicSearchWord(Collection<Class<?>> classes,
            Map<SearchResult, String> matches, String query) {

        StopWatch watch = startTiming();
        Collection<String> characteristicUris = new HashSet<String>();

        Collection<OntologyIndividual> individuals = ontologyService.findIndividuals(query);
        if (individuals.size() > 0 && watch.getTime() > 1000) {
            log.info("Found " + individuals.size() + " individuals matching '" + query + "' in " + watch.getTime()
                    + "ms");
        }
        watch.reset();
        watch.start();

        for (OntologyIndividual term : individuals) {
            if ((term != null) && (term.getUri() != null))
                characteristicUris.add(term.getUri());
        }

        Collection<OntologyTerm> matchingTerms = ontologyService.findTerms(query);

        if (watch.getTime() > 1000) {
            log.info("Found " + matchingTerms.size() + " ontology classes matching '" + query + "' in "
                    + watch.getTime() + "ms");
        }

        watch.reset();
        Collection<SearchResult> results = new HashSet<SearchResult>();
        Collection<Characteristic> cs = new HashSet<Characteristic>();
        if (!matchingTerms.isEmpty()) {
            watch.start();

            for (OntologyTerm term : matchingTerms) {
                String uri = term.getUri();
                if (uri == null || uri.isEmpty())
                    continue;
                characteristicUris.add(uri);
                addChildTerms(characteristicUris, term);
            }

            // int cacheHits = childTermCache.getStatistics().getCacheHits();
            // if ( log.isDebugEnabled() ) log.debug( cacheHits + " cache hits for ontology children" );

            if (watch.getTime() > 1000) {
                log.info("Found " + characteristicUris.size() + " possible matches + child terms in "
                        + watch.getTime() + "ms");
            }

            /*
             * Find occurrences of these terms in our system. This is fast, so long as there aren't too many.
             */
            Collection<SearchResult> matchingCharacteristics = dbHitsToSearchResult(
                    characteristicService.findByUri(classes, characteristicUris));

            for (SearchResult crs : matchingCharacteristics) {
                cs.add((Characteristic) crs.getResultObject());
            }
        }
        watch.reset();
        watch.start();
        /*
         * Add characteristics that have values matching the query; this pulls in items not associated with ontology
         * terms (free text). We do this here so we can apply the query logic to the matches.
         */
        String dbQueryString = query.replaceAll("\\*", "");
        Collection<Characteristic> valueMatches = characteristicService.findByValue(classes, dbQueryString);

        if (valueMatches != null && !valueMatches.isEmpty())
            cs.addAll(valueMatches);

        /*
         * Retrieve the owner objects
         */
        Collection<SearchResult> matchingEntities = getAnnotatedEntities(classes, cs);
        results.addAll(matchingEntities);

        if (watch.getTime() > 1000) {
            log.info("Slow search: found " + matchingEntities.size() + " matches to characteristics for '" + query
                    + "' from " + characteristicUris.size() + " URIS in " + watch.getTime() + "ms");
        }

        watch.stop();

        for (SearchResult searchR : results) {
            if (!matches.containsKey(searchR)) {
                matches.put(searchR, query);
            } else {
                matches.put(searchR, matches.get(searchR) + " " + query);
            }
        }

        return results;
    }

    /**
     * A Compass search on array designs.
     * 
     * @param query
     * @return {@link Collection}
     */
    private Collection<SearchResult> compassArrayDesignSearch(SearchSettings settings) {
        return compassSearch(compassArray, settings);
    }

    /**
     * @param query
     * @return
     */
    private Collection<SearchResult> compassBibliographicReferenceSearch(SearchSettings settings) {
        return compassSearch(compassBibliographic, settings);
    }

    /**
     * A compass-backed search that finds biosequences that match the search string. Searches the gene and probe
     * indexes for matches, then converts those results to biosequences.
     * 
     * @param searchString
     * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences
     *        for the genes are added to the final results.
     * @return
     * @throws Exception
     */
    private Collection<SearchResult> compassBioSequenceSearch(SearchSettings settings,
            Collection<SearchResult> previousGeneSearchResults) {

        Collection<SearchResult> results = compassSearch(compassBiosequence, settings);
        // for (SearchResult result : results) {
        // // Thaw biosequences found by compass search.
        // BioSequence bs = (BioSequence) result.getResultObject();
        // bioSequenceService.thaw(Arrays.asList(new BioSequence[] {bs}));
        // }

        Collection<SearchResult> geneResults = null;
        if (previousGeneSearchResults == null) {
            log.info("Biosequence Search:  running gene search with " + settings.getQuery());
            geneResults = compassGeneSearch(settings);
        } else {
            log.info("Biosequence Search:  using previous results");
            geneResults = previousGeneSearchResults;
        }

        Map<Gene, SearchResult> genes = new HashMap<Gene, SearchResult>();
        for (SearchResult sr : geneResults) {
            Object resultObject = sr.getResultObject();
            if (Gene.class.isAssignableFrom(resultObject.getClass())) {
                genes.put((Gene) resultObject, sr);
            } else {
                // see bug 1774 -- may not be happening any more.
                log.warn("Expected a Gene, got a " + resultObject.getClass() + " on query=" + settings.getQuery());
            }
        }

        Map<Gene, Collection<BioSequence>> seqsFromDb = bioSequenceService.findByGenes(genes.keySet());
        for (Gene gene : seqsFromDb.keySet()) {
            List<BioSequence> bs = new ArrayList<BioSequence>(seqsFromDb.get(gene));
            // bioSequenceService.thaw( bs );
            results.addAll(dbHitsToSearchResult(bs, genes.get(gene), null));
        }

        return results;
    }

    /**
     * @param settings
     * @return
     */
    private Collection<SearchResult> compassCompositeSequenceSearch(final SearchSettings settings) {
        return compassSearch(compassProbe, settings);
    }

    /**
     * A compass search on expressionExperiments.
     * 
     * @param query
     * @return {@link Collection}
     */
    private Collection<SearchResult> compassExpressionSearch(SearchSettings settings) {
        return compassSearch(compassExpression, settings);
    }

    /**
     * @param query
     * @return
     */
    private Collection<SearchResult> compassGeneSearch(final SearchSettings settings) {
        return compassSearch(compassGene, settings);
    }

    /**
     * @param bean
     * @param settings
     * @return
     */
    private Collection<SearchResult> compassSearch(Compass bean, final SearchSettings settings) {

        if (!settings.getUseIndices())
            return new HashSet<SearchResult>();

        CompassTemplate template = new CompassTemplate(bean);
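        // CompassTemplate manages the Compass session and transaction around the callback below, analogous to
        // Spring's other *Template helpers.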
        Collection<SearchResult> searchResults = template.execute(new CompassCallback<Collection<SearchResult>>() {
            @Override
            public Collection<SearchResult> doInCompass(CompassSession session) throws CompassException {
                return performSearch(settings, session);
            }
        });
        if (log.isDebugEnabled()) {
            log.debug("Compass search via " + bean.getSettings().getSetting("compass.name") + " : " + settings
                    + " -> " + searchResults.size() + " hits");
        }
        return searchResults;
    }

    /**
     * Search by name of the composite sequence as well as gene.
     * 
     * @return
     * @throws Exception
     */
    private Collection<SearchResult> compositeSequenceSearch(SearchSettings settings) {

        StopWatch watch = startTiming();

        /*
         * FIXME: this at least partly ignores any array design that was set as a restriction, especially in a gene
         * search.
         */

        Collection<SearchResult> allResults = new HashSet<SearchResult>();
        // Temporarily removing compass searching of composite sequences because it only bloats the results.
        // allResults.addAll( compassCompositeSequenceSearch( settings ) );
        allResults.addAll(databaseCompositeSequenceSearch(settings));
        // allResults.addAll( compositeSequenceByGeneSearch( settings, geneSearchResults ) );

        /*
         * This last step is needed because the compassSearch for compositeSequences returns bioSequences too.
         */
        Collection<SearchResult> finalResults = new HashSet<SearchResult>();
        for (SearchResult sr : allResults) {
            if (CompositeSequence.class.isAssignableFrom(sr.getResultClass())) {
                finalResults.add(sr);
            }
        }

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("Composite sequence search for '" + settings + "' took " + watch.getTime() + " ms, "
                    + finalResults.size() + " results.");
        return finalResults;
    }

    private List<SearchResult> convertEntitySearchResutsToValueObjectsSearchResults(
            Collection<SearchResult> searchResults) {
        List<SearchResult> convertedSearchResults = new ArrayList<SearchResult>();
        for (SearchResult searchResult : searchResults) {
            if (BioSequence.class.isAssignableFrom(searchResult.getResultClass())) {
                SearchResult convertedSearchResult = new SearchResult(
                        BioSequenceValueObject
                                .fromEntity(bioSequenceService.thaw((BioSequence) searchResult.getResultObject())),
                        searchResult.getScore(), searchResult.getHighlightedText());
                convertedSearchResults.add(convertedSearchResult);
            } // else if ...
            else {
                convertedSearchResults.add(searchResult);
            }
        }
        return convertedSearchResults;
    }

    /**
     * Turn a string into a Lucene-indexable document.
     * 
     * @param content
     * @return
     */
    private Document createDocument(String content) {
        Document doc = new Document();
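        // Field.Store.YES keeps the original text retrievable from the index; Field.Index.ANALYZED runs it through
        // the analyzer so it is tokenized for full-text matching.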
        Field f = new Field(INDEX_KEY, content, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(f);
        return doc;
    }

    /**
     * Searches the DB for array designs which have composite sequences whose names match the given search string.
     * Because of the underlying database search, this is ACL-aware. That is, returned array designs are filtered based
     * on access control list (ACL) permissions.
     * 
     * @param searchString
     * @return
     * @throws Exception
     */
    private Collection<SearchResult> databaseArrayDesignSearch(SearchSettings settings) {

        if (!settings.getUseDatabase())
            return new HashSet<SearchResult>();

        StopWatch watch = startTiming();

        Collection<ArrayDesign> adSet = new HashSet<ArrayDesign>();

        // search by exact composite sequence name
        Collection<CompositeSequence> matchedCs = compositeSequenceService.findByName(settings.getQuery());
        for (CompositeSequence sequence : matchedCs) {
            adSet.add(sequence.getArrayDesign());
        }

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("Array Design Compositesequence DB search for " + settings + " took " + watch.getTime() + " ms"
                    + " found " + adSet.size() + " Ads");

        return dbHitsToSearchResult(adSet);

    }

    /**
     * A database search for biosequences. Biosequence names are already indexed by compass...
     * 
     * @param searchString
     * @return
     */
    private Collection<SearchResult> databaseBioSequenceSearch(SearchSettings settings) {

        if (!settings.getUseDatabase())
            return new HashSet<SearchResult>();

        StopWatch watch = startTiming();

        String searchString = settings.getQuery();

        // replace * with % for inexact symbol search
        String inexactString = searchString;
        Pattern pattern = Pattern.compile("\\*");
        Matcher match = pattern.matcher(inexactString);
        inexactString = match.replaceAll("%");

        Collection<BioSequence> bs = bioSequenceService.findByName(inexactString);
        // bioSequenceService.thaw( bs );
        Collection<SearchResult> bioSequenceList = new HashSet<SearchResult>(dbHitsToSearchResult(bs));

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("BioSequence DB search for " + searchString + " took " + watch.getTime() + " ms and found"
                    + bioSequenceList.size() + " BioSequences");

        return bioSequenceList;
    }

    /**
     * Takes a list of ontology terms, and classes of objects of interest to be returned. Looks through the
     * characteristic table for an exact match with the given ontology terms. Only tries to match the URIs.
     * 
     * @param classes Classes of objects to restrict the search to (typically ExpressionExperimentImpl.class, for
     *        example).
     * @param terms A list of ontology terms to search for
     * @return Collection of search results for the objects owning the found characteristics, where the owner is of
     *         class clazz
     */
    private Collection<SearchResult> databaseCharacteristicExactUriSearchForOwners(Collection<Class<?>> classes,
            Collection<OntologyTerm> terms) {

        // Collection<Characteristic> characteristicValueMatches = new ArrayList<Characteristic>();
        Collection<Characteristic> characteristicURIMatches = new ArrayList<Characteristic>();

        for (OntologyTerm term : terms) {
            // characteristicValueMatches.addAll( characteristicService.findByValue( term.getUri() ));
            characteristicURIMatches.addAll(characteristicService.findByUri(classes, term.getUri()));
        }

        Map<Characteristic, Object> parentMap = characteristicService.getParents(classes, characteristicURIMatches);
        // parentMap.putAll( characteristicService.getParents(characteristicValueMatches ) );

        return filterCharacteristicOwnersByClass(classes, parentMap);
    }

    /**
     * Search the DB for composite sequences and the genes that are matched to them.
     * 
     * @param searchString
     * @return
     * @throws Exception
     */
    private Collection<SearchResult> databaseCompositeSequenceSearch(final SearchSettings settings) {

        if (!settings.getUseDatabase())
            return new HashSet<SearchResult>();

        StopWatch watch = startTiming();

        Set<Gene> geneSet = new HashSet<Gene>();

        String searchString = settings.getQuery();
        ArrayDesign ad = settings.getPlatformConstraint();

        // search by exact composite sequence name
        Collection<CompositeSequence> matchedCs = new HashSet<CompositeSequence>();
        if (ad != null) {
            CompositeSequence cs = compositeSequenceService.findByName(ad, searchString);
            matchedCs.add(cs);
        } else {
            matchedCs = compositeSequenceService.findByName(searchString);
        }

        /*
         * In case the query _is_ a gene
         */
        Collection<SearchResult> rawGeneResults = this.databaseGeneSearch(settings);
        for (SearchResult searchResult : rawGeneResults) {
            Object j = searchResult.getResultObject();
            if (Gene.class.isAssignableFrom(j.getClass())) {
                geneSet.add((Gene) j);
            }
        }

        for (Gene g : geneSet) {
            if (settings.getPlatformConstraint() != null) {
                matchedCs.addAll(compositeSequenceService.findByGene(g, settings.getPlatformConstraint()));
            } else {
                matchedCs.addAll(compositeSequenceService.findByGene(g));
            }
        }

        // search by associated genes.
        for (CompositeSequence sequence : matchedCs) {
            geneSet.addAll(compositeSequenceService.getGenes(sequence));
        }

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("Gene composite sequence DB search " + searchString + " took " + watch.getTime() + " ms, "
                    + geneSet.size() + " items.");

        Collection<SearchResult> results = dbHitsToSearchResult(geneSet);

        results.addAll(dbHitsToSearchResult(matchedCs));

        return results;
    }

    /**
     * Does search on exact string by: id, name and short name. This only returns results if these fields match exactly,
     * but it's fast.
     * 
     * @param query
     * @return {@link Collection}
     */
    private Collection<SearchResult> databaseExpressionExperimentSearch(final SearchSettings settings) {

        if (!settings.getUseDatabase())
            return new HashSet<SearchResult>();

        StopWatch watch = startTiming();

        Map<ExpressionExperiment, String> results = new HashMap<ExpressionExperiment, String>();
        String query = StringEscapeUtils.unescapeJava(settings.getQuery());
        Collection<ExpressionExperiment> ees = expressionExperimentService.findByName(query);
        if (!ees.isEmpty()) {
            for (ExpressionExperiment ee : ees) {
                results.put(ee, ee.getName());
            }
        } else {
            ExpressionExperiment ee = expressionExperimentService.findByShortName(query);
            if (ee != null) {
                results.put(ee, ee.getShortName());
            } else {

                ees = expressionExperimentService.findByAccession(query);
                for (ExpressionExperiment e : ees) {
                    results.put(e, e.getId().toString());
                }

                if (results.isEmpty()) {
                    try {
                        // maybe user put in a primary key value.
                        ee = expressionExperimentService.load(new Long(query));
                        if (ee != null)
                            results.put(ee, ee.getId().toString());
                    } catch (NumberFormatException e) {
                        // no-op - it's not an ID.
                    }
                }
            }
        }

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("DB Expression Experiment search for " + settings + " took " + watch.getTime()
                    + " ms and found " + results.size() + " EEs");

        Collection<SearchResult> r = dbHitsToSearchResult(results);
        return r;
    }

    /**
     * Search the DB for genes that exactly match the given search string. Searches the geneProduct, gene and
     * bioSequence tables.
     * 
     * @param searchString
     * @return
     * @throws Exception
     */
    private Collection<SearchResult> databaseGeneSearch(SearchSettings settings) {

        if (!settings.getUseDatabase())
            return new HashSet<SearchResult>();

        StopWatch watch = startTiming();
        String searchString = StringEscapeUtils.unescapeJava(settings.getQuery());
        if (StringUtils.isBlank(searchString))
            return new HashSet<SearchResult>();

        Collection<SearchResult> results = new HashSet<SearchResult>();

        /*
         * First search by accession. If we find it, stop.
         */
        Gene result = null;
        try {
            result = geneService.findByNCBIId(Integer.parseInt(searchString));
        } catch (NumberFormatException e) {
            //
        }
        if (result != null) {
            results.add(this.dbHitToSearchResult(null, result));
        } else {
            result = geneService.findByAccession(searchString, null);
            if (result != null) {
                results.add(this.dbHitToSearchResult(null, result));
            }
        }
        if (results.size() > 0) {
            filterByTaxon(settings, results, true);
            watch.stop();
            if (watch.getTime() > 1000)
                log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found "
                        + results.size() + " genes");
            return results;
        }

        // replace * at end with % for inexact symbol search
        String inexactString = searchString;
        Pattern pattern = Pattern.compile("\\*$");
        Matcher match = pattern.matcher(inexactString);
        inexactString = match.replaceAll("%");
        // note that at this point, the inexactString might not have a wildcard - only if the user asked for it.

        String exactString = inexactString.replaceAll("%", "");

        // if the query is shortish, always do a wild card search. This gives better behavior in 'live
        // search' situations. If we do wildcards on very short queries we get too many results.
        Collection<Gene> geneSet = new HashSet<Gene>();
        if (searchString.length() <= 2) {
            // case 0: user entered a very short string. We search only for exact matches.
            geneSet.addAll(geneService.findByOfficialSymbolInexact(exactString));
        } else if (searchString.length() > 2 && inexactString.endsWith("%")) {
            // case 1: user asked for wildcard. We allow this on strings of length 3 or more.
            geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString));
        } else if (searchString.length() > 3 && searchString.length() < 6) {
            // case 2: user did not ask for a wildcard, but we add it anyway, if the string is 4 or 5 characters.
            if (!inexactString.endsWith("%")) {
                inexactString = inexactString + "%";
            }
            geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString));

        } else {
            // case 3: string is long enough, and user did not ask for wildcard.
            geneSet.addAll(geneService.findByOfficialSymbol(exactString));
        }

        /*
         * If we found a match using official symbol or name, don't bother with this
         */
        if (geneSet.isEmpty()) {
            geneSet.addAll(geneService.findByAlias(exactString));
            geneSet.addAll(geneProductService.getGenesByName(exactString));
            geneSet.addAll(geneProductService.getGenesByNcbiId(exactString));
            geneSet.addAll(bioSequenceService.getGenesByAccession(exactString));
            geneSet.addAll(bioSequenceService.getGenesByName(exactString));
            geneSet.addAll(geneService.findByEnsemblId(exactString));
        }

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found "
                    + geneSet.size() + " genes");

        results = dbHitsToSearchResult(geneSet);
        filterByTaxon(settings, results, true);
        return results;
    }
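
    /*
     * Illustrative sketch (not part of the original method): how the length-based cases above translate a raw query
     * into exact vs. wildcard symbol lookups. The gene symbols are hypothetical examples.
     * 
     *   "ab"      ->  length <= 2, searched as-is:               findByOfficialSymbolInexact("ab")
     *   "pax*"    ->  explicit trailing wildcard:                findByOfficialSymbolInexact("pax%")
     *   "grin1"   ->  length 4-5, wildcard added automatically:  findByOfficialSymbolInexact("grin1%")
     *   "slc6a4x" ->  longer, no wildcard requested:             findByOfficialSymbol("slc6a4x")
     */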

    /**
     * Convert hits from database searches into SearchResults.
     * 
     * @param entities
     * @return
     */
    private Collection<SearchResult> dbHitsToSearchResult(Collection<? extends Object> entities) {
        return this.dbHitsToSearchResult(entities, null, null);
    }

    /**
     * Convert hits from database searches into SearchResults.
     * 
     * @param entities
     * @return
     */
    private Collection<SearchResult> dbHitsToSearchResult(Collection<? extends Object> entities, String matchText) {
        return this.dbHitsToSearchResult(entities, null, matchText);
    }

    /**
     * Convert hits from database searches into SearchResults.
     * 
     * @param entities
     * @param compassHitDerivedFrom SearchResult that these entities were derived from. For example, if you
     *        compass-searched for genes, and then used the genes to get sequences from the database, the gene is
     *        the compassHitDerivedFrom. If null, we treat this as a direct hit.
     * @param matchText TODO
     * @return
     */
    private List<SearchResult> dbHitsToSearchResult(Collection<? extends Object> entities,
            SearchResult compassHitDerivedFrom, String matchText) {
        List<SearchResult> results = new ArrayList<SearchResult>();
        for (Object e : entities) {
            if (e == null) {
                log.warn("Null search result object");
                continue;
            }
            SearchResult esr = dbHitToSearchResult(compassHitDerivedFrom, e, matchText);
            results.add(esr);
        }
        return results;
    }

    /**
     * Convert hits from database searches into SearchResults.
     * 
     * @param entities
     * @return
     */
    private Collection<SearchResult> dbHitsToSearchResult(Map<? extends Object, String> entities) {
        return this.dbHitsToSearchResult(entities, null);
    }

    /**
     * Convert hits from database searches into SearchResults.
     * 
     * @param entities
     * @param compassHitDerivedFrom SearchResult that these entities were derived from. For example, if you
     *        compass-searched for genes, and then used the genes to get sequences from the database, the gene is
     *        the compassHitDerivedFrom. If null, we treat this as a direct hit.
     * @return
     */
    private List<SearchResult> dbHitsToSearchResult(Map<? extends Object, String> entities,
            SearchResult compassHitDerivedFrom) {
        List<SearchResult> results = new ArrayList<SearchResult>();
        for (Object e : entities.keySet()) {
            SearchResult esr = dbHitToSearchResult(compassHitDerivedFrom, e, entities.get(e));
            results.add(esr);
        }
        return results;
    }

    /**
     * @param compassHitDerivedFrom
     * @param e
     * @return
     */
    private SearchResult dbHitToSearchResult(SearchResult compassHitDerivedFrom, Object e) {
        return this.dbHitToSearchResult(compassHitDerivedFrom, e, null);
    }

    /**
     * @param compassHitDerivedFrom
     * @param e
     * @return
     */
    private SearchResult dbHitToSearchResult(SearchResult compassHitDerivedFrom, Object e, String text) {
        SearchResult esr = null;
        if (compassHitDerivedFrom != null && text == null) {
            esr = new SearchResult(e, compassHitDerivedFrom.getScore() * INDIRECT_DB_HIT_PENALTY);
            esr.setHighlightedText(compassHitDerivedFrom.getHighlightedText());
        } else {
            // log.info( e + " " + text );
            esr = new SearchResult(e, 1.0, text);
        }
        return esr;
    }
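
    /*
     * Illustrative sketch (not part of the original method): scoring behaviour of the conversion above, with
     * hypothetical objects. A hit derived from a compass result (no match text supplied) inherits the compass score
     * discounted by INDIRECT_DB_HIT_PENALTY, along with its highlighted text; a plain database hit gets a score of
     * 1.0 and the supplied match text.
     * 
     *   SearchResult direct = dbHitToSearchResult(null, gene, "BDNF");       // score 1.0, match text "BDNF"
     *   SearchResult derived = dbHitToSearchResult(compassHit, sequence);    // score = compassHit.getScore() * INDIRECT_DB_HIT_PENALTY
     */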

    /**
     * @param parentMap
     */
    private void debugParentFetch(Map<Characteristic, Object> parentMap) {
        /*
         * This is purely debugging.
         */
        if (parentMap.size() > 0) {
            if (log.isDebugEnabled())
                log.debug("Found " + parentMap.size() + " owners for " + parentMap.keySet().size()
                        + " characteristics:");
            // int maxPrint = 10; int i = 0;
            // for ( Map.Entry<Characteristic, Object> entry : parentMap.entrySet()) {
            // if(i < maxPrint){
            // Object obj = entry.getValue();
            // Characteristic charac = entry.getKey();
            // if ( obj instanceof Auditable ) {
            // if ( log.isDebugEnabled() ) {
            // log.debug("Key: Characteristic Name: " + charac.getName() +" Characteristic Desc: " +
            // charac.getDescription() +" Characteristic Category: " + charac.getCategory() );
            // log.debug("Val: Owner Class: " + obj.getClass()
            // +" Owner Name: " + ( ( Auditable ) obj ).getName() +" Owner Desc: " + ( ( Auditable ) obj
            // ).getDescription() );
            // }
            // } else {
            // if ( log.isDebugEnabled() ) {
            // log.debug( " Owner : " + obj.toString() + " Owner Class: " + obj.getClass() );
            // }
            // }
            // i++;
            // }
            // }
        }
    }

    /**
     * Deals with the case where the user queried something like "hypothalamus AND sex" (without the quotes).
     * 
     * @param matches
     * @param parsedQuery
     * @return
     */
    private Collection<SearchResult> doCharacteristicSearchWithLogic(Map<SearchResult, String> matches,
            Query parsedQuery) {
        Collection<SearchResult> results = new HashSet<SearchResult>();
        try {

            Map<String, Collection<SearchResult>> invertedMatches = new HashMap<String, Collection<SearchResult>>();
            Directory idx = indexCharacteristicHits(matches, invertedMatches);
            IndexSearcher searcher = new IndexSearcher(idx);
            TopDocCollector hc = new TopDocCollector(MAX_IN_MEMORY_INDEX_HITS);
            searcher.search(parsedQuery, hc);

            TopDocs topDocs = hc.topDocs();

            int hitcount = topDocs.totalHits;
            log.info("Hits: " + hitcount);

            /*
             * If we got hits, it means that some of our results match... so we have to retrieve the objects.
             */

            for (int i = 0; i < hitcount; i++) {

                ScoreDoc scoreDoc = topDocs.scoreDocs[i];

                Document doc = searcher.doc(scoreDoc.doc);

                String match = doc.getField(INDEX_KEY).stringValue();
                Collection<SearchResult> resultsMatching = invertedMatches.get(match);
                if (resultsMatching != null) {
                    log.debug("All matches to '" + match + "': " + resultsMatching.size());
                    for (SearchResult searchResult : resultsMatching) {
                        results.add(searchResult);
                    }
                }
            }

        } catch (CorruptIndexException e) {
            throw new RuntimeException(e);
        } catch (LockObtainFailedException e) {
            throw new RuntimeException(e);
        } catch (IOException e) {
            throw new RuntimeException(e);

        }
        return results;
    }
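
    /*
     * Illustrative sketch (not part of the original method): the mechanics used above, with hypothetical objects.
     * Each distinct matched text becomes one document in a transient in-memory index (see indexCharacteristicHits),
     * the user's parsed query is run against that index, and each matching document is mapped back to the
     * SearchResults that produced it via the invertedMatches map.
     * 
     *   matches:  { eeResult -> "hypothalamus sex differences", adResult -> "cerebellum" }
     *   index:    doc1 = "hypothalamus sex differences", doc2 = "cerebellum"
     *   query:    hypothalamus AND sex   ->  hits doc1   ->  results = { eeResult }
     */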

    /**
     * @param settings
     * @return
     */
    private Collection<SearchResult> phenotypeSearch(SearchSettings settings) {
        Collection<SearchResult> results = this.dbHitsToSearchResult(
                this.phenotypeAssociationManagerService.searchInDatabaseForPhenotype(settings.getQuery()));
        return results;
    }

    /**
     * @param settings
     * @return
     */
    private Collection<SearchResult> experimentSetSearch(SearchSettings settings) {
        Collection<SearchResult> results = this
                .dbHitsToSearchResult(this.experimentSetService.findByName(settings.getQuery()));

        results.addAll(compassSearch(compassExperimentSet, settings));
        return results;
    }

    /**
     * A general search for expression experiments. This search does both a database search and a compass search.
     * 
     * @param settings
     * @return {@link Collection}
     */
    private Collection<SearchResult> expressionExperimentSearch(final SearchSettings settings) {
        StopWatch watch = startTiming();

        Collection<SearchResult> results = new HashSet<SearchResult>();

        if (settings.getUseDatabase()) {
            results.addAll(databaseExpressionExperimentSearch(settings));
        }

        if (results.size() == 0) {
            /*
             * User didn't put in an exact id, so they get a slower, more thorough search.
             */

            if (settings.getUseIndices()) {
                results.addAll(compassExpressionSearch(settings));
            }

            // a submethod of this one (ontologySearchAnnotatedObject) takes a long time
            if (settings.getUseCharacteristics()) {
                results.addAll(characteristicExpressionExperimentSearch(settings));
            }
        }

        /*
         * Find data sets that match the platform -- TODO make this do something intelligent with GPL570 + brain.
         */
        if (results.size() == 0) {
            Collection<SearchResult> matchingPlatforms = arrayDesignSearch(settings, null);
            for (SearchResult adRes : matchingPlatforms) {
                if (adRes.getResultObject() instanceof ArrayDesign) {
                    ArrayDesign ad = (ArrayDesign) adRes.getResultObject();
                    Collection<ExpressionExperiment> expressionExperiments = this.arrayDesignService
                            .getExpressionExperiments(ad);
                    if (expressionExperiments.size() > 0)
                        results.addAll(dbHitsToSearchResult(expressionExperiments));
                }
            }
        }

        if (results.size() == 0) {
            /*
             * Search for bib refs
             */
            List<BibliographicReferenceValueObject> bibrefs = bibliographicReferenceService
                    .search(settings.getQuery());

            if (!bibrefs.isEmpty()) {
                Collection<BibliographicReference> refs = new HashSet<BibliographicReference>();
                Collection<SearchResult> r = this.compassBibliographicReferenceSearch(settings);
                for (SearchResult searchResult : r) {
                    refs.add((BibliographicReference) searchResult.getResultObject());
                }

                Map<BibliographicReference, Collection<ExpressionExperiment>> relatedExperiments = this.bibliographicReferenceService
                        .getRelatedExperiments(refs);
                for (Entry<BibliographicReference, Collection<ExpressionExperiment>> e : relatedExperiments
                        .entrySet()) {
                    results.addAll(dbHitsToSearchResult(e.getValue()));
                }
            }
        }

        watch.stop();
        if (watch.getTime() > 1000)
            log.info("Expression Experiment search for '" + settings + "' took " + watch.getTime() + " ms, "
                    + results.size() + " hits.");

        return results;
    }

    /**
     * @param query
     * @return
     */
    private Set<String> extractTerms(String query) {
        Query lquer = this.makeLuceneQuery(query);

        Set<String> rawTerms = new HashSet<String>();
        if (lquer instanceof BooleanQuery) {
            BooleanClause[] clauses = ((BooleanQuery) lquer).getClauses();
            for (BooleanClause booleanClause : clauses) {
                rawTerms.add(booleanClause.toString().replaceAll("^[\\+-]", ""));
            }
        } else if (lquer instanceof PhraseQuery) {
            rawTerms.add(((PhraseQuery) lquer).toString().replaceAll("\"", ""));
        } else if (lquer instanceof PrefixQuery) {
            rawTerms.add(((PrefixQuery) lquer).getPrefix().text());
        } else {
            rawTerms.add(query);
        }
        return rawTerms;
    }
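
    /*
     * Illustrative sketch (not part of the original method): what extractTerms yields for different query shapes.
     * The field prefix comes from Lucene's clause/query toString() and depends on the value of INDEX_KEY (shown here
     * as <field>); the queries are hypothetical.
     * 
     *   "hypothalamus sex"   ->  BooleanQuery  ->  { "<field>:hypothalamus", "<field>:sex" } (leading +/- stripped)
     *   "\"white matter\""   ->  PhraseQuery   ->  { "<field>:white matter" }
     *   single plain term    ->  neither       ->  { the original query string }
     */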

    /**
     * @param settings
     * @param results
     * @param excludeWithoutTaxon if true, results whose class has no "getTaxon" method are filtered out; results with
     *        no taxon associated are also removed.
     */
    private void filterByTaxon(SearchSettings settings, Collection<SearchResult> results,
            boolean excludeWithoutTaxon) {
        if (settings.getTaxon() == null) {
            return;
        }
        Collection<SearchResult> toRemove = new HashSet<SearchResult>();
        Taxon t = settings.getTaxon();

        if (results == null)
            return;

        for (SearchResult sr : results) {

            Object o = sr.getResultObject();
            try {

                Taxon currentTaxon = null;

                if (o instanceof ExpressionExperiment) {
                    ExpressionExperiment ee = (ExpressionExperiment) o;
                    currentTaxon = expressionExperimentService.getTaxon(ee);

                } else if (o instanceof ExpressionExperimentSet) {
                    ExpressionExperimentSet ees = (ExpressionExperimentSet) o;
                    currentTaxon = ees.getTaxon();

                } else if (o instanceof Gene) {
                    Gene gene = (Gene) o;
                    currentTaxon = gene.getTaxon();

                } else if (o instanceof GeneSet) {
                    GeneSet geneSet = (GeneSet) o;
                    currentTaxon = geneSetService.getTaxon(geneSet);

                } else if (o instanceof CharacteristicValueObject) {
                    CharacteristicValueObject charVO = (CharacteristicValueObject) o;
                    currentTaxon = taxonDao.findByCommonName(charVO.getTaxon());

                } else {
                    Method m = o.getClass().getMethod("getTaxon", new Class[] {});
                    currentTaxon = (Taxon) m.invoke(o, new Object[] {});
                }

                if (currentTaxon == null || !currentTaxon.getId().equals(t.getId())) {
                    if (currentTaxon == null) {
                        // Sanity check for bad data in the db (could happen if an EE has no samples). A search result
                        // can have a valid getTaxon method that nevertheless returns null (shouldn't make it this far).
                        log.debug("Object has getTaxon method but it returns null. Obj is: " + o);
                    }
                    toRemove.add(sr);
                }
            } catch (SecurityException e) {
                throw new RuntimeException(e);
            } catch (NoSuchMethodException e) {
                /*
                 * In case of a programming error where the results don't have a taxon at all, we assume we should
                 * filter them out but issue a warning.
                 */
                if (excludeWithoutTaxon) {
                    toRemove.add(sr);
                    log.warn("No getTaxon method for: " + o.getClass() + ".  Filtering from results. Error was: "
                            + e);
                }

            } catch (IllegalArgumentException e) {
                throw new RuntimeException(e);
            } catch (IllegalAccessException e) {
                throw new RuntimeException(e);
            } catch (InvocationTargetException e) {
                throw new RuntimeException(e);
            }
        }
        results.removeAll(toRemove);
    }
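
    /*
     * Illustrative sketch (not part of the original method): for result types not handled explicitly above, the
     * taxon is looked up reflectively, so any entity exposing a public getTaxon() method can be filtered. The
     * variable names are hypothetical.
     * 
     *   Taxon taxon = (Taxon) resultObject.getClass().getMethod("getTaxon").invoke(resultObject);
     */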

    /**
     * @param classes
     * @param characteristic2entity
     * @return
     */
    private Collection<SearchResult> filterCharacteristicOwnersByClass(Collection<Class<?>> classes,
            Map<Characteristic, Object> characteristic2entity) {

        Collection<BioMaterial> biomaterials = new HashSet<BioMaterial>();
        Collection<FactorValue> factorValues = new HashSet<FactorValue>();
        Collection<SearchResult> results = new HashSet<SearchResult>();
        for (Characteristic c : characteristic2entity.keySet()) {
            Object o = characteristic2entity.get(c);
            for (Class<?> clazz : classes) {
                if (clazz.isAssignableFrom(o.getClass())) {
                    String matchedText = c.getValue();

                    if (o instanceof BioMaterial) {
                        biomaterials.add((BioMaterial) o);

                    } else if (o instanceof FactorValue) {
                        factorValues.add((FactorValue) o);
                    } else {

                        if (c instanceof VocabCharacteristic && ((VocabCharacteristic) c).getValueUri() != null) {
                            matchedText = "Ontology term: <a href=\"/Gemma/searcher.html?query="
                                    + ((VocabCharacteristic) c).getValueUri() + "\">" + matchedText + "</a>";
                        }
                        results.add(new SearchResult(o, 1.0, matchedText));
                    }
                }
            }
        }

        if (factorValues.size() > 0) {
            Collection<ExpressionExperiment> ees = expressionExperimentService.findByFactorValues(factorValues);
            for (ExpressionExperiment ee : ees) {
                if (log.isDebugEnabled())
                    log.debug(ee);
                results.add(new SearchResult(ee, INDIRECT_DB_HIT_PENALTY, "Factor characteristic"));
            }
        }

        if (biomaterials.size() > 0) {
            Collection<ExpressionExperiment> ees = expressionExperimentService.findByBioMaterials(biomaterials);
            for (ExpressionExperiment ee : ees) {
                results.add(new SearchResult(ee, INDIRECT_DB_HIT_PENALTY, "BioMaterial characteristic"));
            }
        }
        return results;
    }
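
    /*
     * Illustrative sketch (not part of the original method): characteristics owned by BioMaterials or FactorValues
     * are not returned directly; instead they are lifted to the ExpressionExperiments that use them, with the
     * reduced INDIRECT_DB_HIT_PENALTY score. The objects are hypothetical.
     * 
     *   characteristic "hippocampus" owned by a BioMaterial
     *       ->  new SearchResult(owningExpressionExperiment, INDIRECT_DB_HIT_PENALTY, "BioMaterial characteristic")
     */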

    /**
     * Combines the compass-style search, the db-style search, and the compositeSequence search, and returns one
     * combined list with no duplicates.
     * 
     * @param settings
     * @param returnOnDbHit if true and there is a match for a gene from the database, return immediately
     * @return
     */
    private Collection<SearchResult> geneSearch(final SearchSettings settings, boolean returnOnDbHit) {

        StopWatch watch = startTiming();

        String searchString = settings.getQuery();

        Collection<SearchResult> geneDbList = databaseGeneSearch(settings);

        if (returnOnDbHit && geneDbList.size() > 0) {
            return geneDbList;
        }

        Set<SearchResult> combinedGeneList = new HashSet<SearchResult>();
        combinedGeneList.addAll(geneDbList);

        Collection<SearchResult> geneCompassList = compassGeneSearch(settings);
        combinedGeneList.addAll(geneCompassList);

        if (combinedGeneList.size() == 0) {
            Collection<SearchResult> geneCsList = databaseCompositeSequenceSearch(settings);
            for (SearchResult res : geneCsList) {
                if (res.getResultClass().isAssignableFrom(Gene.class))
                    combinedGeneList.add(res);
            }
        }

        // filterByTaxon( settings, combinedGeneList); // compass doesn't return filled gene objects, just ids, so do
        // this after objects have been filled

        if (watch.getTime() > 1000)
            log.info("Gene search for " + searchString + " took " + watch.getTime() + " ms; "
                    + combinedGeneList.size() + " results.");
        return combinedGeneList;
    }

    /**
     * @param settings
     * @return
     */
    private Collection<SearchResult> geneSetSearch(SearchSettings settings) {
        Collection<SearchResult> hits;
        if (settings.getTaxon() != null) {
            hits = this
                    .dbHitsToSearchResult(this.geneSetService.findByName(settings.getQuery(), settings.getTaxon()));
        } else {
            hits = this.dbHitsToSearchResult(this.geneSetService.findByName(settings.getQuery()));
        }

        hits.addAll(compassSearch(compassGeneSet, settings));
        return hits;
    }

    /**
     * Given classes to search and characteristics, find the entities of those classes that own the characteristics.
     * 
     * @param classes Which classes of entities to look for
     * @param cs
     * @return
     */
    private Collection<SearchResult> getAnnotatedEntities(Collection<Class<?>> classes,
            Collection<Characteristic> cs) {

        Map<Characteristic, Object> characterstic2entity = characteristicService.getParents(classes, cs);
        Collection<SearchResult> matchedEntities = filterCharacteristicOwnersByClass(classes, characterstic2entity);

        if (log.isDebugEnabled()) {
            debugParentFetch(characterstic2entity);
        }
        return matchedEntities;
    }

    /**
     * @param searchResults
     * @return List of ids for the entities held by the search results.
     */
    private List<Long> getIds(List<SearchResult> searchResults) {
        List<Long> list = new ArrayList<Long>();
        for (SearchResult ee : searchResults) {
            list.add(ee.getId());
        }
        return list;
    }

    /**
     * @param hits
     * @return
     */
    private Collection<SearchResult> getSearchResults(CompassHits hits) {
        StopWatch timer = new StopWatch();
        timer.start();
        Collection<SearchResult> results = new HashSet<SearchResult>();
        /*
         * Note that hits come in decreasing score order.
         */
        for (int i = 0, len = Math.min(MAX_LUCENE_HITS, hits.getLength()); i < len; i++) {

            SearchResult r = new SearchResult(hits.data(i));

            /*
             * Always give compass hits a lower score so they can be differentiated from exact database hits.
             */
            r.setScore(new Double(hits.score(i) * COMPASS_HIT_SCORE_PENALTY_FACTOR));

            getHighlightedText(hits, i, r);

            if (log.isDebugEnabled())
                log.debug(i + " " + hits.score(i) + " " + r);

            results.add(r);
        }

        if (timer.getTime() > 100) {
            log.info(results.size() + " hits retrieved (out of " + Math.min(MAX_LUCENE_HITS, hits.getLength())
                    + " raw hits tested) in " + timer.getTime() + "ms");
        }
        if (timer.getTime() > 5000) {
            log.info("****Extremely long Lucene Search processing! " + results.size() + " hits retrieved (out of "
                    + Math.min(MAX_LUCENE_HITS, hits.getLength()) + " raw hits tested) in " + timer.getTime()
                    + "ms");
        }

        return results;
    }
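
    /*
     * Illustrative sketch (not part of the original method): compass hits are deliberately down-weighted so that
     * exact database hits (score 1.0) sort ahead of free-text index hits. The raw score is hypothetical.
     * 
     *   raw lucene score 0.9  ->  stored score 0.9 * COMPASS_HIT_SCORE_PENALTY_FACTOR
     */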

    /**
     * @param hits
     * @param i
     * @param r
     */
    private void getHighlightedText(CompassHits hits, int i, SearchResult r) {
        CompassHighlightedText highlightedText = hits.highlightedText(i);
        if (highlightedText != null && highlightedText.getHighlightedText() != null) {
            r.setHighlightedText(highlightedText.getHighlightedText());
        } else {
            if (log.isDebugEnabled())
                log.debug("No highlighted text for " + r);
        }
    }

    /**
     * @param settings
     * @param rawResults
     * @param fillObjects
     */
    private Map<Class<?>, List<SearchResult>> getSortedLimitedResults(SearchSettings settings,
            List<SearchResult> rawResults, boolean fillObjects) {

        Map<Class<?>, List<SearchResult>> results = new HashMap<Class<?>, List<SearchResult>>();
        Collections.sort(rawResults);

        results.put(ArrayDesign.class, new ArrayList<SearchResult>());
        results.put(BioSequence.class, new ArrayList<SearchResult>());
        results.put(BibliographicReference.class, new ArrayList<SearchResult>());
        results.put(CompositeSequence.class, new ArrayList<SearchResult>());
        results.put(ExpressionExperiment.class, new ArrayList<SearchResult>());
        results.put(Gene.class, new ArrayList<SearchResult>());
        results.put(GeneSet.class, new ArrayList<SearchResult>());
        results.put(ExpressionExperimentSet.class, new ArrayList<SearchResult>());
        results.put(Characteristic.class, new ArrayList<SearchResult>());
        results.put(CharacteristicValueObject.class, new ArrayList<SearchResult>());

        /*
         * Get the top N results, overall (NOT within each class - experimental.)
         */
        for (int i = 0, limit = Math.min(rawResults.size(), settings.getMaxResults()); i < limit; i++) {
            SearchResult sr = rawResults.get(i);

            /*
             * FIXME This is unpleasant and should be removed when BioSequences are correctly detached.
             */
            Class<? extends Object> resultClass = EntityUtils.getImplementationForProxy(sr.getResultObject())
                    .getClass();

            resultClass = ReflectionUtil.getBaseForImpl(resultClass);

            // Class<? extends Object> resultClass = sr.getResultClass();
            assert results.containsKey(resultClass) : "Unknown class " + resultClass;
            results.get(resultClass).add(sr);
        }

        if (fillObjects) {
            /*
             * Now retrieve the entities and put them in the SearchResult. Entities that are filtered out by the
             * SecurityInterceptor will be removed at this stage.
             */
            for (Class<? extends Object> clazz : results.keySet()) {
                List<SearchResult> r = results.get(clazz);
                if (r.size() == 0)
                    continue;
                Map<Long, SearchResult> rMap = new HashMap<Long, SearchResult>();
                for (SearchResult searchResult : r) {
                    if (!rMap.containsKey(searchResult.getId())
                            || (rMap.get(searchResult.getId()).getScore() < searchResult.getScore())) {
                        rMap.put(searchResult.getId(), searchResult);
                    }
                }

                Collection<? extends Object> entities = retrieveResultEntities(clazz, r);
                List<SearchResult> filteredResults = new ArrayList<SearchResult>();
                for (Object entity : entities) {
                    Long id = EntityUtils.getId(entity);
                    SearchResult keeper = rMap.get(id);
                    keeper.setResultObject(entity);
                    filteredResults.add(keeper);
                }

                filterByTaxon(settings, filteredResults, false);

                results.put(clazz, filteredResults);

            }
        } else {
            for (SearchResult sr : rawResults) {
                sr.setResultObject(null);
            }
        }

        List<SearchResult> convertedResults = convertEntitySearchResutsToValueObjectsSearchResults(
                results.get(BioSequence.class));
        results.put(BioSequenceValueObject.class, convertedResults);
        results.remove(BioSequence.class);

        return results;
    }
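
    /*
     * Illustrative sketch (not part of the original method): when fillObjects is true, duplicate raw results that
     * point at the same entity id are collapsed to the single highest-scoring SearchResult, and BioSequence hits are
     * returned keyed as BioSequenceValueObject rather than as entities. The scores are hypothetical.
     * 
     *   { (Gene id=5, score 0.4), (Gene id=5, score 0.9) }  ->  one SearchResult for id 5, score 0.9
     */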

    /**
     * @param matches
     * @param invertedMatches
     * @return
     * @throws CorruptIndexException
     * @throws LockObtainFailedException
     * @throws IOException
     */
    private RAMDirectory indexCharacteristicHits(Map<SearchResult, String> matches,
            Map<String, Collection<SearchResult>> invertedMatches)
            throws CorruptIndexException, LockObtainFailedException, IOException {
        /*
         * Make an in-memory index. See http://javatechniques.com/blog/lucene-in-memory-text-search-engine (somewhat
         * out of date); maybe there is an easier way.
         */
        RAMDirectory idx = new RAMDirectory();
        IndexWriter writer = new IndexWriter(idx, this.analyzer, true, MaxFieldLength.LIMITED);

        for (SearchResult o : matches.keySet()) {
            String text = matches.get(o);
            if (!invertedMatches.containsKey(text)) {
                invertedMatches.put(text, new HashSet<SearchResult>());
                writer.addDocument(createDocument(text));
            }
            invertedMatches.get(text).add(o);
        }

        writer.close();

        return idx;
    }

    /**
     * Turn query into a Lucene query.
     * 
     * @param query
     * @return
     */
    private Query makeLuceneQuery(String query) {
        QueryParser parser = new QueryParser(INDEX_KEY, this.analyzer);
        QueryParser.Operator defaultOperator = null;
        String sDefaultOperator = ConfigUtils.getDefaultSearchOperator();
        if (sDefaultOperator.equalsIgnoreCase(("or"))) {
            defaultOperator = QueryParser.OR_OPERATOR;
        } else if (sDefaultOperator.equalsIgnoreCase(("and"))) {
            defaultOperator = QueryParser.AND_OPERATOR;
        } else {
            throw new IllegalArgumentException(
                    "Unknown defaultOperator: " + sDefaultOperator + ", only OR and AND are supported");
        }
        parser.setDefaultOperator(defaultOperator);
        Query parsedQuery;
        try {
            parsedQuery = parser.parse(query);
        } catch (ParseException e) {
            throw new RuntimeException("Cannot parse query: " + e.getMessage());
        }
        return parsedQuery;
    }
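
    /*
     * Illustrative sketch (not part of the original method): effect of the configured default operator on a
     * two-term query. The field name (shown as <field>) depends on INDEX_KEY and the query is hypothetical.
     * 
     *   default operator "and"  ->  "hypothalamus sex" parses as  +<field>:hypothalamus +<field>:sex
     *   default operator "or"   ->  "hypothalamus sex" parses as   <field>:hypothalamus  <field>:sex
     */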

    /**
     * Attempts to find an exact match for the search term in the characteristic table (by value and value URI). If the
     * search term is found then uses that URI to find the parents and returns them as SearchResults.
     * 
     * @param classes
     * @param settings
     * @return
     */
    private Collection<SearchResult> ontologySearchAnnotatedObject(Collection<Class<?>> classes,
            SearchSettings settings) {

        /*
         * Direct search.
         */
        Collection<SearchResult> results = new HashSet<SearchResult>();

        /*
         * Include children in ontologies, if any. This can be slow if there are a lot of children.
         */
        Collection<SearchResult> childResults = characteristicSearchWithChildren(classes, settings);

        results.addAll(childResults);

        return results;

    }

    /**
     * If necessary, screen the results for the logic requested by the user. Thus, "sex AND hypothalamus" will return
     * only results that have both terms associated with them.
     * 
     * @param query
     * @param unprocessedResults
     * @param matches Map of SearchResult to the matching String (the characteristic value, basically)
     * @return
     */
    private Collection<SearchResult> postProcessCharacteristicResults(String query,
            Collection<SearchResult> unprocessedResults, Map<SearchResult, String> matches) {
        Query parsedQuery = makeLuceneQuery(query);
        return doCharacteristicSearchWithLogic(matches, parsedQuery);
    }

    /**
     * Retrieve entities from the persistent store.
     * 
     * @param entityClass
     * @param results
     * @return
     */
    private Collection<? extends Object> retrieveResultEntities(Class<?> entityClass, List<SearchResult> results) {
        List<Long> ids = getIds(results);
        if (ExpressionExperiment.class.isAssignableFrom(entityClass)) {
            return expressionExperimentService.loadMultiple(ids);
        } else if (ArrayDesign.class.isAssignableFrom(entityClass)) {
            return arrayDesignService.loadMultiple(ids);
        } else if (CompositeSequence.class.isAssignableFrom(entityClass)) {
            return compositeSequenceService.loadMultiple(ids);
        } else if (BibliographicReference.class.isAssignableFrom(entityClass)) {
            return bibliographicReferenceService.loadMultiple(ids);
        } else if (Gene.class.isAssignableFrom(entityClass)) {
            return geneService.loadMultiple(ids);
        } else if (BioSequence.class.isAssignableFrom(entityClass)) {
            Collection<BioSequence> bs = bioSequenceService.loadMultiple(ids);
            return bs;
        } else if (GeneSet.class.isAssignableFrom(entityClass)) {
            return geneSetService.load(ids);
        } else if (ExpressionExperimentSet.class.isAssignableFrom(entityClass)) {
            return experimentSetService.load(ids);
        } else if (Characteristic.class.isAssignableFrom(entityClass)) {
            Collection<Characteristic> chars = new ArrayList<Characteristic>();
            for (Long id : ids) {
                chars.add(characteristicService.load(id));
            }
            return chars;
        } else if (CharacteristicValueObject.class.isAssignableFrom(entityClass)) {
            // TEMP HACK this whole method should not be needed in many cases
            Collection<CharacteristicValueObject> chars = new ArrayList<CharacteristicValueObject>();
            for (SearchResult result : results) {
                if (result.getResultClass().isAssignableFrom(CharacteristicValueObject.class)) {
                    chars.add((CharacteristicValueObject) result.getResultObject());
                }
            }
            return chars;
        } else {
            throw new UnsupportedOperationException("Don't know how to retrieve objects for class=" + entityClass);
        }
    }

    private StopWatch startTiming() {
        StopWatch watch = new StopWatch();
        watch.start();
        return watch;
    }

    /**
     * Makes no attempt at resolving the search query as a URI. Will tokenize the search query if there are control
     * characters in the String. URIs will get parsed into multiple query terms and lead to bad results.
     * 
     * @param settings Will try to resolve general terms like brain --> to appropriate OntologyTerms and search for
     *        objects tagged with those terms (if useCharacteristics = true)
     * @param fillObjects If false, the entities will not be filled in inside the SearchResults; instead, they will be
     *        nulled (for security purposes). You can then use the id and Class stored in the SearchResult to load the
     *        entities at your leisure. If true, the entities are loaded in the usual secure fashion. Setting this to
     *        false can be an optimization if all you need is the id. Note: filtering by taxon will not be done unless
     *        objects are filled.
     * @param webSpeedSearch if true, this call is probably coming from a web app combo box and results will be limited
     *        to improve speed
     * @return
     */
    protected Map<Class<?>, List<SearchResult>> generalSearch(SearchSettings settings, boolean fillObjects,
            boolean webSpeedSearch) {
        String searchString = QueryParser.escape(StringUtils.strip(settings.getQuery()));

        if (settings.getTaxon() == null) {

            // split the query around whitespace characters, limit the splitting to 4 terms (may be excessive)
            String[] searchTerms = searchString.split("\\s+", 4);
            for (int i = 0; i < searchTerms.length; i++) {
                searchTerms[i] = searchTerms[i].toLowerCase();
            }
            List<String> searchTermsList = Arrays.asList(searchTerms);

            // this Set is ordered by insertion order (LinkedHashMap)
            Set<String> keywords = nameToTaxonMap.keySet();

            // only strip out taxon terms if there is more than one search term in the query and if the entire search
            // string is not itself a keyword
            if (searchTerms.length > 1 && !keywords.contains(searchString.toLowerCase())) {

                for (String keyword : keywords) {

                    int termIndex = searchString.toLowerCase().indexOf(keyword);
                    // make sure that the keyword occurs in the searchString
                    if (termIndex != -1) {
                        // make sure that either the keyword is multi-term or that it occurs as a single term (not as
                        // part of another word)
                        if (keyword.contains(" ") || searchTermsList.contains(keyword)) {
                            searchString = searchString.replaceFirst("(?i)" + keyword, "").trim();
                            settings.setTaxon(nameToTaxonMap.get(keyword));
                            // break on the first term found in keywords since they should be (more or less) ordered
                            // by precedence
                            break;
                        }
                    }

                }

            }

        }
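
        /*
         * Illustrative sketch (not part of the original logic): how the keyword stripping above behaves, assuming
         * "mouse" is a key in nameToTaxonMap. The queries are hypothetical.
         * 
         *   "mouse hippocampus"  ->  settings.setTaxon(mouseTaxon); searchString becomes "hippocampus"
         *   "mouse"              ->  left untouched, because the entire query is itself a taxon keyword
         */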

        String[] searchTerms = searchString.split("\\s+");

        // single-character terms cause Lucene to fail and were slipping through in multi-term queries; get rid of
        // them
        if (searchTerms.length > 0) {
            searchString = "";
            for (String sTerm : searchTerms) {
                if (sTerm.length() > 1) {
                    searchString = searchString + " " + sTerm;
                }
            }
            searchString = searchString.trim();
        }

        settings.setQuery(searchString);

        // If nothing to search return nothing.
        if (StringUtils.isBlank(searchString)) {
            return new HashMap<Class<?>, List<SearchResult>>();
        }

        List<SearchResult> rawResults = new ArrayList<SearchResult>();

        if (settings.getSearchExperiments()) {
            Collection<SearchResult> foundEEs = expressionExperimentSearch(settings);
            rawResults.addAll(foundEEs);
        }

        Collection<SearchResult> genes = null;
        if (settings.getSearchGenes()) {
            genes = geneSearch(settings, webSpeedSearch);
            accreteResults(rawResults, genes);
        }

        // The SearchSettings persistent entity does not contain the usePhenotypes property that this logic requires
        /*
         * if ( settings.getUsePhenotypes() && settings.getSearchGenes() ) {
         * 
         * Collection<SearchResult> phenotypeGenes = dbHitsToSearchResult(
         * geneSearchService.getPhenotypeAssociatedGenes( searchString, settings.getTaxon() ),
         * "From phenotype association" ); accreteResults( rawResults, phenotypeGenes ); }
         */

        Collection<SearchResult> compositeSequences = null;
        if (settings.getSearchProbes()) {
            compositeSequences = compositeSequenceSearch(settings);
            accreteResults(rawResults, compositeSequences);
        }

        if (settings.getSearchPlatforms()) {
            Collection<SearchResult> foundADs = arrayDesignSearch(settings, compositeSequences);
            accreteResults(rawResults, foundADs);
        }

        if (settings.getSearchBioSequences()) {
            Collection<SearchResult> bioSequences = bioSequenceSearch(settings, genes);
            accreteResults(rawResults, bioSequences);
        }

        if (settings.getUseGo()) {
            Collection<SearchResult> ontologyGenes = dbHitsToSearchResult(
                    geneSearchService.getGOGroupGenes(searchString, settings.getTaxon()), "From GO group");
            accreteResults(rawResults, ontologyGenes);
        }

        if (settings.getSearchBibrefs()) {
            Collection<SearchResult> bibliographicReferences = compassBibliographicReferenceSearch(settings);
            accreteResults(rawResults, bibliographicReferences);
        }

        if (settings.getSearchGeneSets()) {
            Collection<SearchResult> geneSets = geneSetSearch(settings);
            accreteResults(rawResults, geneSets);
        }

        if (settings.getSearchExperimentSets()) {
            Collection<SearchResult> experimentSets = experimentSetSearch(settings);
            accreteResults(rawResults, experimentSets);
        }

        if (settings.getSearchPhenotypes()) {
            Collection<SearchResult> phenotypes = phenotypeSearch(settings);
            accreteResults(rawResults, phenotypes);
        }
        Map<Class<?>, List<SearchResult>> sortedLimitedResults = getSortedLimitedResults(settings, rawResults,
                fillObjects);

        log.info("search for: " + settings.getQuery() + " " + rawResults.size()
                + " raw results (final tally may be filtered)");

        return sortedLimitedResults;
    }

    /**
     * Runs inside a Compass transaction.
     * 
     * @param settings
     * @param session
     * @return
     */
    Collection<SearchResult> performSearch(SearchSettings settings, CompassSession session) {
        StopWatch watch = startTiming();

        String query = settings.getQuery().trim();
        // Search results should contain all the words from the query.
        query = query.replaceAll("\\s+", " AND ");

        if (StringUtils.isBlank(query) || query.length() < MINIMUM_STRING_LENGTH_FOR_FREE_TEXT_SEARCH
                || query.equals("*"))
            return new ArrayList<SearchResult>();

        CompassQuery compassQuery = session.queryBuilder().queryString(query).toQuery();
        CompassHits hits = compassQuery.hits();

        watch.stop();
        if (watch.getTime() > 100) {
            log.info("Getting " + hits.getLength() + " lucene hits for " + query + " took " + watch.getTime()
                    + " ms");
        }
        if (watch.getTime() > 5000) {
            log.info("*****Extremely long Lucene Index Search!  " + hits.getLength() + " lucene hits for " + query
                    + " took " + watch.getTime() + " ms");
        }

        return getSearchResults(hits);
    }
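
    /*
     * Illustrative sketch (not part of the original method): the whitespace-to-AND rewrite above means every word of
     * a multi-word query must match for a compass hit. The query is hypothetical.
     * 
     *   "hypothalamus sex differences"  ->  "hypothalamus AND sex AND differences"  (passed to the Compass query builder)
     */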

    @Override
    public Map<Class<?>, List<SearchResult>> searchForNewlyCreatedUserQueryResults(UserQuery query) {

        // TODO set last run time for query at end of this method? maybe do that outside this method

        Map<Class<?>, List<SearchResult>> searchResults;
        Map<Class<?>, List<SearchResult>> finalResults = new HashMap<Class<?>, List<SearchResult>>();

        SearchSettings settings = query.getSearchSettings();

        if (StringUtils.isBlank(settings.getTermUri()) && !settings.getQuery().startsWith("http://")) {
            // fillObjects = true, webSpeedSearch = false
            searchResults = generalSearch(settings, true, false);
        } else {
            // we only attempt an ontology search if the uri looks remotely like a url.
            searchResults = ontologyUriSearch(settings);
        }

        if (searchResults != null) {
            for (Class<?> clazz : searchResults.keySet()) {

                List<SearchResult> results = searchResults.get(clazz);

                List<SearchResult> updatedResults = new ArrayList<SearchResult>();

                if (results.size() == 0)
                    continue;

                log.info("Search for newly createdQuery with settings: " + settings + "; result: " + results.size()
                        + " " + clazz.getSimpleName() + "s");

                for (SearchResult sr : results) {

                    // Are SearchResults always Auditable? Maybe add error handling in case they are not, or enforce
                    // that the searchSettings object is of a certain form.
                    Auditable auditableResult = (Auditable) sr.getResultObject();

                    // this list is ordered by date (not descending)
                    List<AuditEvent> eventList = auditTrailService.getEvents(auditableResult);

                    if (eventList == null || eventList.isEmpty())
                        continue;

                    for (AuditEvent ae : eventList) {

                        // assuming there is only one create event
                        if (ae.getAction() == AuditAction.CREATE && ae.getDate().after(query.getLastUsed())) {
                            updatedResults.add(sr);
                            break;
                        }

                    }

                }

                if (!updatedResults.isEmpty()) {
                    finalResults.put(clazz, updatedResults);
                }

            }
        }

        return finalResults;

    }

}