Java tutorial
/* * The Gemma project * * Copyright (c) 2006 University of British Columbia * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package ubic.gemma.core.search; import net.sf.ehcache.Cache; import net.sf.ehcache.CacheException; import net.sf.ehcache.CacheManager; import net.sf.ehcache.Element; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.compass.core.*; import org.compass.core.mapping.CompassMapping; import org.compass.core.mapping.Mapping; import org.compass.core.mapping.ResourceMapping; import org.compass.core.mapping.osem.ClassMapping; import org.compass.core.mapping.osem.ComponentMapping; import org.compass.core.spi.InternalCompassSession; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.stereotype.Service; import ubic.basecode.ontology.model.OntologyIndividual; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.util.BatchIterator; import ubic.gemma.core.annotation.reference.BibliographicReferenceService; import ubic.gemma.core.association.phenotype.PhenotypeAssociationManagerService; import ubic.gemma.core.genome.gene.service.GeneSearchService; import ubic.gemma.core.genome.gene.service.GeneService; import ubic.gemma.core.genome.gene.service.GeneSetService; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.common.Auditable; import ubic.gemma.model.common.auditAndSecurity.AuditAction; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import ubic.gemma.model.common.auditAndSecurity.UserQuery; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.common.search.SearchSettingsImpl; import ubic.gemma.model.common.search.SearchSettingsValueObject; import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.arrayDesign.BlacklistedPlatform; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.BlacklistedExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.FactorValue; import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.gene.GeneSet; import ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject; import ubic.gemma.model.genome.gene.phenotype.valueObject.GeneEvidenceValueObject; import ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; import ubic.gemma.persistence.service.common.description.CharacteristicService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityDao; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; import ubic.gemma.persistence.service.genome.biosequence.BioSequenceService; import ubic.gemma.persistence.service.genome.gene.GeneProductService; import ubic.gemma.persistence.service.genome.taxon.TaxonDao; import ubic.gemma.persistence.util.CacheUtils; import ubic.gemma.persistence.util.EntityUtils; import ubic.gemma.persistence.util.Settings; import javax.annotation.PostConstruct; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.*; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This service is used for performing searches using free text or exact matches to items in the database. * <h2>Implementation notes</h2> * Internally, there are generally two kinds of searches performed, precise database searches looking for exact matches * in the database and compass/lucene searches which look for matches in the stored index. * To add more dependencies to this Service edit the applicationContext-search.xml * * @author klc * @author paul * @author keshav */ @Service public class SearchServiceImpl implements SearchService { /** * Penalty applied to all 'index' hits. */ private static final double COMPASS_HIT_SCORE_PENALTY_FACTOR = 0.5; /** * Penalty applied to scores on hits for entities that derive from an association. For example, if a hit to an EE * came from text associated with one of its biomaterials, * the score is penalized by this amount (or, this is just the actual score used) */ private static final double INDIRECT_DB_HIT_PENALTY = 0.8; private static final Log log = LogFactory.getLog(SearchServiceImpl.class.getName()); /** * The maximum number of characteristics to retain; this has to be fairly high since a large number of * characteristics * will typically reduced down to a smaller number of annotated entities. */ private static final int MAX_CHARACTERISTIC_SEARCH_RESULTS = 10000; private static final int MAX_LUCENE_HITS = 3000; private static final int MINIMUM_EE_QUERY_LENGTH = 3; private static final int MINIMUM_STRING_LENGTH_FOR_FREE_TEXT_SEARCH = 2; private static final String NCBI_GENE = "ncbi_gene"; /** * How long after creation before an object is evicted, no matter what (seconds) */ private static final int ONTOLOGY_CACHE_TIME_TO_DIE = 10000; /** * How long an item in the cache lasts when it is not accessed. */ private static final int ONTOLOGY_CACHE_TIME_TO_IDLE = 3600; private static final String ONTOLOGY_CHILDREN_CACHE_NAME = "OntologyChildrenCache"; /** * How many term children can stay in memory */ private static final int ONTOLOGY_INFO_CACHE_SIZE = 30000; /** * If fewer than this number of experiments are returned from the a search of experiment characteristics, then * search for experiments indirectly as well (ex: by finding bioMaterials tagged with the characteristics and * getting the experiments associated with them ). See also MAX_CHARACTERISTIC_SEARCH_RESULTS. */ private static final int SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS = 10000; private final HashMap<String, Taxon> nameToTaxonMap = new LinkedHashMap<>(); @Autowired private ArrayDesignService arrayDesignService; @Autowired private AuditTrailService auditTrailService; @Autowired private BibliographicReferenceService bibliographicReferenceService; @Autowired private BioSequenceService bioSequenceService; @Autowired private CacheManager cacheManager; @Autowired private BlacklistedEntityDao blackListDao; @Autowired private CharacteristicService characteristicService; private Cache childTermCache; @Autowired @Qualifier("compassArray") private Compass compassArray; @Autowired @Qualifier("compassBibliographic") private Compass compassBibliographic; @Autowired @Qualifier("compassBiosequence") private Compass compassBiosequence; @Autowired @Qualifier("compassExperimentSet") private Compass compassExperimentSet; @Autowired @Qualifier("compassExpression") private Compass compassExpression; @Autowired @Qualifier("compassGene") private Compass compassGene; @Autowired @Qualifier("compassGeneSet") private Compass compassGeneSet; @Autowired @Qualifier("compassProbe") private Compass compassProbe; @Autowired private CompositeSequenceService compositeSequenceService; @Autowired private ExpressionExperimentSetService experimentSetService; @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired private GeneProductService geneProductService; @Autowired private GeneSearchService geneSearchService; @Autowired private GeneService geneService; @Autowired private GeneSetService geneSetService; @Autowired private OntologyService ontologyService; @Autowired private PhenotypeAssociationManagerService phenotypeAssociationManagerService; @Autowired private TaxonDao taxonDao; @Override public Map<Class<?>, List<SearchResult>> ajaxSearch(SearchSettingsValueObject settingsValueObject) { SearchSettings settings = SearchSettingsValueObject.toEntity(settingsValueObject); return this.search(settings); } @Override public Map<Class<?>, List<SearchResult>> search(SearchSettings settings) { Map<Class<?>, List<SearchResult>> searchResults = new HashMap<>(); try { searchResults = this.search(settings, true, false); } catch (org.compass.core.engine.SearchEngineQueryParseException qpe) { SearchServiceImpl.log.error("Query parse Error: " + settings + "; message=" + qpe.getMessage(), qpe); } catch (Exception e) { SearchServiceImpl.log.error("Search error on settings: " + settings + "; message=" + e.getMessage(), e); } return searchResults; } @Override public Map<Class<?>, List<SearchResult>> speedSearch(SearchSettings settings) { Map<Class<?>, List<SearchResult>> searchResults = new HashMap<>(); try { searchResults = this.search(settings, true, true); } catch (org.compass.core.engine.SearchEngineQueryParseException qpe) { SearchServiceImpl.log.error("Query parse Error: " + settings + "; message=" + qpe.getMessage(), qpe); } catch (Exception e) { SearchServiceImpl.log.error("Search error on settings: " + settings + "; message=" + e.getMessage(), e); } return searchResults; } @Override public Map<Class<?>, List<SearchResult>> search(SearchSettings settings, boolean fillObjects, boolean webSpeedSearch) { if (StringUtils.isBlank(settings.getTermUri()) && !settings.getQuery().startsWith("http://")) { return this.generalSearch(settings, fillObjects, webSpeedSearch); } // we only attempt an ontology search if the uri looks remotely like a url. return this.ontologyUriSearch(settings); } @Override public Collection<Long> searchExpressionExperiments(String query, Long taxonId) { Taxon taxon = null; if (taxonId != null) { taxon = taxonDao.load(taxonId); } Collection<Long> eeIds = new HashSet<>(); if (StringUtils.isNotBlank(query)) { if (query.length() < SearchServiceImpl.MINIMUM_EE_QUERY_LENGTH) return eeIds; // Initial list List<SearchResult> results = this .search(SearchSettingsImpl.expressionExperimentSearch(query, taxon), false, false) .get(ExpressionExperiment.class); for (SearchResult result : results) { eeIds.add(result.getId()); } // // Filter by taxon // if ( taxon != null ) { // // very inefficient. // eeIds.retainAll( EntityUtils.getIds( expressionExperimentService.findByTaxon( taxon ) ) ); // } } else if (taxonId != null) { // can return many, many results ... so we put a limit. eeIds = EntityUtils.getIds(expressionExperimentService.findByTaxon(taxon, MAX_LUCENE_HITS)); } return eeIds; } @SuppressWarnings("unchecked") @Override public <T> List<T> search(SearchSettings settings, Class<T> resultClass) { Map<Class<?>, List<SearchResult>> searchResults = this.search(settings); List<T> resultObjects = new ArrayList<>(); List<SearchResult> searchResultObjects = searchResults.get(resultClass); if (searchResultObjects == null) return resultObjects; for (SearchResult sr : searchResultObjects) { resultObjects.add((T) sr.getResultObject()); } return resultObjects; } @Override public Map<Class<?>, List<SearchResult>> searchForNewlyCreatedUserQueryResults(UserQuery query) { Map<Class<?>, List<SearchResult>> searchResults; Map<Class<?>, List<SearchResult>> finalResults = new HashMap<>(); SearchSettings settings = query.getSearchSettings(); if (StringUtils.isBlank(settings.getTermUri()) && !settings.getQuery().startsWith("http://")) { // fill objects=true, speedySearch=false searchResults = this.generalSearch(settings, true, false); } else { // we only attempt an ontology search if the uri looks remotely like a url. searchResults = this.ontologyUriSearch(settings); } if (searchResults == null) { return finalResults; } for (Class<?> clazz : searchResults.keySet()) { List<SearchResult> results = searchResults.get(clazz); List<SearchResult> updatedResults = new ArrayList<>(); if (results.size() == 0) continue; SearchServiceImpl.log.info("Search for newly createdQuery with settings: " + settings + "; result: " + results.size() + " " + clazz.getSimpleName() + "s"); for (SearchResult sr : results) { // Are SearchResults always auditable? maybe put in some error handling in case they are not or // enforce searchSettings object to be of a certain form Auditable auditableResult = (Auditable) sr.getResultObject(); // this list is ordered by date (not descending) List<AuditEvent> eventList = auditTrailService.getEvents(auditableResult); if (eventList == null || eventList.isEmpty()) continue; for (AuditEvent ae : eventList) { // assuming there is only one create event if (ae.getAction() == AuditAction.CREATE && ae.getDate().after(query.getLastUsed())) { updatedResults.add(sr); break; } } } if (!updatedResults.isEmpty()) { finalResults.put(clazz, updatedResults); } } return finalResults; } @PostConstruct void initializeSearchService() { try { boolean terracottaEnabled = Settings.getBoolean("gemma.cache.clustered", false); this.childTermCache = CacheUtils.createOrLoadCache(cacheManager, SearchServiceImpl.ONTOLOGY_CHILDREN_CACHE_NAME, terracottaEnabled, SearchServiceImpl.ONTOLOGY_INFO_CACHE_SIZE, false, false, SearchServiceImpl.ONTOLOGY_CACHE_TIME_TO_IDLE, SearchServiceImpl.ONTOLOGY_CACHE_TIME_TO_DIE, false); } catch (CacheException e) { throw new RuntimeException(e); } this.initializeNameToTaxonMap(); } private Collection<SearchResult> filterExperimentHitsByTaxon(Collection<SearchResult> unfilteredResults, Taxon t) { if (t == null || unfilteredResults.isEmpty()) return unfilteredResults; Collection<SearchResult> filteredResults = new HashSet<>(); Collection<Long> eeIds = this.expressionExperimentService .filterByTaxon(EntityUtils.getIds(unfilteredResults), t); for (SearchResult sr : unfilteredResults) { if (eeIds.contains(sr.getId())) { filteredResults.add(sr); } } if (filteredResults.size() < unfilteredResults.size()) { log.info("Filtered for taxon = " + t.getCommonName() + ", removed " + (unfilteredResults.size() - filteredResults.size()) + " results"); } return filteredResults; } /** * Add results. * * @param rawResults To add to * @param newResults To be added */ private void accreteResults(List<SearchResult> rawResults, Collection<SearchResult> newResults) { for (SearchResult sr : newResults) { if (!rawResults.contains(sr)) { /* * We do this because we don't want to clobber results, when the same object comes up more than once in * different searches. */ rawResults.add(sr); } } } /** * Checks whether settings have the search genes flag and does the search if needed. * * @param results the results to which should any new results be accreted. */ private void accreteResultsGenes(List<SearchResult> results, SearchSettings settings, boolean webSpeedSearch) { if (settings.getSearchGenes()) { Collection<SearchResult> genes = this.getGenesFromSettings(settings, webSpeedSearch); this.accreteResults(results, genes); } } /** * Checks settings for all do-search flags, except for gene (see * {@link #accreteResultsGenes(List, SearchSettings, boolean)}), and does the search if needed. * * @param results the results to which should any new results be accreted. * @return same object as given, possibly extended by new items from search. */ private List<SearchResult> accreteResultsOthers(List<SearchResult> results, SearchSettings settings, boolean webSpeedSearch) { if (settings.getSearchExperiments()) { Collection<SearchResult> foundEEs = this.expressionExperimentSearch(settings); results.addAll(foundEEs); } Collection<SearchResult> compositeSequences = null; if (settings.getSearchProbes()) { compositeSequences = this.compositeSequenceSearch(settings); this.accreteResults(results, compositeSequences); } if (settings.getSearchPlatforms()) { Collection<SearchResult> foundADs = this.arrayDesignSearch(settings, compositeSequences); this.accreteResults(results, foundADs); } if (settings.getSearchBioSequences()) { Collection<SearchResult> genes = this.getGenesFromSettings(settings, webSpeedSearch); Collection<SearchResult> bioSequences = this.bioSequenceSearch(settings, genes); this.accreteResults(results, bioSequences); } if (settings.getUseGo()) { Collection<SearchResult> ontologyGenes = this.dbHitsToSearchResult( geneSearchService.getGOGroupGenes(settings.getQuery(), settings.getTaxon()), "From GO group"); this.accreteResults(results, ontologyGenes); } if (settings.getSearchBibrefs()) { Collection<SearchResult> bibliographicReferences = this.compassBibliographicReferenceSearch(settings); this.accreteResults(results, bibliographicReferences); } if (settings.getSearchGeneSets()) { Collection<SearchResult> geneSets = this.geneSetSearch(settings); this.accreteResults(results, geneSets); } if (settings.getSearchExperimentSets()) { Collection<SearchResult> experimentSets = this.experimentSetSearch(settings); this.accreteResults(results, experimentSets); } if (settings.getSearchPhenotypes()) { Collection<SearchResult> phenotypes = this.phenotypeSearch(settings); this.accreteResults(results, phenotypes); } return results; } /** * Convert biomaterial hits into their associated ExpressionExperiments * * @param results will go here * @param biomaterials */ private void addEEByBiomaterials(Collection<SearchResult> results, Map<BioMaterial, SearchResult> biomaterials) { if (biomaterials.size() == 0) { return; } Map<ExpressionExperiment, BioMaterial> ees = expressionExperimentService .findByBioMaterials(biomaterials.keySet()); for (ExpressionExperiment ee : ees.keySet()) { SearchResult searchResult = biomaterials.get(ees.get(ee)); results.add(new SearchResult(ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, searchResult.getHighlightedText() + " (BioMaterial characteristic)")); } } /** * Convert factorValue hits into their associated ExpressionExperiments * * @param results will go here * @param factorValues */ private void addEEByFactorvalues(Collection<SearchResult> results, Map<FactorValue, SearchResult> factorValues) { if (factorValues.size() == 0) { return; } Map<ExpressionExperiment, FactorValue> ees = expressionExperimentService .findByFactorValues(factorValues.keySet()); for (ExpressionExperiment ee : ees.keySet()) { SearchResult searchResult = factorValues.get(ees.get(ee)); results.add(new SearchResult(ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, searchResult.getHighlightedText() + " (FactorValue characteristic)")); } } private void addTerms(Taxon taxon, String taxonName) { String[] terms; if (StringUtils.isNotBlank(taxonName)) { terms = taxonName.split("\\s+"); // Only continue for multi-word if (terms.length > 1) { for (String s : terms) { if (!nameToTaxonMap.containsKey(s.trim().toLowerCase())) { nameToTaxonMap.put(s.trim().toLowerCase(), taxon); } } } } } /** * A general search for array designs. * This search does both an database search and a compass search. This is also contains an underlying * {@link CompositeSequence} search, returning the {@link ArrayDesign} collection for the given composite sequence * search string (the returned collection of array designs does not contain duplicates). * * @param probeResults Collection of results from a previous CompositeSequence search. Can be null; otherwise used * to avoid a second search for probes. The array designs for the probes are added to the final * results. */ private Collection<SearchResult> arrayDesignSearch(SearchSettings settings, Collection<SearchResult> probeResults) { StopWatch watch = this.startTiming(); String searchString = settings.getQuery(); Collection<SearchResult> results = new HashSet<>(); ArrayDesign shortNameResult = arrayDesignService.findByShortName(searchString); if (shortNameResult != null) { results.add(new SearchResult(shortNameResult, 1.0)); return results; } Collection<ArrayDesign> nameResult = arrayDesignService.findByName(searchString); if (nameResult != null && !nameResult.isEmpty()) { for (ArrayDesign ad : nameResult) { results.add(new SearchResult(ad, 1.0)); } return results; } BlacklistedEntity b = blackListDao.findByAccession(searchString); if (b != null) { results.add(new SearchResult(b, 1.0, "Blacklisted accessions are not loaded into Gemma")); return results; } Collection<ArrayDesign> altNameResults = arrayDesignService.findByAlternateName(searchString); for (ArrayDesign arrayDesign : altNameResults) { results.add(new SearchResult(arrayDesign, 0.9)); } Collection<ArrayDesign> manufacturerResults = arrayDesignService.findByManufacturer(searchString); for (ArrayDesign arrayDesign : manufacturerResults) { results.add(new SearchResult(arrayDesign, 0.9)); } /* * FIXME: add merged platforms and subsumers */ results.addAll(this.compassArrayDesignSearch(settings)); results.addAll(this.databaseArrayDesignSearch(settings)); Collection<SearchResult> probes; if (probeResults == null) { probes = this.compassCompositeSequenceSearch(settings); } else { probes = probeResults; } for (SearchResult r : probes) { CompositeSequence cs = (CompositeSequence) r.getResultObject(); if (cs.getArrayDesign() == null) // This might happen as compass // might not have indexed the AD // for the CS continue; results.add(r); } watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log .info("Array Design search for '" + settings + "' took " + watch.getTime() + " ms"); return results; } /** * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences * for the genes are added to the final results. */ private Collection<SearchResult> bioSequenceSearch(SearchSettings settings, Collection<SearchResult> previousGeneSearchResults) { StopWatch watch = this.startTiming(); Collection<SearchResult> searchResults = new HashSet<>(); searchResults.addAll(this.compassBioSequenceSearch(settings, previousGeneSearchResults)); searchResults.addAll(this.databaseBioSequenceSearch(settings)); watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Biosequence search for '" + settings + "' took " + watch.getTime() + " ms " + searchResults.size() + " results."); return searchResults; } private Collection<SearchResult> characteristicExpressionExperimentSearch(final SearchSettings settings) { Collection<Class<?>> classToSearch = new ArrayList<>(1); // this is a collection because of the API // for characteristicService; could add // findByUri(Class<?>...) // order matters if we hit the limits Queue<Class<?>> orderedClassesToSearch = new LinkedList<>(); orderedClassesToSearch.add(ExpressionExperiment.class); orderedClassesToSearch.add(FactorValue.class); orderedClassesToSearch.add(BioMaterial.class); Collection<SearchResult> results = new HashSet<>(); StopWatch watch = new StopWatch(); watch.start(); while (results.size() < SearchServiceImpl.SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS && !orderedClassesToSearch.isEmpty()) { classToSearch.clear(); classToSearch.add(orderedClassesToSearch.poll()); // We handle the OR clauses here. String[] subclauses = settings.getQuery().split(" OR "); for (String subclause : subclauses) { /* * Note that the AND is applied only within one entity type. The fix would be to apply AND at this * level. */ Collection<SearchResult> classResults = this.characteristicSearchWithChildren(classToSearch, subclause); if (!classResults.isEmpty()) { String msg = "Found " + classResults.size() + " " + classToSearch.iterator().next().getSimpleName() + " results from characteristic search."; if (results.size() >= SearchServiceImpl.SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS) { msg += " Total found > " + SearchServiceImpl.SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS + ", will not search for more entities."; } SearchServiceImpl.log.info(msg); } results.addAll(classResults); } } SearchServiceImpl.log.debug("ExpressionExperiment search: " + settings + " -> " + results.size() + " characteristic hits " + watch.getTime() + " ms"); // Note that if we do this earlier (within each query) the limit SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS has // more meaning. We would have to unroll the loop above return filterExperimentHitsByTaxon(results, settings.getTaxon()); } /** * Perform a search on a query - it does not have to be one word, it could be "parkinson's disease" */ private Collection<SearchResult> characteristicSearchTerm(Collection<Class<?>> classes, String query) { if (SearchServiceImpl.log.isDebugEnabled()) SearchServiceImpl.log.debug("Starting search for " + query); StopWatch watch = this.startTiming(); Collection<Characteristic> cs = new HashSet<>(); /* * Find terms that match the query (LARQ), and then use that to identify characteristics that have term * associated */ Collection<OntologyIndividual> individuals = ontologyService.findIndividuals(query); for (Collection<OntologyIndividual> individualbatch : BatchIterator.batches(individuals, 10)) { Collection<String> uris = new HashSet<>(); for (OntologyIndividual individual : individualbatch) { uris.add(individual.getUri()); } Collection<SearchResult> dbhits = this .dbHitsToSearchResult(characteristicService.findByUri(classes, uris), null); for (SearchResult crs : dbhits) { cs.add((Characteristic) crs.getResultObject()); } if (cs.size() >= SearchServiceImpl.MAX_CHARACTERISTIC_SEARCH_RESULTS) { break; } } if (individuals.size() > 0 && watch.getTime() > 1000) { SearchServiceImpl.log.info("Found " + individuals.size() + " characteristics matching '" + query + "' via individuals in " + watch.getTime() + "ms"); } // "free text" searches of characteristics here are not necessary, because we have a free-text index of the characteristics now. /* * Add characteristics that have values matching the query; this pulls in items not associated with ontology * terms (free text). We do this here so we can apply the query logic to the matches. */ // if ( cs.size() < SearchServiceImpl.MAX_CHARACTERISTIC_SEARCH_RESULTS ) { // String dbQueryString = query.replaceAll( "\\*", "" ); // note I changed the order of search operations so // // this might not be wanted. // // Collection<Characteristic> valueMatches; // if ( classes.contains( BioMaterial.class ) ) { // valueMatches = characteristicService.findByValueBMEE( dbQueryString ); // } else { // valueMatches = characteristicService.findByValue( classes, dbQueryString ); // } // // if ( valueMatches != null && !valueMatches.isEmpty() ) { // cs.addAll( valueMatches ); // // if ( watch.getTime() > 1000 ) { // SearchServiceImpl.log // .info( "Found " + valueMatches.size() + " characteristics matching value '" + query // + "' in " + watch.getTime() + "ms" ); // } // watch.reset(); // watch.start(); // } // } // keep looking... if (cs.size() < SearchServiceImpl.MAX_CHARACTERISTIC_SEARCH_RESULTS) { /* * Identify initial set of term matches to the query. */ Collection<OntologyTerm> matchingTerms = ontologyService.findTerms(query); if (watch.getTime() > 1000) { SearchServiceImpl.log.info("Found " + matchingTerms.size() + " ontology classes matching '" + query + "' in " + watch.getTime() + "ms"); } /* * Search for child terms. */ if (!matchingTerms.isEmpty()) { Collection<OntologyTerm> seenTerms = new HashSet<>(); for (OntologyTerm term : matchingTerms) { /* * In this loop, each term is a match directly to our query, and we do a depth-first fetch of the * children. */ String uri = term.getUri(); if (StringUtils.isBlank(uri)) continue; if (seenTerms.contains(term)) continue; int sizeBefore = cs.size(); this.getCharacteristicsAnnotatedToChildren(classes, term, cs, seenTerms); seenTerms.add(term); if (SearchServiceImpl.log.isDebugEnabled() && cs.size() > sizeBefore) { SearchServiceImpl.log.debug( (cs.size() - sizeBefore) + " characteristics matching children term of " + term); } if (cs.size() >= SearchServiceImpl.MAX_CHARACTERISTIC_SEARCH_RESULTS) { break; } } if (watch.getTime() > 1000) { SearchServiceImpl.log.info("Found " + cs.size() + " characteristics for '" + query + "' including child terms in " + watch.getTime() + "ms"); } watch.reset(); watch.start(); } } /* * Retrieve the owner objects FIXME because this can be slow, filter by taxon first if we have a constraint! */ watch.reset(); watch.start(); Collection<SearchResult> matchingEntities = this.getAnnotatedEntities(classes, cs); if (watch.getTime() > 1000) { SearchServiceImpl.log.info("Retrieved " + matchingEntities.size() + " entities via characteristics for '" + query + "' in " + watch.getTime() + "ms"); } if (SearchServiceImpl.log.isDebugEnabled()) SearchServiceImpl.log.debug("End search for " + query); return matchingEntities; } /** * Search for the Experiment query in ontologies, including items that are associated with children of matching * query terms. * That is, 'brain' should return entities tagged as 'hippocampus'. This method will return results only up to * MAX_CHARACTERISTIC_SEARCH_RESULTS. It can handle AND in searches, so Parkinson's AND neuron finds items tagged * with both of those terms. The use of OR is handled by the caller. * * @param classes Classes of characteristic-bound entities. For example, to get matching characteristics of * ExpressionExperiments, pass ExpressionExperiments.class in this collection parameter. * @return SearchResults of CharacteristicObjects. Typically to be useful one needs to retrieve the * 'parents' * (entities which have been 'tagged' with the term) of those Characteristics */ private Collection<SearchResult> characteristicSearchWithChildren(Collection<Class<?>> classes, String query) { StopWatch timer = this.startTiming(); /* * The tricky part here is if the user has entered a boolean query. If they put in Parkinson's disease AND * neuron, * then we want to eventually return entities that are associated with both. We don't expect to find single * characteristics that match both. * * But if they put in Parkinson's disease we don't want to do two queries. */ String[] subparts = query.split(" AND "); // we would have to first deal with the separate queries, and then apply the logic. Collection<SearchResult> allResults = new HashSet<>(); SearchServiceImpl.log .info("Starting characteristic search: " + query + " for type=" + StringUtils.join(classes, ",")); for (String rawTerm : subparts) { String trimmed = StringUtils.strip(rawTerm); if (StringUtils.isBlank(trimmed)) { continue; } Collection<SearchResult> subqueryResults = this.characteristicSearchTerm(classes, trimmed); if (allResults.isEmpty()) { allResults.addAll(subqueryResults); } else { // this is our Intersection operation. allResults.retainAll(subqueryResults); // aggregate the highlighted text. Map<SearchResult, String> highlights = new HashMap<>(); for (SearchResult sqr : subqueryResults) { highlights.put(sqr, sqr.getHighlightedText()); } for (SearchResult ar : allResults) { String k = highlights.get(ar); if (StringUtils.isNotBlank(k)) { String highlightedText = ar.getHighlightedText(); if (StringUtils.isBlank(highlightedText)) { ar.setHighlightedText(k); } else { ar.setHighlightedText(highlightedText + "," + k); } } } } if (timer.getTime() > 1000) { SearchServiceImpl.log.info("Characteristic search for '" + rawTerm + "': " + allResults.size() + " hits retained so far; " + timer.getTime() + "ms"); timer.reset(); timer.start(); } } return allResults; } /** * A Compass search on array designs. * * @return {@link Collection} */ private Collection<SearchResult> compassArrayDesignSearch(SearchSettings settings) { return this.compassSearch(compassArray, settings); } private Collection<SearchResult> compassBibliographicReferenceSearch(SearchSettings settings) { return this.compassSearch(compassBibliographic, settings); } /** * A compass backed search that finds biosequences that match the search string. Searches the gene and probe indexes * for matches then converts those results to biosequences * * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences * for the genes are added to the final results. */ private Collection<SearchResult> compassBioSequenceSearch(SearchSettings settings, Collection<SearchResult> previousGeneSearchResults) { Collection<SearchResult> results = this.compassSearch(compassBiosequence, settings); Collection<SearchResult> geneResults; if (previousGeneSearchResults == null) { SearchServiceImpl.log.info("Biosequence Search: running gene search with " + settings.getQuery()); geneResults = this.compassGeneSearch(settings); } else { SearchServiceImpl.log.info("Biosequence Search: using previous results"); geneResults = previousGeneSearchResults; } Map<Gene, SearchResult> genes = new HashMap<>(); for (SearchResult sr : geneResults) { Object resultObject = sr.getResultObject(); if (Gene.class.isAssignableFrom(resultObject.getClass())) { genes.put((Gene) resultObject, sr); } else { // see bug 1774 -- may not be happening any more. SearchServiceImpl.log.warn( "Expected a Gene, got a " + resultObject.getClass() + " on query=" + settings.getQuery()); } } Map<Gene, Collection<BioSequence>> seqsFromDb = bioSequenceService.findByGenes(genes.keySet()); for (Gene gene : seqsFromDb.keySet()) { List<BioSequence> bs = new ArrayList<>(seqsFromDb.get(gene)); // bioSequenceService.thawRawAndProcessed( bs ); results.addAll(this.dbHitsToSearchResult(bs, genes.get(gene), null)); } return results; } private Collection<SearchResult> compassCompositeSequenceSearch(final SearchSettings settings) { return this.compassSearch(compassProbe, settings); } /** * A compass search on expressionExperiments. The reults are filtered by taxon so that our limits are meaningfully * applied to next stages of the querying. * * @return {@link Collection} */ private Collection<SearchResult> compassExpressionSearch(SearchSettings settings) { Collection<SearchResult> unfilteredResults = this.compassSearch(compassExpression, settings); return filterExperimentHitsByTaxon(unfilteredResults, settings.getTaxon()); } private Collection<SearchResult> compassGeneSearch(final SearchSettings settings) { return this.compassSearch(compassGene, settings); } /** * Generic method for searching Lucene indices for entities (excluding ontology terms, which use the OntologySearch) */ private Collection<SearchResult> compassSearch(Compass bean, final SearchSettings settings) { if (!settings.getUseIndices()) return new HashSet<>(); CompassTemplate template = new CompassTemplate(bean); Collection<SearchResult> searchResults = template.execute(new CompassCallback<Collection<SearchResult>>() { @Override public Collection<SearchResult> doInCompass(CompassSession session) throws CompassException { return SearchServiceImpl.this.performSearch(settings, session); } }); if (SearchServiceImpl.log.isDebugEnabled()) { SearchServiceImpl.log.debug("Compass search via " + bean.getSettings().getSetting("compass.name") + " : " + settings + " -> " + searchResults.size() + " hits"); } return searchResults; } /** * Search by name of the composite sequence as well as gene. */ private Collection<SearchResult> compositeSequenceSearch(SearchSettings settings) { StopWatch watch = this.startTiming(); /* * FIXME: this at least partly ignores any array design that was set as a restriction, especially in a gene * search. */ // Skip compass searching of composite sequences because it only bloats the results. Collection<SearchResult> allResults = new HashSet<>(this.databaseCompositeSequenceSearch(settings)); /* * This last step is needed because the compassSearch for compositeSequences returns bioSequences too. */ Collection<SearchResult> finalResults = new HashSet<>(); for (SearchResult sr : allResults) { if (CompositeSequence.class.isAssignableFrom(sr.getResultClass())) { finalResults.add(sr); } } watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Composite sequence search for '" + settings + "' took " + watch.getTime() + " ms, " + finalResults.size() + " results."); return finalResults; } private List<SearchResult> convertEntitySearchResutsToValueObjectsSearchResults( Collection<SearchResult> searchResults) { List<SearchResult> convertedSearchResults = new ArrayList<>(); for (SearchResult searchResult : searchResults) { // this is a special case ... for some reason. if (BioSequence.class.isAssignableFrom(searchResult.getResultClass())) { SearchResult convertedSearchResult = new SearchResult( BioSequenceValueObject .fromEntity(bioSequenceService.thaw((BioSequence) searchResult.getResultObject())), searchResult.getScore(), searchResult.getHighlightedText()); convertedSearchResults.add(convertedSearchResult); } else { convertedSearchResults.add(searchResult); } } return convertedSearchResults; } /** * Searches the DB for array designs which have composite sequences whose names match the given search string. * Because of the underlying database search, this is acl aware. That is, returned array designs are filtered based * on access control list (ACL) permissions. */ private Collection<SearchResult> databaseArrayDesignSearch(SearchSettings settings) { if (!settings.getUseDatabase()) return new HashSet<>(); StopWatch watch = this.startTiming(); Collection<ArrayDesign> adSet = new HashSet<>(); // search by exact composite sequence name Collection<CompositeSequence> matchedCs = compositeSequenceService.findByName(settings.getQuery()); for (CompositeSequence sequence : matchedCs) { adSet.add(sequence.getArrayDesign()); } watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Array Design Composite Sequence DB search for " + settings + " took " + watch.getTime() + " ms" + " found " + adSet.size() + " Ads"); return this.dbHitsToSearchResult(adSet, null); } /** * A database search for biosequences. Biosequence names are already indexed by compass... */ private Collection<SearchResult> databaseBioSequenceSearch(SearchSettings settings) { if (!settings.getUseDatabase()) return new HashSet<>(); StopWatch watch = this.startTiming(); String searchString = settings.getQuery(); // replace * with % for inexact symbol search String inexactString = searchString; Pattern pattern = Pattern.compile("\\*"); Matcher match = pattern.matcher(inexactString); inexactString = match.replaceAll("%"); Collection<BioSequence> bs = bioSequenceService.findByName(inexactString); // bioSequenceService.thawRawAndProcessed( bs ); Collection<SearchResult> bioSequenceList = new HashSet<>(this.dbHitsToSearchResult(bs, null)); watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("BioSequence DB search for " + searchString + " took " + watch.getTime() + " ms and found" + bioSequenceList.size() + " BioSequences"); return bioSequenceList; } /** * Takes a list of ontology terms, and classes of objects of interest to be returned. Looks through the * characteristic table for an exact match with the given ontology terms. Only tries to match the uri's. * * @param classes Class of objects to restrict the search to (typically ExpressionExperiment.class, for * example). * @param terms A list of ontology terms to search for * @return Collection of search results for the objects owning the found characteristics, where the owner is * of * class clazz */ private Collection<SearchResult> databaseCharacteristicExactUriSearchForOwners(Collection<Class<?>> classes, Collection<OntologyTerm> terms) { // Collection<Characteristic> characteristicValueMatches = new ArrayList<Characteristic>(); Collection<Characteristic> characteristicURIMatches = new ArrayList<>(); for (OntologyTerm term : terms) { // characteristicValueMatches.addAll( characteristicService.findByValue( term.getUri() )); characteristicURIMatches.addAll(characteristicService.findByUri(classes, term.getUri())); } Map<Characteristic, Object> parentMap = characteristicService.getParents(classes, characteristicURIMatches); // parentMap.putAll( characteristicService.getParents(characteristicValueMatches ) ); return this.filterCharacteristicOwnersByClass(classes, parentMap); } /** * Search the DB for composite sequences and the genes that are matched to them. */ private Collection<SearchResult> databaseCompositeSequenceSearch(final SearchSettings settings) { if (!settings.getUseDatabase()) return new HashSet<>(); StopWatch watch = this.startTiming(); Set<Gene> geneSet = new HashSet<>(); String searchString = settings.getQuery(); ArrayDesign ad = settings.getPlatformConstraint(); // search by exact composite sequence name Collection<CompositeSequence> matchedCs = new HashSet<>(); if (ad != null) { CompositeSequence cs = compositeSequenceService.findByName(ad, searchString); if (cs != null) matchedCs.add(cs); } else { matchedCs = compositeSequenceService.findByName(searchString); } /* * In case the query _is_ a gene */ Collection<SearchResult> rawGeneResults = this.databaseGeneSearch(settings); for (SearchResult searchResult : rawGeneResults) { Object j = searchResult.getResultObject(); if (Gene.class.isAssignableFrom(j.getClass())) { geneSet.add((Gene) j); } } for (Gene g : geneSet) { if (settings.getPlatformConstraint() != null) { matchedCs.addAll(compositeSequenceService.findByGene(g, settings.getPlatformConstraint())); } else { matchedCs.addAll(compositeSequenceService.findByGene(g)); } } // search by associated genes. for (CompositeSequence sequence : matchedCs) { geneSet.addAll(compositeSequenceService.getGenes(sequence)); } watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Gene composite sequence DB search " + searchString + " took " + watch.getTime() + " ms, " + geneSet.size() + " items."); Collection<SearchResult> results = this.dbHitsToSearchResult(geneSet, null); results.addAll(this.dbHitsToSearchResult(matchedCs, null)); return results; } /** * Does search on exact string by: id, name and short name. This only returns results if these fields match exactly, * but it's fast. * * @return {@link Collection} */ private Collection<SearchResult> databaseExpressionExperimentSearch(final SearchSettings settings) { if (!settings.getUseDatabase()) return new HashSet<>(); StopWatch watch = this.startTiming(); Map<ExpressionExperiment, String> results = new HashMap<>(); String query = StringEscapeUtils.unescapeJava(settings.getQuery()); Collection<ExpressionExperiment> ees = expressionExperimentService.findByName(query); if (!ees.isEmpty()) { for (ExpressionExperiment ee : ees) { results.put(ee, ee.getName()); } } else { ExpressionExperiment ee = expressionExperimentService.findByShortName(query); if (ee != null) { results.put(ee, ee.getShortName()); } else { ees = expressionExperimentService.findByAccession(query); for (ExpressionExperiment e : ees) { results.put(e, e.getId().toString()); } if (results.isEmpty()) { try { // maybe user put in a primary key value. ee = expressionExperimentService.load(new Long(query)); if (ee != null) results.put(ee, ee.getId().toString()); } catch (NumberFormatException e) { // no-op - it's not an ID. } } } } if (settings.getTaxon() != null) { Map<Long, ExpressionExperiment> idMap = EntityUtils.getIdMap(results.keySet()); Collection<Long> retainedIds = expressionExperimentService.filterByTaxon(idMap.keySet(), settings.getTaxon()); for (Long id : idMap.keySet()) { if (!retainedIds.contains(id)) { results.remove(idMap.get(id)); } } } watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("DB Expression Experiment search for " + settings + " took " + watch.getTime() + " ms and found " + results.size() + " EEs"); return this.dbHitsToSearchResult(results); } /** * Search the DB for genes that exactly match the given search string searches geneProducts, gene and bioSequence * tables */ private Collection<SearchResult> databaseGeneSearch(SearchSettings settings) { if (!settings.getUseDatabase()) return new HashSet<>(); StopWatch watch = this.startTiming(); String searchString = StringEscapeUtils.unescapeJava(settings.getQuery()); if (StringUtils.isBlank(searchString)) return new HashSet<>(); Collection<SearchResult> results = new HashSet<>(); /* * First search by accession. If we find it, stop. */ Gene result = null; try { result = geneService.findByNCBIId(Integer.parseInt(searchString)); } catch (NumberFormatException e) { // } if (result != null) { results.add(this.dbHitToSearchResult(result)); } else { result = geneService.findByAccession(searchString, null); if (result != null) { results.add(this.dbHitToSearchResult(result)); } } if (results.size() > 0) { this.filterByTaxon(settings, results, true); watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " + results.size() + " genes"); return results; } // replace * at end with % for inexact symbol search String inexactString = searchString; Pattern pattern = Pattern.compile("\\*$"); Matcher match = pattern.matcher(inexactString); inexactString = match.replaceAll("%"); // note that at this point, the inexactString might not have a wildcard - only if the user asked for it. String exactString = inexactString.replaceAll("%", ""); // if the query is shortish, always do a wild card search. This gives better behavior in 'live // search' situations. If we do wildcards on very short queries we get too many results. Collection<Gene> geneSet = new HashSet<>(); if (searchString.length() <= 2) { // case 0: we got no result syet, or user entered a very short string. We search only for exact matches. geneSet.addAll(geneService.findByOfficialSymbolInexact(exactString)); } else if (inexactString.endsWith("%")) { // case 1: user explicitly asked for wildcard. We allow this on strings of length 3 or more. geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString)); } else if (searchString.length() > 3) { // case 2: user did not ask for a wildcard, but we add it anyway, if the string is 4 or 5 characters. if (!inexactString.endsWith("%")) { inexactString = inexactString + "%"; } geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString)); } else { // case 3: string is long enough, and user did not ask for wildcard. geneSet.addAll(geneService.findByOfficialSymbol(exactString)); } /* * If we found a match using official symbol or name, don't bother with this */ if (geneSet.isEmpty()) { geneSet.addAll(geneService.findByAlias(exactString)); geneSet.addAll(geneProductService.getGenesByName(exactString)); geneSet.addAll(geneProductService.getGenesByNcbiId(exactString)); geneSet.addAll(bioSequenceService.getGenesByAccession(exactString)); geneSet.addAll(bioSequenceService.getGenesByName(exactString)); geneSet.add(geneService.findByEnsemblId(exactString)); } watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " + geneSet.size() + " genes"); results = this.dbHitsToSearchResult(geneSet, null); this.filterByTaxon(settings, results, true); return results; } /** * Convert hits from database searches into SearchResults. * * @param compassHitDerivedFrom SearchResult that these entities were derived from. For example, if you * compass-searched for genes, and then used the genes to get sequences from the * database, the gene is * compassHitsDerivedFrom. If null, we treat this as a direct hit. */ private List<SearchResult> dbHitsToSearchResult(Collection<?> entities, SearchResult compassHitDerivedFrom, String matchText) { StopWatch timer = this.startTiming(); List<SearchResult> results = new ArrayList<>(); for (Object e : entities) { if (e == null) { if (SearchServiceImpl.log.isDebugEnabled()) SearchServiceImpl.log.debug("Null search result object"); continue; } SearchResult esr = this.dbHitToSearchResult(compassHitDerivedFrom, e, matchText); results.add(esr); } if (timer.getTime() > 1000) { SearchServiceImpl.log.info("Unpack " + results.size() + " search resultsS: " + timer.getTime() + "ms"); } return results; } /** * Convert hits from database searches into SearchResults. */ private Collection<SearchResult> dbHitsToSearchResult(Collection<?> entities, String matchText) { return this.dbHitsToSearchResult(entities, null, matchText); } /** * Convert hits from database searches into SearchResults. */ private List<SearchResult> dbHitsToSearchResult(Map<?, String> entities) { List<SearchResult> results = new ArrayList<>(); for (Object e : entities.keySet()) { SearchResult esr = this.dbHitToSearchResult(null, e, entities.get(e)); results.add(esr); } return results; } private SearchResult dbHitToSearchResult(Object e) { return this.dbHitToSearchResult(null, e, null); } /** * @param text that matched the query (for highlighting) */ private SearchResult dbHitToSearchResult(SearchResult compassHitDerivedFrom, Object e, String text) { SearchResult esr; if (compassHitDerivedFrom != null && text == null) { esr = new SearchResult(e, compassHitDerivedFrom.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY); esr.setHighlightedText(compassHitDerivedFrom.getHighlightedText()); } else { esr = new SearchResult(e, 1.0, text); } return esr; } private void debugParentFetch(Map<Characteristic, Object> parentMap) { /* * This is purely debugging. */ if (parentMap.size() > 0) { if (SearchServiceImpl.log.isDebugEnabled()) SearchServiceImpl.log.debug("Found " + parentMap.size() + " owners for " + parentMap.keySet().size() + " characteristics:"); } } private Collection<SearchResult> experimentSetSearch(SearchSettings settings) { Collection<SearchResult> results = this .dbHitsToSearchResult(this.experimentSetService.findByName(settings.getQuery()), null); results.addAll(this.compassSearch(compassExperimentSet, settings)); return results; } /** * A general search for expression experiments. This search does both an database search and a compass search. * A problem with this is that we cap the number of results that can be returned. This could be a limitation for * applications like building data set groups. Thus MAX_CHARACTERISTIC_SEARCH_RESULTS should not be too low. * * @return {@link Collection} */ private Collection<SearchResult> expressionExperimentSearch(final SearchSettings settings) { StopWatch watch = this.startTiming(); SearchServiceImpl.log.info("Starting search for '" + settings + "'"); Collection<SearchResult> results = new HashSet<>(); // searches for GEO names, etc - "exact" matches. if (settings.getUseDatabase()) { results.addAll(this.databaseExpressionExperimentSearch(settings)); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Expression Experiment database search for '" + settings + "' took " + watch.getTime() + " ms, " + results.size() + " hits."); /* * If we get results here, probably we want to just stop immediately, because the user is searching for * something exact. */ if (!results.isEmpty()) { return results; } BlacklistedEntity b = blackListDao.findByAccession(settings.getQuery()); if (b != null) { results.add(new SearchResult(b, 1.0, "Blacklisted accessions are not loaded into Gemma")); return results; } watch.reset(); watch.start(); } // fancy search that uses ontologies to infer related terms if (settings.getUseCharacteristics()) { results.addAll(this.characteristicExpressionExperimentSearch(settings)); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Expression Experiment ontology search for '" + settings + "' took " + watch.getTime() + " ms, " + results.size() + " hits."); watch.reset(); watch.start(); } // searches for strings in associated text including factorvalues and biomaterials, this is faster // we have toyed with having this be done before the characteristic search. if (settings.getUseIndices() && results.size() < SUFFICIENT_EXPERIMENT_RESULTS_FROM_CHARACTERISTICS) { results.addAll(this.compassExpressionSearch(settings)); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Expression Experiment index search for '" + settings + "' took " + watch.getTime() + " ms, " + results.size() + " hits."); watch.reset(); watch.start(); } // if we still didn't find anything, keep looking if (results.size() == 0) { /* * Search for bib refs FIXME does this do anything, since we index the bibrefs associated with experiments * directly? */ List<BibliographicReferenceValueObject> bibrefs = bibliographicReferenceService .search(settings.getQuery()); if (!bibrefs.isEmpty()) { Collection<BibliographicReference> refs = new HashSet<>(); Collection<SearchResult> r = this.compassBibliographicReferenceSearch(settings); for (SearchResult searchResult : r) { refs.add((BibliographicReference) searchResult.getResultObject()); } Map<BibliographicReference, Collection<ExpressionExperiment>> relatedExperiments = this.bibliographicReferenceService .getRelatedExperiments(refs); for (Entry<BibliographicReference, Collection<ExpressionExperiment>> e : relatedExperiments .entrySet()) { results.addAll(this.dbHitsToSearchResult(e.getValue(), null)); } if (watch.getTime() > 1000) SearchServiceImpl.log.info("Expression Experiment publication search for '" + settings + "' took " + watch.getTime() + " ms, " + results.size() + " hits."); watch.reset(); watch.start(); } } /* * Find data sets that match a platform. This will probably only be trigged if the search is for a GPL id. */ if (results.size() == 0) { Collection<SearchResult> matchingPlatforms = this.arrayDesignSearch(settings, null); for (SearchResult adRes : matchingPlatforms) { if (adRes.getResultObject() instanceof ArrayDesign) { ArrayDesign ad = (ArrayDesign) adRes.getResultObject(); Collection<ExpressionExperiment> expressionExperiments = this.arrayDesignService .getExpressionExperiments(ad); if (expressionExperiments.size() > 0) results.addAll(this.dbHitsToSearchResult(expressionExperiments, ad.getShortName() + " - " + ad.getName())); } } if (watch.getTime() > 1000) SearchServiceImpl.log.info("Expression Experiment platform search for '" + settings + "' took " + watch.getTime() + " ms, " + results.size() + " hits."); if (!results.isEmpty()) { return results; } watch.reset(); watch.start(); } watch.stop(); if (watch.getTime() > 1000) SearchServiceImpl.log.info("Expression Experiment search for '" + settings + "' took " + watch.getTime() + " ms, " + results.size() + " hits."); return results; } /** * FIXME this comes too late in the process to be effective - for queries that may retrieve many results, we have to * filter as we go. * * @param excludeWithoutTaxon if true: If the SearchResults have no "getTaxon" method then the results will get * filtered out Results with no taxon associated will also get removed. */ private void filterByTaxon(SearchSettings settings, Collection<SearchResult> results, boolean excludeWithoutTaxon) { if (settings.getTaxon() == null) { return; } Collection<SearchResult> toRemove = new HashSet<>(); Taxon t = settings.getTaxon(); if (results == null) return; for (SearchResult sr : results) { Object o = sr.getResultObject(); try { Taxon currentTaxon; if (o instanceof ExpressionExperiment) { ExpressionExperiment ee = (ExpressionExperiment) o; currentTaxon = expressionExperimentService.getTaxon(ee); } else if (o instanceof ExpressionExperimentSet) { ExpressionExperimentSet ees = (ExpressionExperimentSet) o; currentTaxon = ees.getTaxon(); } else if (o instanceof Gene) { Gene gene = (Gene) o; currentTaxon = gene.getTaxon(); } else if (o instanceof GeneSet) { GeneSet geneSet = (GeneSet) o; currentTaxon = geneSetService.getTaxon(geneSet); } else if (o instanceof CharacteristicValueObject) { CharacteristicValueObject charVO = (CharacteristicValueObject) o; currentTaxon = taxonDao.findByCommonName(charVO.getTaxon()); } else { Method m = o.getClass().getMethod("getTaxon"); currentTaxon = (Taxon) m.invoke(o); } if (currentTaxon == null || !currentTaxon.getId().equals(t.getId())) { if (currentTaxon == null) { // Sanity check for bad data in db (could happen if EE has no samples). Can happen that // searchResults have a vaild getTaxon method // but the method returns null (shouldn't make it this far) SearchServiceImpl.log.debug("Object has getTaxon method but it returns null. Obj is: " + o); } toRemove.add(sr); } } catch (SecurityException | IllegalArgumentException | InvocationTargetException | IllegalAccessException e) { throw new RuntimeException(e); } catch (NoSuchMethodException e) { /* * In case of a programming error where the results don't have a taxon at all, we assume we should * filter them out but issue a warning. */ if (excludeWithoutTaxon) { toRemove.add(sr); SearchServiceImpl.log.warn("No getTaxon method for: " + o.getClass() + ". Filtering from results. Error was: " + e); } } } results.removeAll(toRemove); } /** * Only used for experiment searches. * * @param classes * @param characteristic2entity * @return */ private Collection<SearchResult> filterCharacteristicOwnersByClass(Collection<Class<?>> classes, Map<Characteristic, Object> characteristic2entity) { Map<BioMaterial, SearchResult> biomaterials = new HashMap<>(); Map<FactorValue, SearchResult> factorValues = new HashMap<>(); Collection<SearchResult> results = new HashSet<>(); for (Characteristic c : characteristic2entity.keySet()) { Object o = characteristic2entity.get(c); for (Class<?> clazz : classes) { if (clazz.isAssignableFrom(o.getClass())) { String matchedText; if (c.getValueUri() != null) { matchedText = "Tagged term: <a href=\"" + Settings.getRootContext() + "/searcher.html?query=" + c.getValueUri() + "\">" + c.getValue() + "</a>"; } else { matchedText = "Free text: " + c.getValue(); } if (o instanceof BioMaterial) { biomaterials.put((BioMaterial) o, new SearchResult(o, 1.0, matchedText)); } else if (o instanceof FactorValue) { factorValues.put((FactorValue) o, new SearchResult(o, 1.0, matchedText)); } else if (o instanceof ExpressionExperiment) { results.add(new SearchResult(o, 1.0, matchedText)); } else { throw new IllegalStateException(); } } } } this.addEEByFactorvalues(results, factorValues); this.addEEByBiomaterials(results, biomaterials); return results; } /** * Makes no attempt at resolving the search query as a URI. Will tokenize the search query if there are control * characters in the String. URI's will get parsed into multiple query terms and lead to bad results. * * @param settings Will try to resolve general terms like brain --> to appropriate OntologyTerms and search * for * objects tagged with those terms (if isUseCharacte = true) * @param fillObjects If false, the entities will not be filled in inside the searchsettings; instead, they will * be * nulled (for security purposes). You can then use the id and Class stored in the * SearchSettings to load the * entities at your leisure. If true, the entities are loaded in the usual secure fashion. * Setting this to * false can be an optimization if all you need is the id. Note: filtering by taxon will not * be done unless * objects are filled * @param webSpeedSearch if true, this call is probably coming from a web app combo box and results will be limited * to improve speed */ private Map<Class<?>, List<SearchResult>> generalSearch(SearchSettings settings, boolean fillObjects, boolean webSpeedSearch) { settings = SearchSettingsStringUtils.processSettings(settings, this.nameToTaxonMap); List<SearchResult> rawResults = new ArrayList<>(); // do gene first first before we munge the query too much. this.accreteResultsGenes(rawResults, settings, webSpeedSearch); // some strings of size 1 cause lucene to barf and they were slipping through in multi-term queries, get rid of // them settings.setQuery(SearchSettingsStringUtils.stripShortTerms(settings.getQuery())); // If nothing to search return nothing. if (StringUtils.isBlank(settings.getQuery())) { return new HashMap<>(); } rawResults = this.accreteResultsOthers(rawResults, settings, webSpeedSearch); Map<Class<?>, List<SearchResult>> sortedLimitedResults = this.getSortedLimitedResults(settings, rawResults, fillObjects); SearchServiceImpl.log.info("search for: " + settings.getQuery() + " yielded " + rawResults.size() + " raw results (final tally may be filtered)"); return sortedLimitedResults; } /** * Combines compass style search, the db style search, and the compositeSequence search and returns 1 combined list * with no duplicates. * * @param returnOnDbHit if true and if there is a match for a gene from the database, return immediately - much * faster */ private Collection<SearchResult> geneSearch(final SearchSettings settings, boolean returnOnDbHit) { StopWatch watch = this.startTiming(); String searchString = settings.getQuery(); Collection<SearchResult> geneDbList = this.databaseGeneSearch(settings); if (returnOnDbHit && geneDbList.size() > 0) { return geneDbList; } Set<SearchResult> combinedGeneList = new HashSet<>(geneDbList); Collection<SearchResult> geneCompassList = this.compassGeneSearch(settings); combinedGeneList.addAll(geneCompassList); if (combinedGeneList.isEmpty()) { Collection<SearchResult> geneCsList = this.databaseCompositeSequenceSearch(settings); for (SearchResult res : geneCsList) { if (res.getResultClass().isAssignableFrom(Gene.class)) combinedGeneList.add(res); } } /* * Possibly search for genes linked via a phenotype, but only if we don't have anything here. * */ if (combinedGeneList.isEmpty()) { Collection<CharacteristicValueObject> phenotypeTermHits = this.phenotypeAssociationManagerService .searchInDatabaseForPhenotype(settings.getQuery()); for (CharacteristicValueObject phenotype : phenotypeTermHits) { Set<String> phenotypeUris = new HashSet<>(); phenotypeUris.add(phenotype.getValueUri()); // DATABASE HIT! Collection<GeneEvidenceValueObject> phenotypeGenes = phenotypeAssociationManagerService .findCandidateGenes(phenotypeUris, settings.getTaxon()); if (!phenotypeGenes.isEmpty()) { SearchServiceImpl.log.info(phenotypeGenes.size() + " genes associated with " + phenotype + " (via query='" + settings.getQuery() + "')"); for (GeneEvidenceValueObject gvo : phenotypeGenes) { Gene g = Gene.Factory.newInstance(); g.setId(gvo.getId()); g.setTaxon(settings.getTaxon()); SearchResult sr = new SearchResult(g); sr.setHighlightedText(phenotype.getValue() + " (" + phenotype.getValueUri() + ")"); // if ( gvo.getScore() != null ) { // TODO If we get evidence quality, use that in the score. // } sr.setScore(1.0); // maybe lower, if we do this search when combinedGeneList is nonempty. combinedGeneList.add(sr); } if (combinedGeneList.size() > 100 /* some limit */ ) { break; } } } } if (watch.getTime() > 1000) SearchServiceImpl.log.info("Gene search for " + searchString + " took " + watch.getTime() + " ms; " + combinedGeneList.size() + " results."); return combinedGeneList; } private Collection<SearchResult> geneSetSearch(SearchSettings settings) { Collection<SearchResult> hits; if (settings.getTaxon() != null) { hits = this.dbHitsToSearchResult( this.geneSetService.findByName(settings.getQuery(), settings.getTaxon()), null); } else { hits = this.dbHitsToSearchResult(this.geneSetService.findByName(settings.getQuery()), null); } hits.addAll(this.compassSearch(compassGeneSet, settings)); return hits; } /** * Given classes to search and characteristics, * * @param classes Which classes of entities to look for */ private Collection<SearchResult> getAnnotatedEntities(Collection<Class<?>> classes, Collection<Characteristic> cs) { // FIXME time-critical: this can be slow if we get a lot of biomaterial hits Map<Characteristic, Object> characteristic2entity = characteristicService.getParents(classes, cs); Collection<SearchResult> matchedEntities = this.filterCharacteristicOwnersByClass(classes, characteristic2entity); if (SearchServiceImpl.log.isDebugEnabled()) { this.debugParentFetch(characteristic2entity); } return matchedEntities; } /** * Recursively */ private void getCharacteristicsAnnotatedToChildren(Collection<Class<?>> classes, OntologyTerm term, Collection<Characteristic> results, Collection<OntologyTerm> seenTerms) { Collection<OntologyTerm> children = this.getDirectChildTerms(term); /* * Find occurrences of these terms in our system. This is fast, so long as there aren't too many. */ if (!children.isEmpty()) { Collection<String> uris = new ArrayList<>(); for (OntologyTerm ontologyTerm : children) { if (ontologyTerm.getUri() == null) continue; if (seenTerms.contains(ontologyTerm)) continue; uris.add(ontologyTerm.getUri()); seenTerms.add(ontologyTerm); } if (!uris.isEmpty()) { Collection<SearchResult> dbhits = this .dbHitsToSearchResult(characteristicService.findByUri(classes, uris), null); for (SearchResult crs : dbhits) { results.add((Characteristic) crs.getResultObject()); } } } if (results.size() >= SearchServiceImpl.MAX_CHARACTERISTIC_SEARCH_RESULTS) { return; } for (OntologyTerm child : children) { this.getCharacteristicsAnnotatedToChildren(classes, child, results, seenTerms); } } /** * Returns children one step down. getChildren can be very slow for 'high-level' classes like "neoplasm", so we use * a cache. * * @param term starting point */ @SuppressWarnings("unchecked") private Collection<OntologyTerm> getDirectChildTerms(OntologyTerm term) { String uri = term.getUri(); Collection<OntologyTerm> children = null; if (StringUtils.isBlank(uri)) { // shouldn't happen, but just in case SearchServiceImpl.log.warn("Blank uri for " + term); return new HashSet<>(); } Element cachedChildren = this.childTermCache.get(uri); if (cachedChildren == null) { try { children = term.getChildren(true); childTermCache.put(new Element(uri, children)); } catch (com.hp.hpl.jena.ontology.ConversionException ce) { SearchServiceImpl.log.warn("getting children for term: " + term + " caused com.hp.hpl.jena.ontology.ConversionException. " + ce.getMessage()); } } else { children = (Collection<OntologyTerm>) cachedChildren.getObjectValue(); } return children; } /** * @return a collection of SearchResults holding all the genes resulting from the search with given SearchSettings. */ private Collection<SearchResult> getGenesFromSettings(SearchSettings settings, boolean webSpeedSearch) { Collection<SearchResult> genes = null; if (settings.getSearchGenes()) { genes = this.geneSearch(settings, webSpeedSearch); } return genes; } private void getHighlightedText(CompassHits hits, int i, SearchResult r) { CompassHighlightedText highlightedText = hits.highlightedText(i); if (highlightedText != null && highlightedText.getHighlightedText() != null) { r.setHighlightedText(highlightedText.getHighlightedText()); } else { r.setHighlightedText("[Matching text not available]"); } } /** * @return List of ids for the entities held by the search results. */ private List<Long> getIds(List<SearchResult> searchResults) { List<Long> list = new ArrayList<>(); for (SearchResult r : searchResults) { list.add(r.getId()); } assert list.size() == searchResults.size(); return list; } private Collection<SearchResult> getSearchResults(CompassHits hits) { StopWatch timer = new StopWatch(); timer.start(); Collection<SearchResult> results = new HashSet<>(); /* * Note that hits come in decreasing score order. */ for (int i = 0, len = Math.min(SearchServiceImpl.MAX_LUCENE_HITS, hits.getLength()); i < len; i++) { SearchResult r = new SearchResult(hits.data(i)); // FIXME: score is generally (always?) NaN double score = hits.score(i); if (Double.isNaN(score)) { score = 1.0; } /* * Always give compass hits a lower score so they can be differentiated from exact database hits. */ r.setScore(score * SearchServiceImpl.COMPASS_HIT_SCORE_PENALTY_FACTOR); this.getHighlightedText(hits, i, r); if (SearchServiceImpl.log.isDebugEnabled()) SearchServiceImpl.log.debug(i + " " + hits.score(i) + " " + r); results.add(r); } if (timer.getTime() > 100) { SearchServiceImpl.log.info(results.size() + " hits retrieved (out of " + Math.min(SearchServiceImpl.MAX_LUCENE_HITS, hits.getLength()) + " raw hits tested) in " + timer.getTime() + "ms"); } if (timer.getTime() > 5000) { SearchServiceImpl.log.info("****Extremely long Lucene Search processing! " + results.size() + " hits retrieved (out of " + Math.min(SearchServiceImpl.MAX_LUCENE_HITS, hits.getLength()) + " raw hits tested) in " + timer.getTime() + "ms"); } return results; } private Map<Class<?>, List<SearchResult>> getSortedLimitedResults(SearchSettings settings, List<SearchResult> rawResults, boolean fillObjects) { Map<Class<?>, List<SearchResult>> results = new HashMap<>(); Collections.sort(rawResults); results.put(ArrayDesign.class, new ArrayList<SearchResult>()); results.put(BioSequence.class, new ArrayList<SearchResult>()); results.put(BibliographicReference.class, new ArrayList<SearchResult>()); results.put(CompositeSequence.class, new ArrayList<SearchResult>()); results.put(ExpressionExperiment.class, new ArrayList<SearchResult>()); results.put(Gene.class, new ArrayList<SearchResult>()); results.put(GeneSet.class, new ArrayList<SearchResult>()); results.put(ExpressionExperimentSet.class, new ArrayList<SearchResult>()); results.put(Characteristic.class, new ArrayList<SearchResult>()); results.put(CharacteristicValueObject.class, new ArrayList<SearchResult>()); results.put(BlacklistedExperiment.class, new ArrayList<SearchResult>()); results.put(BlacklistedPlatform.class, new ArrayList<SearchResult>()); /* * Get the top N results for each class. */ for (SearchResult sr : rawResults) { Class<?> resultClass = sr.getResultClass(); List<SearchResult> resultsForClass = results.get(resultClass); if (resultsForClass != null && resultsForClass.size() < settings.getMaxResults()) { resultsForClass.add(sr); } } if (fillObjects) { /* * Now retrieve the entities and put them in the SearchResult. Entities that are filtered out by the * SecurityInterceptor will be removed at this stage. */ for (Class<?> clazz : results.keySet()) { List<SearchResult> r = results.get(clazz); if (r.isEmpty()) continue; Map<Long, SearchResult> rMap = new HashMap<>(); for (SearchResult searchResult : r) { if (!rMap.containsKey(searchResult.getId()) || (rMap.get(searchResult.getId()).getScore() < searchResult.getScore())) { rMap.put(searchResult.getId(), searchResult); } } Collection<?> entities = this.retrieveResultEntities(clazz, r); List<SearchResult> filteredResults = new ArrayList<>(); for (Object entity : entities) { Long id = EntityUtils.getId(entity); SearchResult keeper = rMap.get(id); keeper.setResultObject(entity); filteredResults.add(keeper); } this.filterByTaxon(settings, filteredResults, false); results.put(clazz, filteredResults); } } else { for (SearchResult sr : rawResults) { sr.setResultObject(null); } } List<SearchResult> convertedResults = this .convertEntitySearchResutsToValueObjectsSearchResults(results.get(BioSequence.class)); results.put(BioSequenceValueObject.class, convertedResults); results.remove(BioSequence.class); return results; } private void initializeNameToTaxonMap() { Collection<? extends Taxon> taxonCollection = taxonDao.loadAll(); for (Taxon taxon : taxonCollection) { if (taxon.getScientificName() != null) nameToTaxonMap.put(taxon.getScientificName().trim().toLowerCase(), taxon); if (taxon.getCommonName() != null) nameToTaxonMap.put(taxon.getCommonName().trim().toLowerCase(), taxon); } // Loop through again breaking up multi-word taxon database names. // Doing this is a separate loop so that these names take lower precedence when matching than the full terms in // the generated keySet. for (Taxon taxon : taxonCollection) { this.addTerms(taxon, taxon.getCommonName()); this.addTerms(taxon, taxon.getScientificName()); } } /** * @return results, if the settings.termUri is populated. This includes gene uris. */ private Map<Class<?>, List<SearchResult>> ontologyUriSearch(SearchSettings settings) { Map<Class<?>, List<SearchResult>> results = new HashMap<>(); // 1st check to see if the query is a URI (from an ontology). // Do this by seeing if we can find it in the loaded ontologies. // Escape with general utilities because might not be doing a lucene backed search. (just a hibernate one). String termUri = settings.getTermUri(); if (StringUtils.isBlank(termUri)) { termUri = settings.getQuery(); } if (!termUri.startsWith("http://")) { return results; } OntologyTerm matchingTerm; String uriString; uriString = StringEscapeUtils.escapeJava(StringUtils.strip(termUri)); if (StringUtils.containsIgnoreCase(uriString, SearchServiceImpl.NCBI_GENE)) { // Perhaps is a valid gene URL. Want to search for the gene in gemma. // 1st get objects tagged with the given gene identifier Collection<Class<?>> classesToFilterOn = new HashSet<>(); classesToFilterOn.add(ExpressionExperiment.class); Collection<Characteristic> foundCharacteristics = characteristicService.findByUri(classesToFilterOn, uriString); Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToFilterOn, foundCharacteristics); Collection<SearchResult> characteristicOwnerResults = this .filterCharacteristicOwnersByClass(classesToFilterOn, parentMap); if (!characteristicOwnerResults.isEmpty()) { results.put(ExpressionExperiment.class, new ArrayList<SearchResult>()); results.get(ExpressionExperiment.class).addAll(characteristicOwnerResults); } if (settings.getSearchGenes()) { // Get the gene String ncbiAccessionFromUri = StringUtils.substringAfterLast(uriString, "/"); Gene g = null; try { g = geneService.findByNCBIId(Integer.parseInt(ncbiAccessionFromUri)); } catch (NumberFormatException e) { // ok } if (g != null) { results.put(Gene.class, new ArrayList<SearchResult>()); results.get(Gene.class).add(new SearchResult(g)); } } return results; } /* * Not searching for a gene. */ Collection<SearchResult> matchingResults; Collection<Class<?>> classesToSearch = new HashSet<>(); if (settings.getSearchExperiments()) { classesToSearch.add(ExpressionExperiment.class); // not sure ... classesToSearch.add(BioMaterial.class); classesToSearch.add(FactorValue.class); } // this doesn't seem to be implemented yet, LiteratureEvidence and GenericEvidence aren't handled in the // fillValueObjects method downstream /* * if ( settings.getSearchPhenotypes() ) { classesToSearch.add( PhenotypeAssociation.class ); } */ matchingTerm = this.ontologyService.getTerm(uriString); if (matchingTerm == null || matchingTerm.getUri() == null) { /* * Maybe the ontology isn't loaded. Look anyway. */ Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToSearch, characteristicService.findByUri(classesToSearch, uriString)); matchingResults = this.filterCharacteristicOwnersByClass(classesToSearch, parentMap); } else { SearchServiceImpl.log.info("Found ontology term: " + matchingTerm); // Was a URI from a loaded ontology soo get the children. Collection<OntologyTerm> terms2Search4 = matchingTerm.getChildren(true); terms2Search4.add(matchingTerm); matchingResults = this.databaseCharacteristicExactUriSearchForOwners(classesToSearch, terms2Search4); } for (SearchResult searchR : matchingResults) { if (results.containsKey(searchR.getResultClass())) { results.get(searchR.getResultClass()).add(searchR); } else { List<SearchResult> rs = new ArrayList<>(); rs.add(searchR); results.put(searchR.getResultClass(), rs); } } return results; } /** * Runs inside Compass transaction */ private Collection<SearchResult> performSearch(SearchSettings settings, CompassSession session) { StopWatch watch = this.startTiming(); String enhancedQuery = settings.getQuery().trim(); if (StringUtils.isBlank(enhancedQuery) || enhancedQuery.length() < SearchServiceImpl.MINIMUM_STRING_LENGTH_FOR_FREE_TEXT_SEARCH || enhancedQuery.equals("*")) return new ArrayList<>(); CompassQuery compassQuery = session.queryBuilder().queryString(enhancedQuery).toQuery(); SearchServiceImpl.log.debug("Parsed query: " + compassQuery); CompassHits hits = compassQuery.hits(); // highlighting. if (((SearchSettingsImpl) settings).getDoHighlighting()) { if (session instanceof InternalCompassSession) { // always ... CompassMapping mapping = ((InternalCompassSession) session).getMapping(); ResourceMapping[] rootMappings = mapping.getRootMappings(); // should only be one rootMapping. this.process(rootMappings, hits); } else { //? } } watch.stop(); if (watch.getTime() > 100) { SearchServiceImpl.log.info("Getting " + hits.getLength() + " lucene hits for " + enhancedQuery + " took " + watch.getTime() + " ms"); } if (watch.getTime() > 5000) { SearchServiceImpl.log.info("***** Slow Lucene Index Search! " + hits.getLength() + " lucene hits for " + enhancedQuery + " took " + watch.getTime() + " ms"); } return this.getSearchResults(hits); } /** * Find phenotypes. */ private Collection<SearchResult> phenotypeSearch(SearchSettings settings) { return this.dbHitsToSearchResult( this.phenotypeAssociationManagerService.searchInDatabaseForPhenotype(settings.getQuery()), null); } /** * Recursively cache the highlighted text. This must be done during the search transaction. * * @param givenMappings on first call, the root mapping(s) */ private void process(ResourceMapping[] givenMappings, CompassHits hits) { for (ResourceMapping resourceMapping : givenMappings) { Iterator<Mapping> mappings = resourceMapping.mappingsIt(); // one for each property. for (; mappings.hasNext();) { Mapping m = mappings.next(); if (m instanceof ComponentMapping) { ClassMapping[] refClassMappings = ((ComponentMapping) m).getRefClassMappings(); this.process(refClassMappings, hits); } else { // should be a ClassPropertyMapping String name = m.getName(); for (int i = 0; i < hits.getLength(); i++) { try { String frag = hits.highlighter(i).fragment(name); if (log.isDebugEnabled()) log.debug("Highlighted fragment: " + frag + " for " + hits.hit(i)); } catch (Exception e) { break; // skip this property entirely for all hits ... } } } } } } /** * Retrieve entities from the persistent store. */ private Collection<?> retrieveResultEntities(Class<?> entityClass, List<SearchResult> results) { List<Long> ids = this.getIds(results); if (ExpressionExperiment.class.isAssignableFrom(entityClass)) { return expressionExperimentService.load(ids); } else if (ArrayDesign.class.isAssignableFrom(entityClass)) { return arrayDesignService.load(ids); } else if (CompositeSequence.class.isAssignableFrom(entityClass)) { return compositeSequenceService.load(ids); } else if (BibliographicReference.class.isAssignableFrom(entityClass)) { return bibliographicReferenceService.load(ids); } else if (Gene.class.isAssignableFrom(entityClass)) { return geneService.load(ids); } else if (BioSequence.class.isAssignableFrom(entityClass)) { return bioSequenceService.load(ids); } else if (GeneSet.class.isAssignableFrom(entityClass)) { return geneSetService.load(ids); } else if (ExpressionExperimentSet.class.isAssignableFrom(entityClass)) { return experimentSetService.load(ids); } else if (Characteristic.class.isAssignableFrom(entityClass)) { Collection<Characteristic> chars = new ArrayList<>(); for (Long id : ids) { chars.add(characteristicService.load(id)); } return chars; } else if (CharacteristicValueObject.class.isAssignableFrom(entityClass)) { // TEMP HACK this whole method should not be needed in many cases Collection<CharacteristicValueObject> chars = new ArrayList<>(); for (SearchResult result : results) { if (result.getResultClass().isAssignableFrom(CharacteristicValueObject.class)) { chars.add((CharacteristicValueObject) result.getResultObject()); } } return chars; } else if (ExpressionExperimentSet.class.isAssignableFrom(entityClass)) { return experimentSetService.load(ids); } else if (BlacklistedEntity.class.isAssignableFrom(entityClass)) { return blackListDao.load(ids); } else { throw new UnsupportedOperationException("Don't know how to retrieve objects for class=" + entityClass); } } private StopWatch startTiming() { StopWatch watch = new StopWatch(); watch.start(); return watch; } }