Java tutorial
/*
 * The Gemma project
 *
 * Copyright (c) 2007 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.ontology;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.compass.core.util.concurrent.ConcurrentHashSet;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import ubic.basecode.ontology.model.OntologyIndividual;
import ubic.basecode.ontology.model.OntologyResource;
import ubic.basecode.ontology.model.OntologyTerm;
import ubic.basecode.ontology.model.OntologyTermSimple;
import ubic.basecode.ontology.providers.*;
import ubic.basecode.ontology.search.OntologySearch;
import ubic.basecode.util.Configuration;
import ubic.gemma.core.ontology.providers.GemmaOntologyService;
import ubic.gemma.core.ontology.providers.GeneOntologyService;
import ubic.gemma.core.search.SearchResult;
import ubic.gemma.core.search.SearchService;
import ubic.gemma.model.association.GOEvidenceCode;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.search.SearchSettings;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject;
import ubic.gemma.persistence.service.common.description.CharacteristicService;
import ubic.gemma.persistence.service.expression.biomaterial.BioMaterialService;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * Has a static method for finding out which ontologies are loaded into the system and a general purpose find method
 * that delegates to the many ontology services. NOTE: Logging messages from this service are important for tracking
 * changes to annotations.
 *
 * @author pavlidis
 */
@Service
public class OntologyServiceImpl implements OntologyService {

    /**
     * Throttle how many ontology terms we retrieve. We search the ontologies in a favored order, so we can stop when we
     * find "enough stuff".
     */
    private static final int MAX_TERMS_TO_FETCH = 200;

    private static final Log log = LogFactory.getLog(OntologyServiceImpl.class.getName());

    /**
     * Lazily populated by {@link #initializeCategoryTerms()}; shared by all instances.
     */
    private static Collection<OntologyTerm> categoryTerms = null;

    private final CellLineOntologyService cellLineOntologyService = new CellLineOntologyService();
    private final CellTypeOntologyService cellTypeOntologyService = new CellTypeOntologyService();
    private final ChebiOntologyService chebiOntologyService = new ChebiOntologyService();
    private final DiseaseOntologyService diseaseOntologyService = new DiseaseOntologyService();
    private final ExperimentalFactorOntologyService experimentalFactorOntologyService = new ExperimentalFactorOntologyService();
    @Deprecated
    private final FMAOntologyService fmaOntologyService = new FMAOntologyService();
    private final GemmaOntologyService gemmaOntologyService = new GemmaOntologyService();
    private final HumanDevelopmentOntologyService humanDevelopmentOntologyService = new HumanDevelopmentOntologyService();
    private final HumanPhenotypeOntologyService humanPhenotypeOntologyService = new HumanPhenotypeOntologyService();
    private final MammalianPhenotypeOntologyService mammalianPhenotypeOntologyService = new MammalianPhenotypeOntologyService();
    private final MouseDevelopmentOntologyService mouseDevelopmentOntologyService = new MouseDevelopmentOntologyService();
    @Deprecated
    private final NIFSTDOntologyService nifstdOntologyService = new NIFSTDOntologyService();
    private final ObiService obiService = new ObiService();

    /**
     * All general-purpose ontology services, in the order in which they are searched (filled in by
     * {@link #afterPropertiesSet()}).
     */
    private final Collection<AbstractOntologyService> ontologyServices = new ArrayList<>();
    private final SequenceOntologyService sequenceOntologyService = new SequenceOntologyService();
    private final UberonOntologyService uberonOntologyService = new UberonOntologyService();

    private BioMaterialService bioMaterialService;
    private CharacteristicService characteristicService;
    private SearchService searchService;
    private GeneOntologyService geneOntologyService;

    @Autowired
    public void setBioMaterialService(BioMaterialService bioMaterialService) {
        this.bioMaterialService = bioMaterialService;
    }

    @Autowired
    public void setCharacteristicService(CharacteristicService characteristicService) {
        this.characteristicService = characteristicService;
    }

    @Autowired
    public void setSearchService(SearchService searchService) {
        this.searchService = searchService;
    }

    @Autowired
    public void setGeneOntologyService(GeneOntologyService geneOntologyService) {
        this.geneOntologyService = geneOntologyService;
    }

    /**
     * Registers the ontology services (the registration order is the search order) and, unless suppressed by the
     * "load.ontologies" configuration setting, starts the initialization thread of each of them.
     */
    @Override
    public void afterPropertiesSet() {
        this.ontologyServices.add(this.gemmaOntologyService);
        this.ontologyServices.add(this.experimentalFactorOntologyService);
        this.ontologyServices.add(this.obiService);
        this.ontologyServices.add(this.nifstdOntologyService); // DEPRECATED
        this.ontologyServices.add(this.fmaOntologyService); // DEPRECATED
        this.ontologyServices.add(this.diseaseOntologyService);
        this.ontologyServices.add(this.cellTypeOntologyService);
        this.ontologyServices.add(this.chebiOntologyService);
        this.ontologyServices.add(this.mammalianPhenotypeOntologyService);
        this.ontologyServices.add(this.humanPhenotypeOntologyService);
        this.ontologyServices.add(this.mouseDevelopmentOntologyService);
        this.ontologyServices.add(this.humanDevelopmentOntologyService);
        this.ontologyServices.add(this.sequenceOntologyService);
        this.ontologyServices.add(this.cellLineOntologyService);
        this.ontologyServices.add(this.uberonOntologyService);

        /*
         * If this load.ontologies is NOT configured, we go ahead (per-ontology config will be checked).
         */
        String doLoad = Configuration.getString("load.ontologies");
        if (StringUtils.isBlank(doLoad) || Configuration.getBoolean("load.ontologies")) {
            for (AbstractOntologyService serv : this.ontologyServices) {
                serv.startInitializationThread(false, false);
            }
        } else {
            log.info("Auto-loading of ontologies suppressed");
        }
    }

    /**
     * Counts how often the value URIs of the given search results are already used by characteristics in the
     * database.
     *
     * @param searchResults          value objects whose value URIs are looked up
     * @param previouslyUsedInSystem map (keyed by lower-cased value URI, or value when there is no URI) that is
     *                               updated in place: existing entries get their occurrence count incremented, new
     *                               entries are added with category information cleared
     */
    @SuppressWarnings({ "unused", "WeakerAccess" }) // Possible external use
    public void countOccurrences(Collection<CharacteristicValueObject> searchResults,
            Map<String, CharacteristicValueObject> previouslyUsedInSystem) {
        StopWatch watch = new StopWatch();
        watch.start();
        Set<String> uris = new HashSet<>();
        for (CharacteristicValueObject cvo : searchResults) {
            uris.add(cvo.getValueUri());
        }

        Collection<Characteristic> existingCharacteristicsUsingTheseTerms = characteristicService.findByUri(uris);
        for (Characteristic c : existingCharacteristicsUsingTheseTerms) {
            // count up number of usages; see bug 3897
            String key = this.foundValueKey(c);
            if (previouslyUsedInSystem.containsKey(key)) {
                previouslyUsedInSystem.get(key).incrementOccurrenceCount();
                continue;
            }
            if (OntologyServiceImpl.log.isDebugEnabled())
                OntologyServiceImpl.log.debug("saw " + key + " (" + key + ")");
            CharacteristicValueObject vo = new CharacteristicValueObject(c);
            vo.setCategory(null);
            vo.setCategoryUri(null); // to avoid us counting separately by category.
            vo.setAlreadyPresentInDatabase(true);
            vo.incrementOccurrenceCount();
            previouslyUsedInSystem.put(key, vo);
        }

        if (OntologyServiceImpl.log.isDebugEnabled() || (watch.getTime() > 100
                && previouslyUsedInSystem.size() > 0))
            OntologyServiceImpl.log.info("found " + previouslyUsedInSystem.size()
                    + " matching characteristics used in the database" + " in " + watch.getTime() + " ms "
                    + " Filtered from initial set of " + existingCharacteristicsUsingTheseTerms.size());
    }

    /**
     * Using the ontology and values in the database, for a search searchQuery given by the client give an ordered list
     * of possible choices
     *
     * @return at most 100 results ordered as: exact matches, values starting with the query, values containing the
     *         query, everything else; empty when the cleaned query is shorter than 3 characters
     */
    @Override
    public Collection<CharacteristicValueObject> findExperimentsCharacteristicTags(String searchQueryString,
            boolean useNeuroCartaOntology) {

        String searchQuery = OntologySearch.stripInvalidCharacters(searchQueryString);

        if (searchQuery.length() < 3) {
            return new HashSet<>();
        }

        // this will do like %search%
        // NOTE(review): only a leading wildcard is added here; presumably findByValue appends the trailing one -
        // confirm against CharacteristicService.
        Collection<CharacteristicValueObject> characteristicsFromDatabase = CharacteristicValueObject
                .characteristic2CharacteristicVO(this.characteristicService.findByValue("%" + searchQuery));

        Map<String, CharacteristicValueObject> characteristicFromDatabaseWithValueUri = new HashMap<>();
        Collection<CharacteristicValueObject> characteristicFromDatabaseFreeText = new HashSet<>();

        for (CharacteristicValueObject characteristicInDatabase : characteristicsFromDatabase) {

            // flag to let know that it was found in the database
            characteristicInDatabase.setAlreadyPresentInDatabase(true);

            if (characteristicInDatabase.getValueUri() != null && !characteristicInDatabase.getValueUri()
                    .equals("")) {
                characteristicFromDatabaseWithValueUri
                        .put(characteristicInDatabase.getValueUri(), characteristicInDatabase);
            } else {
                // free txt, no value uri
                characteristicFromDatabaseFreeText.add(characteristicInDatabase);
            }
        }

        // search the ontology for the given searchTerm, but if already found in the database dont add it again
        Collection<CharacteristicValueObject> characteristicsFromOntology = this.findCharacteristicsFromOntology(
                searchQuery, useNeuroCartaOntology, characteristicFromDatabaseWithValueUri);

        // order to show the the term: 1-exactMatch, 2-startWith, 3-substring and 4- no rule
        // order to show values for each List : 1-From database with Uri, 2- from Ontology, 3- from from database with
        // no Uri
        Collection<CharacteristicValueObject> characteristicsWithExactMatch = new ArrayList<>();
        Collection<CharacteristicValueObject> characteristicsStartWithQuery = new ArrayList<>();
        Collection<CharacteristicValueObject> characteristicsSubstring = new ArrayList<>();
        Collection<CharacteristicValueObject> characteristicsNoRuleFound = new ArrayList<>();

        // from the database with a uri
        this.putCharacteristicsIntoSpecificList(searchQuery, characteristicFromDatabaseWithValueUri.values(),
                characteristicsWithExactMatch, characteristicsStartWithQuery, characteristicsSubstring,
                characteristicsNoRuleFound);
        // from the ontology
        this.putCharacteristicsIntoSpecificList(searchQuery, characteristicsFromOntology,
                characteristicsWithExactMatch, characteristicsStartWithQuery, characteristicsSubstring,
                characteristicsNoRuleFound);
        // from the database with no uri
        this.putCharacteristicsIntoSpecificList(searchQuery, characteristicFromDatabaseFreeText,
                characteristicsWithExactMatch, characteristicsStartWithQuery, characteristicsSubstring,
                characteristicsNoRuleFound);

        List<CharacteristicValueObject> allCharacteristicsFound = new ArrayList<>();
        allCharacteristicsFound.addAll(characteristicsWithExactMatch);
        allCharacteristicsFound.addAll(characteristicsStartWithQuery);
        allCharacteristicsFound.addAll(characteristicsSubstring);
        allCharacteristicsFound.addAll(characteristicsNoRuleFound);

        // limit the size of the returned phenotypes to 100 terms
        if (allCharacteristicsFound.size() > 100) {
            return allCharacteristicsFound.subList(0, 100);
        }

        return allCharacteristicsFound;
    }

    /**
     * Searches every registered ontology service for individuals matching the (cleaned) query.
     */
    @Override
    public Collection<OntologyIndividual> findIndividuals(String givenSearch) {

        String query = OntologySearch.stripInvalidCharacters(givenSearch);
        Collection<OntologyIndividual> results = new HashSet<>();

        for (AbstractOntologyService ontology : ontologyServices) {
            Collection<OntologyIndividual> found = ontology.findIndividuals(query);
            if (found != null)
                results.addAll(found);
        }

        return results;
    }

    /**
     * Searches every registered ontology service for terms matching the (cleaned) query and converts the hits to
     * characteristics. Returns an empty collection when the cleaned query is blank.
     */
    @Override
    public Collection<Characteristic> findTermAsCharacteristic(String search) {

        String query = OntologySearch.stripInvalidCharacters(search);
        Collection<Characteristic> results = new HashSet<>();

        if (StringUtils.isBlank(query)) {
            return results;
        }

        for (AbstractOntologyService ontology : ontologyServices) {
            Collection<OntologyTerm> found = ontology.findTerm(query);
            if (found != null)
                results.addAll(this.convert(new HashSet<OntologyResource>(found)));
        }

        return results;
    }

    /**
     * Finds terms in the loaded ontologies. A query starting with "http://" is treated as a URI and looked up
     * directly; anything else is searched as text, skipping obsolete terms, and also searched in GO when it is ready.
     *
     * @param search URI or free-text query; a null or blank query yields an empty result
     */
    @Override
    public Collection<OntologyTerm> findTerms(String search) {

        Collection<OntologyTerm> results = new HashSet<>();

        // Guard against null (previously a NullPointerException); blank input never produced results anyway.
        if (StringUtils.isBlank(search)) {
            return results;
        }

        /*
         * URI input: just retrieve the term.
         */
        if (search.startsWith("http://")) {
            for (AbstractOntologyService ontology : ontologyServices) {
                if (ontology.isOntologyLoaded()) {
                    OntologyTerm found = ontology.getTerm(search);
                    if (found != null) {
                        results.add(found);
                    }
                }
            }
            return results;
        }

        /*
         * Other queries:
         */
        String query = OntologySearch.stripInvalidCharacters(search);

        if (StringUtils.isBlank(query)) {
            return results;
        }

        for (AbstractOntologyService ontology : ontologyServices) {
            if (ontology.isOntologyLoaded()) {
                Collection<OntologyTerm> found = ontology.findTerm(query);
                if (found != null) {
                    for (OntologyTerm t : found) {
                        if (!t.isTermObsolete()) {
                            results.add(t);
                        }
                    }
                }
            }
        }

        if (geneOntologyService.isReady())
            results.addAll(geneOntologyService.findTerm(search));

        return results;
    }

    /**
     * Searches characteristics already in the database, genes, the loaded ontologies and (if there is room left) GO
     * for the query, and returns the combined hits in a preferred order.
     *
     * @param givenQueryString the raw query
     * @param taxon            passed on to the gene search
     * @return sorted results; an empty collection when the cleaned query is blank; {@code null} when the raw query
     *         is blank (kept for backward compatibility with existing callers)
     */
    @Override
    public Collection<CharacteristicValueObject> findTermsInexact(String givenQueryString, Taxon taxon) {

        if (StringUtils.isBlank(givenQueryString))
            return null;

        StopWatch watch = new StopWatch();
        watch.start();

        String queryString = OntologySearch.stripInvalidCharacters(givenQueryString);
        if (StringUtils.isBlank(queryString)) {
            OntologyServiceImpl.log.warn("The query was not valid (ended up being empty): " + givenQueryString);
            return new HashSet<>();
        }

        if (OntologyServiceImpl.log.isDebugEnabled()) {
            OntologyServiceImpl.log
                    .debug("starting findExactTerm for " + queryString + ". Timing information begins from here");
        }

        Collection<CharacteristicValueObject> searchResults = new HashSet<>();

        Map<String, CharacteristicValueObject> previouslyUsedInSystem = new HashMap<>();

        this.countOccurrences(queryString, previouslyUsedInSystem);

        this.searchForGenes(queryString, taxon, searchResults);

        for (AbstractOntologyService service : this.ontologyServices) {
            if (!service.isOntologyLoaded())
                continue;

            // Scoped per service: if this service fails we must not re-process the previous service's results.
            Collection<? extends OntologyResource> results = null;
            try {
                results = service.findResources(queryString);
            } catch (Exception e) {
                OntologyServiceImpl.log.warn(e.getMessage()); // parse errors, etc.
            }
            if (results == null || results.isEmpty())
                continue;

            if (OntologyServiceImpl.log.isDebugEnabled())
                OntologyServiceImpl.log
                        .debug("found " + results.size() + " from " + service.getClass().getSimpleName() + " in "
                                + watch.getTime() + " ms");
            searchResults.addAll(CharacteristicValueObject
                    .characteristic2CharacteristicVO(this.termsToCharacteristics(results)));

            if (searchResults.size() > OntologyServiceImpl.MAX_TERMS_TO_FETCH) {
                break;
            }
        }

        this.countOccurrences(searchResults, previouslyUsedInSystem);

        // get GO terms, if we don't already have a lot of possibilities. (might have to adjust this)
        if (searchResults.size() < OntologyServiceImpl.MAX_TERMS_TO_FETCH && geneOntologyService.isReady()) {
            searchResults.addAll(CharacteristicValueObject.characteristic2CharacteristicVO(
                    this.termsToCharacteristics(geneOntologyService.findTerm(queryString))));
        }

        // Sort the results rather elaborately.
        Collection<CharacteristicValueObject> sortedResults = this
                .sort(previouslyUsedInSystem, searchResults, queryString);

        if (watch.getTime() > 1000) {
            OntologyServiceImpl.log
                    .info("Ontology term query for: " + givenQueryString + ": " + watch.getTime() + "ms");
        }

        return sortedResults;
    }

    /**
     * @return the category terms, loading them on first use (and again whenever the cached collection is empty).
     */
    @Override
    public Collection<OntologyTerm> getCategoryTerms() {

        if (!experimentalFactorOntologyService.isOntologyLoaded()) {
            OntologyServiceImpl.log.warn("EFO is not loaded");
        }

        /*
         * Requires EFO, OBI and SO. If one of them isn't loaded, the terms are filled in with placeholders.
         */
        if (OntologyServiceImpl.categoryTerms == null || OntologyServiceImpl.categoryTerms.isEmpty()) {
            this.initializeCategoryTerms();
        }
        return OntologyServiceImpl.categoryTerms;
    }

    @Override
    public CellLineOntologyService getCellLineOntologyService() {
        return cellLineOntologyService;
    }

    @Override
    public CellTypeOntologyService getCellTypeOntologyService() {
        return cellTypeOntologyService;
    }

    @Override
    public GemmaOntologyService getGemmaOntologyService() {
        return gemmaOntologyService;
    }

    @Override
    public HumanDevelopmentOntologyService getHumanDevelopmentOntologyService() {
        return humanDevelopmentOntologyService;
    }

    @Override
    public MouseDevelopmentOntologyService getMouseDevelopmentOntologyService() {
        return mouseDevelopmentOntologyService;
    }

    @Override
    public ChebiOntologyService getChebiOntologyService() {
        return chebiOntologyService;
    }

    @Override
    public DiseaseOntologyService getDiseaseOntologyService() {
        return diseaseOntologyService;
    }

    @Override
    public ExperimentalFactorOntologyService getExperimentalFactorOntologyService() {
        return experimentalFactorOntologyService;
    }

    @Override
    public HumanPhenotypeOntologyService getHumanPhenotypeOntologyService() {
        return humanPhenotypeOntologyService;
    }

    @Override
    public MammalianPhenotypeOntologyService getMammalianPhenotypeOntologyService() {
        return mammalianPhenotypeOntologyService;
    }

    @Override
    public ObiService getObiService() {
        return obiService;
    }

    @Override
    public UberonOntologyService getUberonService() {
        return this.uberonOntologyService;
    }

    /**
     * @return the first resource with the given URI found in the registered ontology services, or null.
     */
    @Override
    public OntologyResource getResource(String uri) {
        for (AbstractOntologyService ontology : ontologyServices) {
            OntologyResource resource = ontology.getResource(uri);
            if (resource != null)
                return resource;
        }
        return null;
    }

    @Override
    public SequenceOntologyService getSequenceOntologyService() {
        return this.sequenceOntologyService;
    }

    /**
     * @return the first term with the given URI found in the registered ontology services, or null.
     */
    @Override
    public OntologyTerm getTerm(String uri) {
        for (AbstractOntologyService ontology : ontologyServices) {
            OntologyTerm term = ontology.getTerm(uri);
            if (term != null)
                return term;
        }
        // TODO: doesn't include GO.
        return null;
    }

    /**
     * @return true if the Uri is an ObsoleteClass. This will only work if the ontology in question is loaded.
     */
    @Override
    public boolean isObsolete(String uri) {
        if (uri == null)
            return false;
        OntologyTerm t = this.getTerm(uri);
        return t != null && t.isTermObsolete();
    }

    /**
     * Re-indexes every loaded ontology; a failure in one ontology is logged and does not stop the others.
     */
    @Override
    public void reindexAllOntologies() {
        for (AbstractOntologyService serv : this.ontologyServices) {
            if (serv.isOntologyLoaded()) {
                OntologyServiceImpl.log.info("Reindexing: " + serv);
                try {
                    serv.index(true);
                } catch (Exception e) {
                    OntologyServiceImpl.log.error("Failed to index " + serv + ": " + e.getMessage(), e);
                }
            } else {
                if (serv.isEnabled())
                    OntologyServiceImpl.log
                            .info("Not available for reindexing (not enabled or finished initialization): " + serv);
            }
        }
    }

    @Override
    public void reinitializeAllOntologies() {
        for (AbstractOntologyService serv : this.ontologyServices) {
            serv.startInitializationThread(true, true);
        }
    }

    /**
     * Removes the characteristic with the given id from the biomaterial and from the database.
     *
     * @throws IllegalArgumentException if no characteristic with that id exists
     */
    @Override
    public void removeBioMaterialStatement(Long characterId, BioMaterial bm) {
        Characteristic vc = characteristicService.load(characterId);
        if (vc == null)
            throw new IllegalArgumentException("No characteristic with id=" + characterId + " was found");
        bm.getCharacteristics().remove(vc);
        characteristicService.remove(characterId);
    }

    /**
     * Adds the characteristic (marked with evidence code IC, i.e. manually added) to the biomaterial and persists
     * the biomaterial.
     */
    @Override
    public void saveBioMaterialStatement(Characteristic vc, BioMaterial bm) {

        OntologyServiceImpl.log.debug("Vocab Characteristic: " + vc);

        vc.setEvidenceCode(GOEvidenceCode.IC); // manually added characteristic
        Set<Characteristic> chars = new HashSet<>();
        chars.add(vc);

        Collection<Characteristic> current = bm.getCharacteristics();
        if (current == null)
            current = new HashSet<>(chars);
        else
            current.addAll(chars);

        for (Characteristic characteristic : chars) {
            OntologyServiceImpl.log.info("Adding characteristic to " + bm + " : " + characteristic);
        }

        bm.setCharacteristics(current);
        bioMaterialService.update(bm);
    }

    /**
     * Validates the characteristic and adds it to the experiment's characteristics (the experiment itself is not
     * persisted here).
     *
     * @throws IllegalArgumentException if the characteristic is null, lacks a category or value, refers to an
     *                                  obsolete term, or if the experiment is null
     */
    @Override
    public void addExpressionExperimentStatement(Characteristic vc, ExpressionExperiment ee) {
        if (vc == null) {
            throw new IllegalArgumentException("Null characteristic");
        }
        if (StringUtils.isBlank(vc.getCategory())) {
            throw new IllegalArgumentException("Must provide a category");
        }

        if (StringUtils.isBlank(vc.getValue())) {
            throw new IllegalArgumentException("Must provide a value");
        }

        if (vc.getEvidenceCode() == null) {
            vc.setEvidenceCode(GOEvidenceCode.IC); // assume: manually added characteristic
        }

        if (StringUtils.isNotBlank(vc.getValueUri()) && this.isObsolete(vc.getValueUri())) {
            throw new IllegalArgumentException(vc + " is an obsolete term! Not saving.");
        }

        if (ee == null)
            throw new IllegalArgumentException("Experiment cannot be null");

        OntologyServiceImpl.log
                .info("Adding characteristic '" + vc.getValue() + "' to " + ee.getShortName() + " (ID=" + ee.getId()
                        + ") : " + vc);

        ee.getCharacteristics().add(vc);
    }

    /**
     * Sorts the list in place using {@link CharacteristicComparator}.
     */
    @Override
    public void sort(List<CharacteristicValueObject> characteristics) {
        Collections.sort(characteristics, new CharacteristicComparator());
    }

    /**
     * Convert raw ontology resources into Characteristics. Null entries and resources that cannot be converted
     * (obsolete, unsupported type, no value) are skipped.
     */
    @Override
    public Collection<Characteristic> termsToCharacteristics(final Collection<? extends OntologyResource> terms) {

        Collection<Characteristic> results = new HashSet<>();

        if ((terms == null) || (terms.isEmpty()))
            return results;

        for (OntologyResource term : terms) {

            if (term == null)
                continue;

            Characteristic vc = this.termToCharacteristic(term);
            if (vc == null)
                continue;
            results.add(vc);
        }
        OntologyServiceImpl.log.debug("returning " + results.size() + " terms after filter");

        return results;
    }

    /**
     * Walks over characteristic IDs in batches and collects the characteristics whose value URI is an obsolete term.
     *
     * @param start first characteristic ID to check
     * @param stop  batches are processed while the upper ID of the batch is below this value
     * @param step  batch size; must be positive
     * @return map from value key to a value object whose occurrence count is the number of obsolete usages found
     * @throws IllegalArgumentException if step is not positive (the loop would never terminate)
     */
    @Override
    public Map<String, CharacteristicValueObject> countObsoleteOccurrences(int start, int stop, int step) {
        if (step <= 0) {
            throw new IllegalArgumentException("step must be positive, got " + step);
        }

        Map<String, CharacteristicValueObject> vos = new HashMap<>();

        int minId = start;
        // The first batch ends 'step' after 'start' (was 'step', which ignored a non-zero start).
        int maxId = start + step;

        int nullCnt = 0;
        int obsoleteCnt = 0;

        // Loading all characteristics in steps
        // NOTE(review): a trailing partial batch (maxId >= stop) is not checked - confirm this is intended.
        while (maxId < stop) {

            OntologyServiceImpl.log.info("Checking characteristics with IDs between " + minId + " and " + maxId);

            List<Long> ids = new ArrayList<>(step);
            for (int i = minId; i < maxId + 1; i++) {
                ids.add((long) i);
            }

            minId = maxId + 1;
            maxId += step;

            Collection<Characteristic> chars = characteristicService.load(ids);

            if (chars == null || chars.isEmpty()) {
                OntologyServiceImpl.log.info("No characteristics in the current ID range, moving on.");
                continue;
            }
            OntologyServiceImpl.log.info(
                    "Found " + chars.size() + " characteristics in the current ID range, checking for obsoletes.");

            // Detect obsoletes
            for (Characteristic ch : chars) {
                if (StringUtils.isBlank(ch.getValueUri())) {
                    nullCnt++;
                } else if (this.isObsolete(ch.getValueUri())) {
                    String key = this.foundValueKey(ch);
                    if (!vos.containsKey(key)) {
                        vos.put(key, new CharacteristicValueObject(ch));
                    }
                    vos.get(key).incrementOccurrenceCount();
                    obsoleteCnt++;
                    OntologyServiceImpl.log
                            .info("Found obsolete term: " + ch.getValue() + " / " + ch.getValueUri());
                }
            }

            ids.clear();
            chars.clear();
        }

        OntologyServiceImpl.log.info("Terms with empty uri: " + nullCnt);
        OntologyServiceImpl.log.info("Obsolete terms found: " + obsoleteCnt);

        return vos;
    }

    /**
     * Converts a single ontology resource to a characteristic.
     *
     * @return the characteristic, or null if the resource is obsolete, is neither a term nor an individual, or has
     *         no value
     */
    private Characteristic termToCharacteristic(OntologyResource res) {
        if (this.isObsolete(res.getUri())) {
            OntologyServiceImpl.log.warn("Skipping an obsolete term: " + res.getLabel() + " / " + res.getUri());
            return null;
        }

        Characteristic vc = Characteristic.Factory.newInstance();
        if (res instanceof OntologyTerm) {
            OntologyTerm term = (OntologyTerm) res;
            vc.setValue(term.getTerm());
            vc.setValueUri(term.getUri());
            vc.setDescription(term.getComment());
        } else if (res instanceof OntologyIndividual) {
            OntologyIndividual indi = (OntologyIndividual) res;
            vc.setValue(indi.getLabel());
            vc.setValueUri(indi.getUri());
            vc.setDescription("Individual");
        } else {
            OntologyServiceImpl.log.warn("This is neither an OntologyTerm or an OntologyIndividual: " + res);
            return null;
        }

        if (vc.getValue() == null) {
            OntologyServiceImpl.log
                    .warn("Skipping a characteristic with no value: " + res.getLabel() + " / " + res.getUri());
            return null;
        }

        return vc;
    }

    /**
     * Given a collection of ontology terms converts them to a collection of Characteristics. Resources without a URI
     * are dropped; unlike {@link #termToCharacteristic(OntologyResource)}, obsolete terms are not filtered here.
     */
    private Collection<Characteristic> convert(final Collection<OntologyResource> resources) {

        Collection<Characteristic> converted = new HashSet<>();

        if ((resources == null) || (resources.isEmpty()))
            return converted;

        for (OntologyResource res : resources) {
            Characteristic vc = Characteristic.Factory.newInstance();

            // If there is no URI we don't want to send it back (ie useless)
            if ((res.getUri() == null) || StringUtils.isEmpty(res.getUri()))
                continue;

            if (res instanceof OntologyTerm) {
                OntologyTerm term = (OntologyTerm) res;
                vc.setValue(term.getTerm());
                vc.setValueUri(term.getUri());
                vc.setDescription(term.getComment());
            }
            if (res instanceof OntologyIndividual) {
                OntologyIndividual indi = (OntologyIndividual) res;
                vc.setValue(indi.getLabel());
                vc.setValueUri(indi.getUri());
                vc.setDescription("Individual");
            }

            converted.add(vc);
        }

        return converted;
    }

    /**
     * Counts how often characteristics whose value matches the query are already used in the database, updating
     * the given map in place (same keying and bookkeeping as the public overload).
     */
    private void countOccurrences(String queryString, Map<String, CharacteristicValueObject> previouslyUsedInSystem) {
        StopWatch watch = new StopWatch();
        watch.start();

        Collection<Characteristic> foundChars = characteristicService.findByValue(queryString);

        /*
         * Want to flag in the web interface that these are already used by Gemma (also ignore capitalization; category
         * is always ignored; remove duplicates.)
         */
        for (Characteristic characteristic : foundChars) {
            // count up number of usages; see bug 3897
            String key = this.foundValueKey(characteristic);
            if (previouslyUsedInSystem.containsKey(key)) {
                previouslyUsedInSystem.get(key).incrementOccurrenceCount();
                continue;
            }
            if (OntologyServiceImpl.log.isDebugEnabled())
                OntologyServiceImpl.log.debug("saw " + key + " (" + key + ") for " + characteristic);
            CharacteristicValueObject vo = new CharacteristicValueObject(characteristic);
            vo.setCategory(null);
            vo.setCategoryUri(null); // to avoid us counting separately by category.
            vo.setAlreadyPresentInDatabase(true);
            vo.incrementOccurrenceCount();
            previouslyUsedInSystem.put(key, vo);
        }

        if (OntologyServiceImpl.log.isDebugEnabled() || (watch.getTime() > 100
                && previouslyUsedInSystem.size() > 0))
            OntologyServiceImpl.log.info("found " + previouslyUsedInSystem.size()
                    + " matching characteristics used in the database" + " in " + watch.getTime() + " ms "
                    + " Filtered from initial set of " + foundChars.size());
    }

    /**
     * Searches the ontologies for the query and returns the matching terms that are not already present in the
     * database.
     *
     * @param searchQuery                            the cleaned query
     * @param useNeuroCartaOntology                  if true only NIFSTD, FMA and OBI are searched, otherwise all
     *                                               registered ontologies
     * @param characteristicFromDatabaseWithValueUri characteristics already found in the database, keyed by value
     *                                               URI; terms with these URIs are not returned again
     */
    private Collection<CharacteristicValueObject> findCharacteristicsFromOntology(String searchQuery,
            boolean useNeuroCartaOntology,
            Map<String, CharacteristicValueObject> characteristicFromDatabaseWithValueUri) {

        Collection<CharacteristicValueObject> characteristicsFromOntology = new HashSet<>();

        // in neurocarta we don't need to search all Ontologies
        Collection<AbstractOntologyService> ontologyServicesToUse = new HashSet<>();

        if (useNeuroCartaOntology) {
            ontologyServicesToUse.add(this.nifstdOntologyService);
            ontologyServicesToUse.add(this.fmaOntologyService);
            ontologyServicesToUse.add(this.obiService);
        } else {
            ontologyServicesToUse = this.ontologyServices;
        }

        // search all Ontology
        for (AbstractOntologyService ontologyService : ontologyServicesToUse) {

            Collection<OntologyTerm> ontologyTerms = ontologyService.findTerm(searchQuery);
            // Other callers in this class treat a null result as "nothing found"; do the same here.
            if (ontologyTerms == null)
                continue;

            for (OntologyTerm ontologyTerm : ontologyTerms) {

                // a term without a label cannot be shown or ranked; skip it rather than fail the whole search
                if (ontologyTerm.getLabel() == null)
                    continue;

                // if the ontology term wasnt already found in the database
                if (characteristicFromDatabaseWithValueUri.get(ontologyTerm.getUri()) == null) {

                    CharacteristicValueObject phenotype = new CharacteristicValueObject(-1L,
                            ontologyTerm.getLabel().toLowerCase(), ontologyTerm.getUri());

                    characteristicsFromOntology.add(phenotype);
                }
            }
        }

        return characteristicsFromOntology;
    }

    /**
     * @return the lower-cased value URI if there is one, otherwise the lower-cased value.
     */
    private String foundValueKey(Characteristic c) {
        if (StringUtils.isNotBlank(c.getValueUri())) {
            return c.getValueUri().toLowerCase();
        }
        return c.getValue().toLowerCase();
    }

    /**
     * @return the lower-cased value URI if there is one, otherwise the lower-cased value.
     */
    private String foundValueKey(CharacteristicValueObject c) {
        if (c.getValueUri() != null && StringUtils.isNotBlank(c.getValueUri())) {
            return c.getValueUri().toLowerCase();
        }
        return c.getValue().toLowerCase();
    }

    /**
     * Allow us to store gene information as a characteristic associated with our entities. This doesn't work so well
     * for non-ncbi genes.
     */
    private Characteristic gene2Characteristic(Gene g) {
        Characteristic vc = Characteristic.Factory.newInstance();
        vc.setCategory("gene");
        vc.setCategoryUri("http://purl.org/commons/hcls/gene");
        vc.setValue(g.getOfficialSymbol() + " [" + g.getTaxon().getCommonName() + "]" + " " + g.getOfficialName());
        vc.setDescription(g.toString());
        if (g.getNcbiGeneId() != null) {
            vc.setValueUri("http://purl.org/commons/record/ncbi_gene/" + g.getNcbiGeneId());
        }
        return vc;
    }

    /**
     * Loads the category terms listed in the EFO.factor.categories.txt classpath resource (tab-separated: URI,
     * label; lines starting with '#' are comments). Terms that cannot be resolved in a loaded ontology are replaced
     * by light-weight placeholders.
     * <p>
     * The terms are collected locally and published as an unmodifiable collection at the end. If the resource is
     * missing or cannot be read, an empty collection is published, so that {@link #getCategoryTerms()} never returns
     * null and retries the load on its next invocation.
     */
    private synchronized void initializeCategoryTerms() {

        URL termUrl = OntologyServiceImpl.class.getResource("/ubic/gemma/core/ontology/EFO.factor.categories.txt");

        Collection<OntologyTerm> loaded = new ConcurrentHashSet<>();

        if (termUrl == null) {
            OntologyServiceImpl.log.error("Category term list resource was not found on the classpath");
        } else {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(termUrl.openStream(), StandardCharsets.UTF_8))) {
                String line;
                boolean warned = false;
                while ((line = reader.readLine()) != null) {
                    if (line.startsWith("#") || StringUtils.isEmpty(line))
                        continue;
                    String[] f = StringUtils.split(line, '\t');
                    if (f.length < 2) {
                        continue;
                    }
                    OntologyTerm t = this.getTerm(f[0]);
                    if (t == null) {
                        // this is not great. We might want to let it expire and redo it later if the ontology
                        // becomes
                        // available. Inference will not be available.
                        if (!warned) {
                            OntologyServiceImpl.log
                                    .info("Ontology needed is not loaded? Using light-weight placeholder for " + f[0]
                                            + " (further warnings hidden)");

                            warned = true;
                        }
                        t = new OntologyTermSimple(f[0], f[1]);
                    }

                    loaded.add(t);
                }

            } catch (IOException ioe) {
                OntologyServiceImpl.log
                        .error("Error reading from term list '" + termUrl + "'; returning general term list", ioe);
                // Do not publish a partially read list; an empty one triggers a retry on the next request.
                loaded.clear();
            }
        }

        OntologyServiceImpl.categoryTerms = Collections.unmodifiableCollection(loaded);
    }

    /**
     * given a collection of characteristics add them to the correct List
     */
    private void putCharacteristicsIntoSpecificList(String searchQuery,
            Collection<CharacteristicValueObject> characteristics,
            Collection<CharacteristicValueObject> characteristicsWithExactMatch,
            Collection<CharacteristicValueObject> characteristicsStartWithQuery,
            Collection<CharacteristicValueObject> characteristicsSubstring,
            Collection<CharacteristicValueObject> characteristicsNoRuleFound) {

        for (CharacteristicValueObject cha : characteristics) {
            // Case 1, exact match
            if (cha.getValue().equalsIgnoreCase(searchQuery)) {
                characteristicsWithExactMatch.add(cha);
            }
            // Case 2, starts with a substring of the word
            else if (cha.getValue().toLowerCase().startsWith(searchQuery.toLowerCase())) {
                characteristicsStartWithQuery.add(cha);
            }
            // Case 3, contains a substring of the word
            else if (cha.getValue().toLowerCase().contains(searchQuery.toLowerCase())) {
                characteristicsSubstring.add(cha);
            } else {
                characteristicsNoRuleFound.add(cha);
            }
        }
    }

    /**
     * Look for genes, but only for certain category Uris (genotype, etc.)
     *
     * @param taxon         okay if null, but then all matches returned.
     * @param searchResults added to this
     */
    private void searchForGenes(String queryString, Taxon taxon,
            Collection<CharacteristicValueObject> searchResults) {

        SearchSettings ss = SearchSettings.Factory.newInstance();
        ss.setQuery(queryString);
        ss.noSearches();
        ss.setTaxon(taxon);
        ss.setSearchGenes(true);
        Map<Class<?>, List<SearchResult>> geneResults = this.searchService.search(ss, true, false);

        if (geneResults.containsKey(Gene.class)) {
            for (SearchResult sr : geneResults.get(Gene.class)) {
                Gene g = (Gene) sr.getResultObject();
                if (OntologyServiceImpl.log.isDebugEnabled())
                    OntologyServiceImpl.log.debug("Search for " + queryString + " returned: " + g);
                searchResults.add(new CharacteristicValueObject(this.gene2Characteristic(g)));
            }
        }
    }

    /**
     * @param alreadyUsedResults items already in the system; remove singleton free-text terms.
     * @param otherResults       other results
     * @param searchTerm         the query
     * @return exact matches first, then "starts with"/URI-bearing results, then the rest; each group sorted with
     *         {@link CharacteristicComparator}
     */
    private Collection<CharacteristicValueObject> sort(Map<String, CharacteristicValueObject> alreadyUsedResults,
            Collection<CharacteristicValueObject> otherResults, String searchTerm) {

        /*
         * Organize the list into 3 parts. Want to get the exact match showing up on top
         */
        List<CharacteristicValueObject> sortedResultsExact = new ArrayList<>();
        List<CharacteristicValueObject> sortedResultsStartsWith = new ArrayList<>();
        List<CharacteristicValueObject> sortedResultsBottom = new ArrayList<>();

        Set<String> foundValues = new HashSet<>();

        for (String key : alreadyUsedResults.keySet()) {
            CharacteristicValueObject c = alreadyUsedResults.get(key);
            if (foundValues.contains(key))
                continue;
            foundValues.add(key);

            // don't show singletons of free-text terms.
            if (c.getValueUri() == null && c.getNumTimesUsed() < 2) {
                continue;
            }

            //Skip obsolete terms
            if (this.isObsolete(c.getValueUri())) {
                OntologyServiceImpl.log
                        .warn("Skipping an obsolete term: " + c.getValue() + " / " + c.getValueUri());
                continue;
            }

            this.addToAppropriateList(searchTerm, sortedResultsExact, sortedResultsStartsWith, sortedResultsBottom,
                    c);
        }

        for (CharacteristicValueObject c : otherResults) {
            assert c.getValueUri() != null;
            String key = this.foundValueKey(c);
            if (foundValues.contains(key))
                continue;
            foundValues.add(key);
            this.addToAppropriateList(searchTerm, sortedResultsExact, sortedResultsStartsWith, sortedResultsBottom,
                    c);
        }

        this.sort(sortedResultsExact);
        this.sort(sortedResultsStartsWith);
        this.sort(sortedResultsBottom);

        List<CharacteristicValueObject> sortedTerms = new ArrayList<>(foundValues.size());
        sortedTerms.addAll(sortedResultsExact);
        sortedTerms.addAll(sortedResultsStartsWith);
        sortedTerms.addAll(sortedResultsBottom);

        return sortedTerms;
    }

    /**
     * Puts the value object into the "exact" list when its value equals the search term (ignoring case), into the
     * "starts with" list when its value starts with the search term or it has a value URI, and into the "bottom"
     * list otherwise.
     */
    private void addToAppropriateList(String searchTerm, List<CharacteristicValueObject> sortedResultsExact,
            List<CharacteristicValueObject> sortedResultsStartsWith,
            List<CharacteristicValueObject> sortedResultsBottom, CharacteristicValueObject c) {
        if (c.getValue().equalsIgnoreCase(searchTerm)) {
            sortedResultsExact.add(c);
        } else if (c.getValue().toLowerCase().startsWith(searchTerm.toLowerCase()) || c.getValueUri() != null) {
            sortedResultsStartsWith.add(c);
        } else {
            sortedResultsBottom.add(c);
        }
    }

    /**
     * Sorts Characteristics in our preferred ordering: terms with a URI before terms without; among terms with a
     * URI, those already used in the database first, most-used first; remaining ties are broken by value length
     * (shorter first) and then by case-insensitive text.
     */
    private static class CharacteristicComparator implements Comparator<CharacteristicValueObject> {

        @Override
        public int compare(CharacteristicValueObject o1, CharacteristicValueObject o2) {
            boolean o1HasUri = o1.getValueUri() != null;
            boolean o2HasUri = o2.getValueUri() != null;

            // terms without URIs are listed later
            if (o1HasUri != o2HasUri) {
                return o1HasUri ? -1 : 1;
            }

            if (o1HasUri) {
                // both have uri, break tie by whether they are used.
                boolean o1Used = o1.isAlreadyPresentInDatabase();
                boolean o2Used = o2.isAlreadyPresentInDatabase();

                if (o1Used != o2Used) {
                    return o1Used ? -1 : 1;
                }

                if (o1Used) {
                    // both are used, break tie by who is used most.
                    if (o1.getNumTimesUsed() > o2.getNumTimesUsed()) {
                        return -1;
                    } else if (o2.getNumTimesUsed() > o1.getNumTimesUsed()) {
                        return 1;
                    }
                }
                // Both unused, or used equally often. The unused case previously returned -1 regardless of the
                // argument order, which breaks the Comparator contract; fall through to the symmetric tie-break.
            }

            // neither has URI (by definition these are in the database), or an unresolved tie: rank by length/text
            return compareByLengthThenText(o1, o2);
        }

        /**
         * Shorter values first (typically better); equal lengths are ordered by lower-cased text.
         */
        private static int compareByLengthThenText(CharacteristicValueObject o1, CharacteristicValueObject o2) {
            if (o1.getValue().length() < o2.getValue().length()) {
                return -1;
            } else if (o1.getValue().length() > o2.getValue().length()) {
                return 1;
            }

            // equal length, compare by lexig. value.
            return o1.getValue().toLowerCase().compareTo(o2.getValue().toLowerCase());
        }
    }

}