ubic.gemma.core.search.GeneSetSearchImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.search.GeneSetSearchImpl.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2010 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.gemma.core.search;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.basecode.ontology.model.OntologyResource;
import ubic.basecode.ontology.model.OntologyTerm;
import ubic.gemma.core.association.phenotype.PhenotypeAssociationManagerService;
import ubic.gemma.core.genome.gene.GOGroupValueObject;
import ubic.gemma.core.genome.gene.GeneSetValueObjectHelper;
import ubic.gemma.core.genome.gene.service.GeneSetService;
import ubic.gemma.core.ontology.providers.GeneOntologyService;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.genome.gene.GeneSet;
import ubic.gemma.model.genome.gene.GeneSetMember;
import ubic.gemma.model.genome.gene.GeneSetValueObject;
import ubic.gemma.model.genome.gene.GeneValueObject;
import ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject;
import ubic.gemma.persistence.service.association.Gene2GOAssociationService;
import ubic.gemma.persistence.service.genome.taxon.TaxonService;
import ubic.gemma.persistence.util.EntityUtils;

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

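/**
 * Spring-managed implementation of {@link GeneSetSearch}. Name and gene lookups are delegated to
 * {@link GeneSetService}; gene sets for GO terms and phenotypes are assembled on the fly from ontology and
 * association lookups.
 *
 * @author paul
 */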
@Component
public class GeneSetSearchImpl implements GeneSetSearch {

    /**
     * Also defined in GeneSearchServiceImpl.
     * <p>
     * Passed as the maximum gene set size when a GO ID is searched without a taxon (see the private
     * {@code findByGoId(String)}); per-taxon gene sets with more genes than this are left out of that search.
     */
    private static final int MAX_GO_GROUP_SIZE = 200;
    // Shared logger for this class.
    private static final Log log = LogFactory.getLog(GeneSetSearchImpl.class);

    // Supplies the genes annotated with a collection of GO ids, either for one taxon or grouped per taxon.
    @Autowired
    private Gene2GOAssociationService gene2GoService;
    // Resolves GO ids and free-text names to ontology terms, and lists a term's children.
    @Autowired
    private GeneOntologyService geneOntologyService;
    // Backs the findByGene / findByName lookups.
    @Autowired
    private GeneSetService geneSetService;
    // Converts a GeneSet built from a GO term into a GOGroupValueObject.
    @Autowired
    private GeneSetValueObjectHelper geneSetValueObjectHelper;
    // Used for phenotype searches: finds phenotypes for a query and the candidate genes for each phenotype URI.
    @Autowired
    private PhenotypeAssociationManagerService phenotypeAssociationManagerService;
    // Loads Taxon entities by id.
    @Autowired
    private TaxonService taxonService;

    /**
     * Looks up gene sets for a gene by delegating to {@link GeneSetService#findByGene}.
     *
     * @param gene the gene to look up
     * @return whatever the gene set service returns for this gene
     */
    @Override
    public Collection<GeneSet> findByGene(Gene gene) {
        final Collection<GeneSet> setsForGene = this.geneSetService.findByGene(gene);
        return setsForGene;
    }

    /**
     * Builds a GO group value object for a GO ID, restricted to a single taxon.
     *
     * @param goId    the GO identifier; it is also used as the name and description passed to the value object
     *                helper
     * @param taxonId id of the taxon to restrict the gene set to
     * @return the value object with its taxon id and common name filled in, or {@code null} when
     *         {@code taxonId} is null, the taxon cannot be loaded, or no gene set is found for the GO ID
     */
    @Override
    public GOGroupValueObject findGeneSetValueObjectByGoId(String goId, Long taxonId) {

        if (taxonId == null) {
            return null;
        }

        Taxon taxon = taxonService.load(taxonId);
        if (taxon == null) {
            GeneSetSearchImpl.log.warn("No such taxon with id=" + taxonId);
            return null;
        }

        // Reuse the taxon loaded above rather than asking the taxon service for it a second time.
        GeneSet result = this.findByGoId(goId, taxon);
        if (result == null) {
            GeneSetSearchImpl.log.warn("No matching gene set found for: " + goId);
            return null;
        }

        GOGroupValueObject ggvo = geneSetValueObjectHelper.convertToGOValueObject(result, goId, goId);

        // shouldn't need to set the taxon here, should be taken care of when creating the value object
        ggvo.setTaxonId(taxon.getId());
        ggvo.setTaxonName(taxon.getCommonName());

        return ggvo;
    }

    /**
     * Resolves a GO ID to its ontology term and turns that term into a gene set.
     *
     * @param goId  the GO identifier; surrounding whitespace is stripped before the lookup
     * @param taxon the taxon handed on to the gene lookup
     * @return the gene set for the term, or {@code null} if no term is found for the ID or no gene set can be
     *         built from it
     */
    @Override
    public GeneSet findByGoId(String goId, Taxon taxon) {
        final String strippedId = StringUtils.strip(goId);
        final OntologyTerm goTerm = this.geneOntologyService.getTermForId(strippedId);

        // if taxon is null, this returns a geneset with genes from different taxons
        return goTerm == null ? null : this.goTermToGeneSet(goTerm, taxon);
    }

    /**
     * Convenience overload of {@link #findByGoTermName(String, Taxon, Integer, Integer)} that applies neither a
     * limit on processed terms nor a limit on gene set size.
     *
     * @param goTermName the GO term name to search for
     * @param taxon      the taxon to restrict to, may be null
     * @return the gene sets produced by the four-argument overload
     */
    @Override
    public Collection<GeneSet> findByGoTermName(String goTermName, Taxon taxon) {
        final Integer noTermLimit = null;
        final Integer noSizeLimit = null;
        return this.findByGoTermName(goTermName, taxon, noTermLimit, noSizeLimit);
    }

    /**
     * Finds GO terms matching a name and converts each match into gene sets built on the fly.
     * <p>
     * With a taxon, each matching term contributes at most one gene set for that taxon. Without a taxon, each
     * matching term contributes one gene set per taxon for which genes were found.
     *
     * @param goTermName          text to look up; surrounding whitespace is stripped before the search
     * @param taxon               taxon to restrict to, or {@code null} for all taxa
     * @param maxGoTermsProcessed if non-null, processing stops once more than this many gene sets have been
     *                            collected; the check runs after each term, so the result may exceed the limit
     * @param maxGeneSetSize      if non-null, gene sets with more genes than this are left out
     * @return the collected gene sets, never {@code null}
     */
    @Override
    public Collection<GeneSet> findByGoTermName(String goTermName, Taxon taxon, Integer maxGoTermsProcessed,
            Integer maxGeneSetSize) {
        Collection<? extends OntologyResource> matches = this.geneOntologyService
                .findTerm(StringUtils.strip(goTermName));

        Collection<GeneSet> results = new HashSet<>();

        for (OntologyResource match : matches) {
            // An assert used to be the only guard before the casts below; with assertions disabled a match that
            // is not an OntologyTerm aborted the whole search with a ClassCastException. Skip it instead.
            if (!(match instanceof OntologyTerm)) {
                GeneSetSearchImpl.log.warn("Skipping GO search match that is not an ontology term: " + match);
                continue;
            }
            OntologyTerm term = (OntologyTerm) match;

            if (taxon == null) {
                // FIXME should we count each species as one go?
                Collection<GeneSet> sets = this.goTermToGeneSets(term, maxGeneSetSize);
                // Guard against a null result (e.g. a term without a URI) so addAll cannot throw.
                if (sets != null) {
                    results.addAll(sets);
                }
            } else {
                GeneSet converted = this.goTermToGeneSet(term, taxon, maxGeneSetSize);
                // converted will be null if its size is more than maxGeneSetSize
                if (converted != null) {
                    results.add(converted);
                }
            }

            if (maxGoTermsProcessed != null && results.size() > maxGoTermsProcessed) {
                break;
            }
        }

        return results;
    }

    /**
     * Looks up gene sets by name through {@link GeneSetService#findByName}.
     *
     * @param name the name to search for; surrounding whitespace is stripped first
     * @return whatever the gene set service returns for the stripped name
     */
    @Override
    public Collection<GeneSet> findByName(String name) {
        final String strippedName = StringUtils.strip(name);
        return this.geneSetService.findByName(strippedName);
    }

    /**
     * Looks up gene sets by name and taxon through {@link GeneSetService#findByName}.
     *
     * @param name  the name to search for; surrounding whitespace is stripped first
     * @param taxon the taxon passed on to the gene set service
     * @return whatever the gene set service returns for the stripped name and taxon
     */
    @Override
    public Collection<GeneSet> findByName(String name, Taxon taxon) {
        final String strippedName = StringUtils.strip(name);
        return this.geneSetService.findByName(strippedName, taxon);
    }

    /**
     * Searches the Gene Ontology for gene sets matching a query, optionally restricted to a taxon.
     * <p>
     * A query starting with "GO" (case-insensitive) is treated as a GO ID; anything else is treated as a GO term
     * name.
     * <p>
     * NOTE(review): the previous implementation also looked gene sets up by name through the gene set service, but
     * then immediately {@code clear()}ed that result ("for testing general search"), so only GO-derived sets were
     * ever returned. The returned contents are unchanged here; the discarded lookup is no longer executed, and the
     * service's collection is no longer mutated. If name matches are meant to be part of the result, add them to
     * {@code foundGeneSets} below.
     *
     * @param query   the search text; a blank query yields an empty result
     * @param taxonId id of the taxon to restrict to; {@code null}, or an id for which no taxon is loaded, means
     *                all taxa
     * @return a new, modifiable collection of matching gene sets, never {@code null}
     */
    @Override
    public Collection<GeneSet> findGeneSetsByName(String query, Long taxonId) {

        if (StringUtils.isBlank(query)) {
            return new HashSet<>();
        }

        // A missing id simply means "no taxon restriction"; do not ask the taxon service to load null.
        Taxon tax = taxonId == null ? null : taxonService.load(taxonId);

        // Accumulate into a collection owned by this method instead of one handed out by another service.
        Collection<GeneSet> foundGeneSets = new HashSet<>();

        /*
         * SEARCH GENE ONTOLOGY
         */

        if (query.toUpperCase().startsWith("GO")) {
            if (tax == null) {
                Collection<GeneSet> goSets = this.findByGoId(query);
                if (goSets != null) {
                    foundGeneSets.addAll(goSets);
                }
            } else {
                GeneSet goSet = this.findByGoId(query, tax);
                if (goSet != null) {
                    foundGeneSets.add(goSet);
                }
            }
        } else {
            foundGeneSets.addAll(this.findByGoTermName(query, tax));
        }

        return foundGeneSets;
    }

    /**
     * Searches for phenotypes matching a query and builds one transient gene set value object per phenotype URI
     * that has candidate genes.
     * <p>
     * Each gene set is named after the last path segment of the phenotype URI, described by the phenotype's
     * value, and takes its taxon id and name from the first gene in the phenotype's gene collection.
     *
     * @param phenotypeQuery the search text; surrounding whitespace is stripped before the ontology search
     * @param taxon          the taxon passed on to the candidate gene lookup
     * @return the gene set value objects; empty if no phenotypes match, never {@code null}
     */
    @Override
    public Collection<GeneSetValueObject> findByPhenotypeName(String phenotypeQuery, Taxon taxon) {

        StopWatch timer = new StopWatch();
        timer.start();
        Collection<CharacteristicValueObject> phenotypes = phenotypeAssociationManagerService
                .searchOntologyForPhenotypes(StringUtils.strip(phenotypeQuery), null);

        Collection<GeneSetValueObject> results = new HashSet<>();

        if (phenotypes.isEmpty()) {
            return results;
        }

        if (timer.getTime() > 200) {
            GeneSetSearchImpl.log.info("Find phenotypes: " + timer.getTime() + "ms");
        }

        GeneSetSearchImpl.log.debug(" Converting CharacteristicValueObjects collection(size:" + phenotypes.size()
                + ") into GeneSets for  phenotype query " + phenotypeQuery);

        // Index the phenotypes by URI so each gene collection can be matched back to its phenotype.
        Map<String, CharacteristicValueObject> uris = new HashMap<>();
        for (CharacteristicValueObject cvo : phenotypes) {
            uris.put(cvo.getValueUri(), cvo);
        }

        Map<String, Collection<? extends GeneValueObject>> genes = phenotypeAssociationManagerService
                .findCandidateGenesForEach(uris.keySet(), taxon);

        if (timer.getTime() > 500) {
            GeneSetSearchImpl.log.info("Find phenotype genes done at " + timer.getTime() + "ms");
        }

        for (Map.Entry<String, Collection<? extends GeneValueObject>> entry : genes.entrySet()) {

            Collection<? extends GeneValueObject> gvos = entry.getValue();

            if (gvos == null || gvos.isEmpty()) {
                continue;
            }

            CharacteristicValueObject phenotype = uris.get(entry.getKey());
            if (phenotype == null) {
                // The gene lookup returned a URI that was not requested; there is nothing to name the set after.
                GeneSetSearchImpl.log.warn("No phenotype found for URI " + entry.getKey() + ", skipping");
                continue;
            }

            Collection<Long> geneIds = EntityUtils.getIds(gvos);

            // The taxon of the set is taken from the first gene of the collection.
            GeneValueObject firstGene = gvos.iterator().next();

            GeneSetValueObject transientGeneSet = new GeneSetValueObject();

            transientGeneSet.setName(this.uri2phenoID(phenotype));
            transientGeneSet.setDescription(phenotype.getValue());
            transientGeneSet.setGeneIds(geneIds);

            transientGeneSet.setTaxonId(firstGene.getTaxonId());
            transientGeneSet.setTaxonName(firstGene.getTaxonCommonName());

            results.add(transientGeneSet);

        }

        if (timer.getTime() > 1000) {
            // Report the number of gene sets actually built, not the number of phenotypes that were searched.
            GeneSetSearchImpl.log.info("Loaded " + results.size() + " phenotype gene sets for query "
                    + phenotypeQuery + " in " + timer.getTime() + "ms");
        }
        return results;

    }

    /**
     * Resolves a GO ID without a taxon restriction.
     *
     * @param query the GO identifier; surrounding whitespace is stripped before the lookup
     * @return the result of converting the term into per-taxon gene sets (limited to
     *         {@link #MAX_GO_GROUP_SIZE} genes each), or an empty set if no term is found for the ID
     */
    private Collection<GeneSet> findByGoId(String query) {
        final String goId = StringUtils.strip(query);
        final OntologyTerm goTerm = this.geneOntologyService.getTermForId(goId);

        if (goTerm != null) {
            // no taxon is involved here: the term is expanded into gene sets for all taxa
            return this.goTermToGeneSets(goTerm, GeneSetSearchImpl.MAX_GO_GROUP_SIZE);
        }
        return new HashSet<>();
    }

    /**
     * Converts a GO term into a gene set for one taxon without any limit on the gene set size.
     *
     * @param term  the GO term to convert
     * @param taxon the taxon whose genes are collected
     * @return the result of the three-argument overload called with a {@code null} size limit
     */
    private GeneSet goTermToGeneSet(OntologyResource term, Taxon taxon) {
        final Integer noSizeLimit = null;
        return this.goTermToGeneSet(term, taxon, noSizeLimit);
    }

    /**
     * Convert a GO term to a transient 'GeneSet' for a single taxon, including genes from all child terms.
     * <p>
     * The gene set is named after the last path segment of the term URI and described by the term label.
     *
     * @param term           the GO term; must be an {@link OntologyTerm} with a URI for a gene set to be built
     * @param taxon          the taxon passed to the gene lookup; asserted to be non-null, which is only enforced
     *                       when assertions are enabled
     * @param maxGeneSetSize if non-null, the largest number of genes a gene set may have
     * @return the gene set, or {@code null} if the term is null, has no URI, is not an {@link OntologyTerm}, has
     *         no genes for the taxon, or has more genes than {@code maxGeneSetSize}
     */
    private GeneSet goTermToGeneSet(OntologyResource term, Taxon taxon, Integer maxGeneSetSize) {
        assert taxon != null;
        if (term == null || term.getUri() == null) {
            return null;
        }
        // An assert used to be the only guard before the cast below; with assertions disabled a resource that is
        // not an OntologyTerm failed with a ClassCastException. Report "no gene set" instead.
        if (!(term instanceof OntologyTerm)) {
            GeneSetSearchImpl.log.warn("Cannot build a gene set, not an ontology term: " + term);
            return null;
        }

        Collection<OntologyResource> allMatches = new HashSet<>();
        allMatches.add(term);
        allMatches.addAll(this.geneOntologyService.getAllChildren((OntologyTerm) term));
        GeneSetSearchImpl.log.info(term);
        /*
         * Gather up uris
         */
        Collection<String> termsToFetch = new HashSet<>();
        for (OntologyResource t : allMatches) {
            termsToFetch.add(this.uri2goid(t));
        }

        Collection<Gene> genes = this.gene2GoService.findByGOTerms(termsToFetch, taxon);

        if (genes.isEmpty() || (maxGeneSetSize != null && genes.size() > maxGeneSetSize)) {
            return null;
        }

        GeneSet transientGeneSet = GeneSet.Factory.newInstance();
        transientGeneSet.setName(this.uri2goid(term));

        // FIXME a label starting with "GO_" means this is an individual or a 'resource', not a 'class', but it's
        // a real GO term. How to get the text.
        if (term.getLabel() == null) {
            GeneSetSearchImpl.log.warn(" Label for term " + term.getUri() + " was null");
        }

        transientGeneSet.setDescription(term.getLabel());

        for (Gene gene : genes) {
            GeneSetMember gmember = GeneSetMember.Factory.newInstance();
            gmember.setGene(gene);
            transientGeneSet.getMembers().add(gmember);
        }
        return transientGeneSet;
    }

    /**
     * Convert a GO term to transient 'GeneSet's, one per taxon, including genes from all child terms.
     * <p>
     * Every gene set is named after the last path segment of the term URI and described by the term label.
     *
     * @param term           the GO term to convert
     * @param maxGeneSetSize if non-null, per-taxon gene sets with more genes than this are left out
     * @return the gene sets, never {@code null}; empty if the term is null, has no URI, or no taxon yields a gene
     *         set within the size limit
     */
    private Collection<GeneSet> goTermToGeneSets(OntologyTerm term, Integer maxGeneSetSize) {
        Collection<GeneSet> results = new HashSet<>();

        // Callers pass the result straight to addAll(), so "nothing found" must be an empty collection, not null.
        if (term == null || term.getUri() == null) {
            return results;
        }

        Collection<OntologyResource> allMatches = new HashSet<>();
        allMatches.add(term);
        allMatches.addAll(this.geneOntologyService.getAllChildren(term));
        GeneSetSearchImpl.log.info(term);
        /*
         * Gather up uris
         */
        Collection<String> termsToFetch = new HashSet<>();
        for (OntologyResource t : allMatches) {
            termsToFetch.add(this.uri2goid(t));
        }

        Map<Taxon, Collection<Gene>> genesByTaxon = this.gene2GoService.findByGOTermsPerTaxon(termsToFetch);

        // Only the per-taxon gene lists are needed; the taxon key itself is not used when building a set.
        for (Collection<Gene> genes : genesByTaxon.values()) {

            if (genes == null || genes.isEmpty() || (maxGeneSetSize != null && genes.size() > maxGeneSetSize)) {
                continue;
            }

            GeneSet transientGeneSet = GeneSet.Factory.newInstance();
            transientGeneSet.setName(this.uri2goid(term));
            transientGeneSet.setDescription(term.getLabel());

            for (Gene gene : genes) {
                GeneSetMember gmember = GeneSetMember.Factory.newInstance();
                gmember.setGene(gene);
                transientGeneSet.getMembers().add(gmember);
            }
            results.add(transientGeneSet);
        }
        return results;
    }

    /**
     * Derives an identifier from an ontology resource's URI by removing everything up to and including the last
     * '/'.
     *
     * @param t the resource; its URI must not be null
     * @return the remainder of the URI after the last '/', or the whole URI if it contains no '/'
     */
    private String uri2goid(OntologyResource t) {
        final String uri = t.getUri();
        // the greedy ".*/" swallows the longest prefix that ends in a slash
        return uri.replaceFirst(".*/", "");
    }

    /**
     * Derives an identifier from a characteristic's value URI by removing everything up to and including the
     * last '/'.
     *
     * @param t the characteristic; its value URI must not be null
     * @return the remainder of the value URI after the last '/', or the whole URI if it contains no '/'
     */
    private String uri2phenoID(CharacteristicValueObject t) {
        final String valueUri = t.getValueUri();
        // the greedy ".*/" swallows the longest prefix that ends in a slash
        return valueUri.replaceFirst(".*/", "");
    }

}