Java tutorial
/* * The Gemma project * * Copyright (c) 2008 University of British Columbia * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package ubic.gemma.analysis.expression.coexpression; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import net.sf.ehcache.Element; import org.apache.commons.lang.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Lazy; import org.springframework.stereotype.Component; import ubic.basecode.dataStructure.CountingMap; import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.expression.experiment.service.ExpressionExperimentService; import ubic.gemma.expression.experiment.service.ExpressionExperimentSetService; import ubic.gemma.genome.gene.service.GeneService; import ubic.gemma.loader.protein.ProteinLinkOutFormatter; import ubic.gemma.model.analysis.Analysis; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.analysis.expression.coexpression.CoexpressedGenePairValueObject; import ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionAnalysis; import ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionAnalysisService; import ubic.gemma.model.analysis.expression.coexpression.QueryGeneCoexpression; import ubic.gemma.model.association.Gene2GeneProteinAssociation; import ubic.gemma.model.association.Gene2GeneProteinAssociationService; import ubic.gemma.model.association.TfGeneAssociation; import ubic.gemma.model.association.TfGeneAssociationService; import ubic.gemma.model.association.coexpression.Gene2GeneCoexpression; import ubic.gemma.model.association.coexpression.Gene2GeneCoexpressionService; import ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegree; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.gene.GeneLightWeightCache; import ubic.gemma.model.genome.gene.GeneValueObject; import ubic.gemma.ontology.providers.GeneOntologyService; import ubic.gemma.util.AnchorTagUtil; import ubic.gemma.util.EntityUtils; /** * Provides access to Gene2Gene and Probe2Probe links. The use of this service provides 'high-level' access to * functionality in the Gene2GeneCoexpressionService and the ProbeLinkCoexpressionAnalyzer. * * @author paul * @version $Id: GeneCoexpressionServiceImpl.java,v 1.25 2013/03/27 04:04:30 paul Exp $ */ @Component @Lazy public class GeneCoexpressionServiceImpl implements GeneCoexpressionService { private static Log log = LogFactory.getLog(GeneCoexpressionServiceImpl.class.getName()); /** * How many genes to fill in the "go overlap" info for. */ private static final int NUM_GENES_TO_DETAIL = 25; @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired private ExpressionExperimentSetService expressionExperimentSetService; @Autowired private Gene2GeneCoexpressionService gene2GeneCoexpressionService; @Autowired private Gene2GeneProteinAssociationService gene2GeneProteinAssociationService = null; @Autowired private GeneCoexpressionAnalysisService geneCoexpressionAnalysisService; @Autowired private GeneOntologyService geneOntologyService; @Autowired private GeneService geneService; @Autowired private ProbeLinkCoexpressionAnalyzer probeLinkCoexpressionAnalyzer; @Autowired private TfGeneAssociationService tfGeneAssociationService; @Autowired private GeneLightWeightCache geneLightWeightCache; public GeneLightWeightCache getGeneLightWeightCache() { return geneLightWeightCache; } /* * (non-Javadoc) * * @see ubic.gemma.analysis.expression.coexpression.GeneCoexpressionService# * coexpressionSearch(java.util.Collection, java.util.Collection, int, int, boolean, boolean) */ @Override public CoexpressionMetaValueObject coexpressionSearch(Collection<Long> inputEeIds, Collection<Gene> genes, int stringency, int maxResults, boolean queryGenesOnly, boolean forceProbeLevelSearch) { if (genes.isEmpty()) { CoexpressionMetaValueObject r = new CoexpressionMetaValueObject(); r.setErrorState("No genes selected"); return r; } /* * repopulate eeIds with the actual eeIds we'll be searching through and load ExpressionExperimentValueObjects * to get summary information about the datasets... */ Collection<? extends BioAssaySet> ees = expressionExperimentService.loadMultiple(inputEeIds); Collection<Long> eeIds = EntityUtils.getIds(ees); /* * If possible: instead of using the probeLinkCoexpressionAnalyzer, Use a canned analysis with a filter. */ if (!forceProbeLevelSearch) { Taxon taxon = genes.iterator().next().getTaxon(); ExpressionExperimentSet eeSet = geneCoexpressionAnalysisService.findCurrent(taxon) .getExpressionExperimentSetAnalyzed(); if (eeSet != null && EntityUtils .getIds(expressionExperimentSetService.getExperimentsInSet(eeSet.getId())).containsAll(eeIds)) { return getFilteredCannedAnalysisResults(eeSet, eeIds, genes, stringency, maxResults, queryGenesOnly); } } /* * If we get this far, there was no matching analysis so we do it using the probe2probe table. This is * relatively slow so should be avoided. */ List<ExpressionExperimentValueObject> eevos = getSortedEEvos(eeIds); CoexpressionMetaValueObject result = initValueObject(genes, eevos, false); if (eeIds.isEmpty()) { result = new CoexpressionMetaValueObject(); result.setErrorState("No experiments selected"); return result; } for (ExpressionExperimentValueObject eevo : eevos) { // FIXME don't reuse this field. eevo.setExternalUri(AnchorTagUtil.getExpressionExperimentUrl(eevo.getId())); } boolean knownGenesOnly = true; // used to be: // !SecurityService.isUserAdmin(); result.setKnownGenesOnly(knownGenesOnly); Collection<Long> geneIds = new HashSet<Long>(genes.size()); for (Gene gene : genes) { geneIds.add(gene.getId()); } Map<Gene, QueryGeneCoexpression> allCoexpressions = new HashMap<Gene, QueryGeneCoexpression>(); if (genes.size() == 1) { Gene soleQueryGene = genes.iterator().next(); allCoexpressions.put(soleQueryGene, probeLinkCoexpressionAnalyzer.linkAnalysis(soleQueryGene, ees, stringency, maxResults)); } else { /* * Batch mode */ allCoexpressions = probeLinkCoexpressionAnalyzer.linkAnalysis(genes, ees, stringency, queryGenesOnly, maxResults); } Collection<Long> allUsedGenes = new HashSet<Long>(); for (Gene queryGene : allCoexpressions.keySet()) { allUsedGenes.add(queryGene.getId()); QueryGeneCoexpression coexpressions = allCoexpressions.get(queryGene); result.setErrorState(coexpressions.getErrorState()); // fill in the protein interaction details if present Map<Long, Gene2GeneProteinAssociation> proteinInteractionsForQueryGene = this .getGene2GeneProteinAssociationForQueryGene(queryGene); addExtCoexpressionValueObjects(queryGene, eevos, coexpressions, stringency, queryGenesOnly, geneIds, result.getKnownGeneResults(), result.getKnownGeneDatasets(), proteinInteractionsForQueryGene); CoexpressionSummaryValueObject summary = new CoexpressionSummaryValueObject(); summary.setDatasetsAvailable(eevos.size()); summary.setDatasetsTested(coexpressions.getNumDataSetsQueryGeneTestedIn()); summary.setLinksFound(coexpressions.getNumberOfGenes()); summary.setLinksMetPositiveStringency(coexpressions.getPositiveStringencyLinkCount()); summary.setLinksMetNegativeStringency(coexpressions.getNegativeStringencyLinkCount()); result.getSummary().put(queryGene.getOfficialSymbol(), summary); coexpressions.finalize(); } Collection<Long> allSupportingDatasets = new HashSet<Long>(); for (CoexpressionValueObjectExt c : result.getKnownGeneResults()) { allSupportingDatasets.addAll(c.getSupportingExperiments()); allUsedGenes.add(c.getFoundGene().getId()); } if (!allSupportingDatasets.isEmpty()) { Map<Gene, GeneCoexpressionNodeDegree> geneNodeDegrees = geneService .getGeneCoexpressionNodeDegree(geneService.loadMultiple(allUsedGenes)); Map<Long, Gene> idMap = EntityUtils.getIdMap(geneNodeDegrees.keySet()); for (CoexpressionValueObjectExt c : result.getKnownGeneResults()) { GeneCoexpressionNodeDegree queryGeneNodeDegree = geneNodeDegrees .get(idMap.get(c.getQueryGene().getId())); if (queryGeneNodeDegree.getNumTests() < 20) { c.setQueryGeneNodeDegree(0.5); } else { c.setQueryGeneNodeDegree(queryGeneNodeDegree.getRank()); } GeneCoexpressionNodeDegree foundGeneNodeDegree = geneNodeDegrees .get(idMap.get(c.getFoundGene().getId())); if (foundGeneNodeDegree.getNumTests() < 20) { c.setQueryGeneNodeDegree(0.5); } else { c.setFoundGeneNodeDegree(foundGeneNodeDegree.getRank()); } } } else { for (CoexpressionValueObjectExt c : result.getKnownGeneResults()) { c.setQueryGeneNodeDegree(0d); c.setFoundGeneNodeDegree(0d); } } return result; } /* * (non-Javadoc) * * @see ubic.gemma.analysis.expression.coexpression.GeneCoexpressionService# * coexpressionSearchQuick(java.util.Collection, java.util.Collection, int, int, boolean, boolean) */ @Override public Collection<CoexpressionValueObjectExt> coexpressionSearchQuick(Collection<Long> inputEeIds, Collection<Gene> genes, int stringency, int maxResults, boolean queryGenesOnly, boolean skipDetails) { if (genes.isEmpty()) { return new HashSet<CoexpressionValueObjectExt>(); } /* * If possible: instead of using the probeLinkCoexpressionAnalyzer, Use a canned analysis with a filter. */ Taxon taxon = genes.iterator().next().getTaxon(); GeneCoexpressionAnalysis currentSet = geneCoexpressionAnalysisService.findCurrent(taxon); if (currentSet == null) { return new HashSet<CoexpressionValueObjectExt>(); } ExpressionExperimentSet eeSet = currentSet.getExpressionExperimentSetAnalyzed(); assert !inputEeIds.isEmpty(); return getFilteredCannedAnalysisResults2(eeSet, inputEeIds, genes, stringency, maxResults, queryGenesOnly, skipDetails); } /* * (non-Javadoc) * * @see ubic.gemma.analysis.expression.coexpression.GeneCoexpressionService# coexpressionSearchQuick(java.lang.Long, * java.util.Collection, int, int, boolean, boolean) */ @Override public Collection<CoexpressionValueObjectExt> coexpressionSearchQuick(Long eeSetId, Collection<Gene> queryGenes, int stringency, int maxResults, boolean queryGenesOnly, boolean skipDetails) { ExpressionExperimentSet eeSet = expressionExperimentSetService.load(eeSetId); expressionExperimentSetService.thaw(eeSet); Collection<Long> allEEIdsInSet = EntityUtils.getIds(eeSet.getExperiments()); return coexpressionSearchQuick(allEEIdsInSet, queryGenes, stringency, maxResults, queryGenesOnly, skipDetails); } /** * For a given query gene retrieve it's protein protein interactions. Iterating through those interactions create a * map keyed on the gene association that was retreived for that given gene. E.g. query gene 'AB' has interactions * with 'BB' and 'CC' then create a map using the ids as keys from BB and CC. and the value using the String url for * that interaction * * @param gene The gene to find associations for * @return Map of gene ids and their protein protein interactions */ protected Map<Long, Gene2GeneProteinAssociation> getGene2GeneProteinAssociationForQueryGene(Gene gene) { Map<Long, Gene2GeneProteinAssociation> stringUrlsMappedByGeneID = new HashMap<Long, Gene2GeneProteinAssociation>(); Collection<Gene2GeneProteinAssociation> proteinInteractions = this.gene2GeneProteinAssociationService .findProteinInteractionsForGene(gene); // check if found any interactions if (proteinInteractions != null && !proteinInteractions.isEmpty()) { for (Gene2GeneProteinAssociation proteinInteraction : proteinInteractions) { gene2GeneProteinAssociationService.thaw(proteinInteraction); if (log.isDebugEnabled()) { log.debug("found interaction for gene " + proteinInteraction.getFirstGene() + " and " + proteinInteraction.getSecondGene()); } if (proteinInteraction.getDatabaseEntry() != null && proteinInteraction.getSecondGene().getId() != null && proteinInteraction.getFirstGene().getId() != null) { // can append extra details to link if required this // formating code should be somewhere else? if (proteinInteraction.getFirstGene().getId().equals(gene.getId())) { stringUrlsMappedByGeneID.put(proteinInteraction.getSecondGene().getId(), proteinInteraction); } else { stringUrlsMappedByGeneID.put(proteinInteraction.getFirstGene().getId(), proteinInteraction); } } } } return stringUrlsMappedByGeneID; } /** * @param gene which is to be treated as a "target" * @return map of the transcription factor to the interaction details */ protected Map<Long, TfGeneAssociation> getTfGeneAssociationsforTargetGene(Gene gene) { Map<Long, TfGeneAssociation> associationsMappedByGeneId = new HashMap<Long, TfGeneAssociation>(); Collection<? extends TfGeneAssociation> interactions = this.tfGeneAssociationService.findByTargetGene(gene); if (interactions != null && !interactions.isEmpty()) { for (TfGeneAssociation interaction : interactions) { if (log.isDebugEnabled()) { log.debug("found interaction for gene " + interaction.getFirstGene() + " and " + interaction.getSecondGene()); } associationsMappedByGeneId.put(interaction.getFirstGene().getId(), interaction); } } return associationsMappedByGeneId; } /** * @param gene which is to be treated as a "transcription factor" * @return map of the target genes to the interaction details. */ protected Map<Long, TfGeneAssociation> getTfGeneAssociationsforTf(Gene gene) { Map<Long, TfGeneAssociation> associationsMappedByGeneId = new HashMap<Long, TfGeneAssociation>(); Collection<? extends TfGeneAssociation> interactions = this.tfGeneAssociationService.findByTf(gene); if (interactions != null && !interactions.isEmpty()) { for (TfGeneAssociation interaction : interactions) { if (log.isDebugEnabled()) { log.debug("found interaction for gene " + interaction.getFirstGene() + " and " + interaction.getSecondGene()); } associationsMappedByGeneId.put(interaction.getSecondGene().getId(), interaction); } } return associationsMappedByGeneId; } /** * Convert CoexpressionValueObject into CoexpressionValueObjectExt objects to be passed to the client for display. * This is used for probe-level queries. * * @param queryGene * @param eevos * @param coexp * @param stringency * @param queryGenesOnly * @param geneIds * @param results object we are adding to * @param datasetResults * @param proteinInteractionsForQueryGene map keyed on geneid of string url for protein interaction */ private void addExtCoexpressionValueObjects(Gene queryGene, List<ExpressionExperimentValueObject> eevos, QueryGeneCoexpression coexp, int stringency, boolean queryGenesOnly, Collection<Long> geneIds, Collection<CoexpressionValueObjectExt> results, Collection<CoexpressionDatasetValueObject> datasetResults, Map<Long, Gene2GeneProteinAssociation> proteinInteractionsForQueryGene) { Collection<Long> coexpIds = new HashSet<Long>(); for (CoexpressedGenePairValueObject cvo : coexp.getCoexpressionData(stringency)) { coexpIds.add(cvo.getCoexpressedGeneId()); } Map<Long, GeneValueObject> coexpedGenes = EntityUtils.getIdMap(geneService.loadValueObjects(coexpIds)); for (CoexpressedGenePairValueObject cvo : coexp.getCoexpressionData(stringency)) { if (queryGenesOnly && !geneIds.contains(cvo.getCoexpressedGeneId())) continue; CoexpressionValueObjectExt ecvo = new CoexpressionValueObjectExt(); ecvo.setQueryGene(new GeneValueObject(queryGene)); ecvo.setFoundGene(coexpedGenes.get(cvo.getCoexpressedGeneId())); // FIXME // too // slow, ecvo.setPosSupp(cvo.getPositiveLinkSupport()); ecvo.setNegSupp(cvo.getNegativeLinkSupport()); ecvo.setSupportKey(10 * Math.max(ecvo.getPosSupp(), ecvo.getNegSupp())); // if there are some protein protein interactions for this gene see // if the given coexpressed gene is in the // map of interactions and if so get the value for the URL. if (proteinInteractionsForQueryGene != null && !(proteinInteractionsForQueryGene.isEmpty())) { Gene2GeneProteinAssociation proteinProteinInteraction = proteinInteractionsForQueryGene .get(cvo.getCoexpressedGeneId()); this.addProteinDetailsToValueObject(proteinProteinInteraction, ecvo); } /* * Fill in the support based on 'non-specific' probes. */ if (!cvo.getExpressionExperiments().isEmpty()) { ecvo.setNonSpecPosSupp( getNonSpecificLinkCount(cvo.getEEContributing2PositiveLinks(), cvo.getNonspecificEE())); ecvo.setNonSpecNegSupp( getNonSpecificLinkCount(cvo.getEEContributing2NegativeLinks(), cvo.getNonspecificEE())); } ecvo.setNumTestedIn(cvo.getNumDatasetsTestedIn()); StringBuilder datasetVector = new StringBuilder(); Collection<Long> supportingEEs = new ArrayList<Long>(); for (int i = 0; i < eevos.size(); ++i) { ExpressionExperimentValueObject eevo = eevos.get(i); Long eeid = eevo.getId(); // this information will not be filled in if the cvo was built // in 'batch' mode, but that's not the case, // here. boolean tested = cvo.getDatasetsTestedIn() != null && cvo.getDatasetsTestedIn().contains(eeid); assert cvo.getExpressionExperiments().size() <= cvo.getPositiveLinkSupport() + cvo.getNegativeLinkSupport() : "got " + cvo.getExpressionExperiments().size() + " expected " + (cvo.getPositiveLinkSupport() + cvo.getNegativeLinkSupport()); boolean supported = cvo.getExpressionExperiments().contains(eeid); boolean specific = !cvo.getNonspecificEE().contains(eeid); if (supported) { if (specific) { datasetVector.append("3"); } else { datasetVector.append("2"); } supportingEEs.add(eeid); } else if (tested) { datasetVector.append("1"); } else { datasetVector.append("0"); } } ecvo.setDatasetVector(datasetVector.toString()); ecvo.setSupportingExperiments(supportingEEs); ecvo.setSortKey(); results.add(ecvo); } for (ExpressionExperimentValueObject eevo : eevos) { if (!coexp.getDataSetsQueryGeneTestedIn().contains(eevo.getId())) continue; CoexpressionDatasetValueObject ecdvo = new CoexpressionDatasetValueObject(); ecdvo.setId(eevo.getId()); ecdvo.setQueryGene(queryGene.getOfficialSymbol()); // NOTE should be accurate (probe-level query) but we won't show it. // See bug 1564 FIXME // ecdvo.setProbeSpecificForQueryGene( // coexpEevo.getHasProbeSpecificForQueryGene() ); ecdvo.setArrayDesignCount(eevo.getArrayDesignCount()); ecdvo.setBioAssayCount(eevo.getBioAssayCount()); datasetResults.add(ecdvo); } } /** * Adds the protein protein interaction data to the value object, that is the url link for string the evidence for * that interaction and the confidence score. * * @param proteinProteinInteraction Protein Protein interaction for the coexpression link * @param cvo The value object used to display coexpression data */ private void addProteinDetailsToValueObject(Gene2GeneProteinAssociation proteinProteinInteraction, CoexpressionValueObjectExt cvo) { if (proteinProteinInteraction == null) return; ProteinLinkOutFormatter proteinFormatter = new ProteinLinkOutFormatter(); String proteinProteinIdUrl = proteinFormatter .getStringProteinProteinInteractionLinkGemmaDefault(proteinProteinInteraction.getDatabaseEntry()); String evidenceText = proteinFormatter .getEvidenceDisplayText(proteinProteinInteraction.getEvidenceVector()); String confidenceText = proteinFormatter .getConfidenceScoreAsPercentage(proteinProteinInteraction.getConfidenceScore()); log.debug("A coexpression link in GEMMA has a interaction in STRING " + proteinProteinIdUrl + " evidence of " + evidenceText); cvo.setGene2GeneProteinAssociationStringUrl(proteinProteinIdUrl); cvo.setGene2GeneProteinInteractionConfidenceScore(confidenceText); cvo.setGene2GeneProteinInteractionEvidence(evidenceText); } /** * @param tfGeneAssociation * @param cvo */ private void addTfInteractionToValueObject(TfGeneAssociation tfGeneAssociation, CoexpressionValueObjectExt cvo) { if (tfGeneAssociation == null) return; if (tfGeneAssociation.getFirstGene().getId().equals(cvo.getQueryGene().getId())) { cvo.setQueryRegulatesFound(true); } else if (tfGeneAssociation.getFirstGene().getId().equals(cvo.getFoundGene().getId())) { cvo.setFoundRegulatesQuery(true); } else { throw new IllegalStateException(); } } /** * This is necessary in case there is more than one gene2gene analysis in the system. The common case is when a new * analysis is in progress. Only one analysis should be enabled at any given time. * * @param queryGenes * @return */ private GeneCoexpressionAnalysis findEnabledCoexpressionAnalysis(Collection<Gene> queryGenes) { GeneCoexpressionAnalysis gA = null; Gene g = queryGenes.iterator().next(); // note: we assume they all come from one taxon. Taxon t = g.getTaxon(); Collection<? extends Analysis> analyses = null; // check if the taxon is a species if it is not then it is a parent // taxon and need to get child taxa // coexpression analyses. if (!t.getIsSpecies()) { analyses = geneCoexpressionAnalysisService.findByParentTaxon(t); } else { analyses = geneCoexpressionAnalysisService.findByTaxon(t); } if (analyses.size() == 0) { throw new IllegalStateException( "No gene coexpression analysis is available for " + t.getScientificName()); } else if (analyses.size() == 1) { gA = (GeneCoexpressionAnalysis) analyses.iterator().next(); } else { for (Analysis analysis : analyses) { GeneCoexpressionAnalysis c = (GeneCoexpressionAnalysis) analysis; if (c.getEnabled()) { if (gA == null) { gA = c; } else { throw new IllegalStateException( "System should only have a single gene2gene coexpression analysis enabled per taxon, found more than one for " + t); } } } } return gA; } /** * @param eevos * @param result * @param supportCount * @param supportingExperimentIds * @param queryGene */ private void generateDatasetSummary(List<ExpressionExperimentValueObject> eevos, CoexpressionMetaValueObject result, CountingMap<Long> supportCount, Collection<Long> supportingExperimentIds, Gene queryGene) { /* * generate dataset summary info for this query gene... */ for (ExpressionExperimentValueObject eevo : eevos) { if (!supportingExperimentIds.contains(eevo.getId())) continue; CoexpressionDatasetValueObject ecdvo = new CoexpressionDatasetValueObject(); ecdvo.setId(eevo.getId()); ecdvo.setQueryGene(queryGene.getOfficialSymbol()); ecdvo.setProbeSpecificForQueryGene(true); // we shouldn't display // this. See bug 1564. ecdvo.setArrayDesignCount(eevo.getArrayDesignCount()); ecdvo.setBioAssayCount(eevo.getBioAssayCount()); result.getKnownGeneDatasets().add(ecdvo); } } /** * @param supporting * @param testing * @param specific * @param allIds * @return String representation of binary vector (might as well be a string, as it gets sent to the browser that * way). 0 = not tested; 1 = tested but not supporting; 2 = supporting but not specific; 3 supporting and * specific. */ private String getDatasetVector(Collection<Long> supporting, Collection<Long> testing, Collection<Long> specific, List<Long> allIds) { StringBuilder datasetVector = new StringBuilder(); for (Long id : allIds) { boolean tested = testing.contains(id); boolean supported = supporting.contains(id); boolean s = specific.contains(id); if (supported) { if (s) { datasetVector.append("3"); } else { datasetVector.append("2"); } } else if (tested) { datasetVector.append("1"); } else { datasetVector.append("0"); } } return datasetVector.toString(); } /** * Get coexpression results using a pure gene2gene query (without visiting the probe2probe tables. This is generally * faster, probably even if we're only interested in data from a subset of the experiments. * * @param baseSet * @param eeIds Experiments to limit the results to (must not be null, and should already be security-filtered) * @param queryGenes * @param stringency * @param maxResults * @param queryGenesOnly return links among the query genes only. * @return */ private CoexpressionMetaValueObject getFilteredCannedAnalysisResults(ExpressionExperimentSet baseSet, Collection<Long> eeIds, Collection<Gene> queryGenes, int stringency, int maxResults, boolean queryGenesOnly) { if (queryGenes.isEmpty()) { throw new IllegalArgumentException("No genes in query"); } List<ExpressionExperimentValueObject> eevos = getSortedEEvos(eeIds); if (eevos.isEmpty()) { throw new IllegalArgumentException("There are no usable experiments in the selected set"); } /* * We get this prior to filtering so it matches the vectors stored with the analysis. */ expressionExperimentSetService.thaw(baseSet); List<Long> positionToIDMap = Gene2GenePopulationServiceImpl .getPositionToIdMap(EntityUtils.getIds(baseSet.getExperiments())); /* * This set of links must be filtered to include those in the data sets being analyzed. */ Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = getRawCoexpression(queryGenes, stringency, maxResults, queryGenesOnly); List<Long> filteredEeIds = (List<Long>) EntityUtils.getIds(eevos); CoexpressionMetaValueObject result = initValueObject(queryGenes, eevos, true); List<CoexpressionValueObjectExt> ecvos = new ArrayList<CoexpressionValueObjectExt>(); Collection<Gene2GeneCoexpression> seen = new HashSet<Gene2GeneCoexpression>(); // queryGenes = geneService.thawLite( gg2gs.keySet() ); // populate the value objects. StopWatch timer = new StopWatch(); Collection<Gene> allUsedGenes = new HashSet<Gene>(); for (Gene queryGene : queryGenes) { timer.start(); if (!queryGene.getTaxon().equals(baseSet.getTaxon())) { throw new IllegalArgumentException( "Mismatch between taxon for expression experiment set selected and gene queries"); } allUsedGenes.add(queryGene); /* * For summary statistics */ CountingMap<Long> supportCount = new CountingMap<Long>(); Collection<Long> allSupportingDatasets = new HashSet<Long>(); Collection<Long> allDatasetsWithSpecificProbes = new HashSet<Long>(); Collection<Long> allTestedDataSets = new HashSet<Long>(); int linksMetPositiveStringency = 0; int linksMetNegativeStringency = 0; Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGene.getId()); assert g2gs != null; List<Long> relevantEEIdList = getRelevantEEidsForBitVector(positionToIDMap, g2gs); relevantEEIdList.retainAll(filteredEeIds); GeneValueObject queryGeneValueObject = new GeneValueObject(queryGene); HashMap<Gene, Collection<Gene2GeneCoexpression>> foundGenes = new HashMap<Gene, Collection<Gene2GeneCoexpression>>(); // for queryGene get the interactions Map<Long, Gene2GeneProteinAssociation> proteinInteractionMap = this .getGene2GeneProteinAssociationForQueryGene(queryGene); Map<Long, TfGeneAssociation> regulatedBy = this.getTfGeneAssociationsforTargetGene(queryGene); Map<Long, TfGeneAssociation> regulates = this.getTfGeneAssociationsforTf(queryGene); if (timer.getTime() > 100) { log.info("Postprocess " + queryGene.getOfficialSymbol() + " Phase I: " + timer.getTime() + "ms"); } timer.stop(); timer.reset(); timer.start(); for (Gene2GeneCoexpression g2g : g2gs) { StopWatch timer2 = new StopWatch(); timer2.start(); Gene foundGene = g2g.getFirstGene().equals(queryGene) ? g2g.getSecondGene() : g2g.getFirstGene(); allUsedGenes.add(foundGene); // FIXME Symptom fix for duplicate found genes // Keep track of the found genes that we can correctly identify // duplicates. // All keep the g2g object for debugging purposes. if (foundGenes.containsKey(foundGene)) { foundGenes.get(foundGene).add(g2g); log.warn("Duplicate gene found in coexpression results, skipping: " + foundGene + " From analysis: " + g2g.getSourceAnalysis().getId()); continue; // Found a duplicate gene, don't add to results // just our debugging list } foundGenes.put(foundGene, new ArrayList<Gene2GeneCoexpression>()); foundGenes.get(foundGene).add(g2g); CoexpressionValueObjectExt cvo = new CoexpressionValueObjectExt(); /* * This Thaw is a big time sink and _should not_ be necessary. */ // foundGene = geneService.thawLite( foundGene ); // db hit cvo.setQueryGene(queryGeneValueObject); cvo.setFoundGene(new GeneValueObject(foundGene)); if (timer2.getTime() > 10) log.info("Coexp. Gene processing phase I:" + timer2.getTime() + "ms"); timer2.stop(); timer2.reset(); timer2.start(); populateInteractions(proteinInteractionMap, regulatedBy, regulates, foundGene, cvo); Collection<Long> testingDatasets = Gene2GenePopulationServiceImpl.getTestedExperimentIds(g2g, positionToIDMap); testingDatasets.retainAll(filteredEeIds); /* * necesssary in case any were filtered out (for example, if this is a virtual analysis; or there were * 'troubled' ees. Note that 'supporting' includes 'non-specific' if they were recorded by the analyzer. */ Collection<Long> supportingDatasets = Gene2GenePopulationServiceImpl.getSupportingExperimentIds(g2g, positionToIDMap); // necessary in case any were filtered out. supportingDatasets.retainAll(filteredEeIds); cvo.setSupportingExperiments(supportingDatasets); Collection<Long> specificDatasets = Gene2GenePopulationServiceImpl.getSpecificExperimentIds(g2g, positionToIDMap); /* * Specific probe EEids contains 1 even if the data set wasn't supporting. */ specificDatasets.retainAll(supportingDatasets); int numTestingDatasets = testingDatasets.size(); int numSupportingDatasets = supportingDatasets.size(); /* * SANITY CHECKS */ assert specificDatasets.size() <= numSupportingDatasets; assert numTestingDatasets >= numSupportingDatasets; assert numTestingDatasets <= eevos.size(); cvo.setDatasetVector( getDatasetVector(supportingDatasets, testingDatasets, specificDatasets, relevantEEIdList)); /* * This check is necessary in case any data sets were filtered out. (i.e., we're not interested in the * full set of data sets that were used in the original analysis. */ if (numSupportingDatasets < stringency) { continue; } allTestedDataSets.addAll(testingDatasets); int supportFromSpecificProbes = specificDatasets.size(); if (g2g.getEffect() < 0) { cvo.setPosSupp(0); cvo.setNegSupp(numSupportingDatasets); if (numSupportingDatasets != supportFromSpecificProbes) cvo.setNonSpecNegSupp(numSupportingDatasets - supportFromSpecificProbes); ++linksMetNegativeStringency; } else { cvo.setPosSupp(numSupportingDatasets); if (numSupportingDatasets != supportFromSpecificProbes) cvo.setNonSpecPosSupp(numSupportingDatasets - supportFromSpecificProbes); cvo.setNegSupp(0); ++linksMetPositiveStringency; } cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp())); cvo.setNumTestedIn(numTestingDatasets); for (Long id : supportingDatasets) { supportCount.increment(id); } cvo.setSortKey(); /* * This check prevents links from being shown twice when we do "among query genes". We don't skip * entirely so we get the counts for the summary table populated correctly. */ if (!seen.contains(g2g)) { ecvos.add(cvo); } seen.add(g2g); allSupportingDatasets.addAll(supportingDatasets); allDatasetsWithSpecificProbes.addAll(specificDatasets); } Collection<Long> geneIds = new ArrayList<Long>(); for (Gene g : allUsedGenes) { geneIds.add(g.getId()); } populateNodeDegree(ecvos, geneIds, allTestedDataSets); if (timer.getTime() > 1000) { log.info("Postprocess " + g2gs.size() + " results for " + queryGene.getOfficialSymbol() + "Phase II: " + timer.getTime() + "ms"); } timer.stop(); timer.reset(); timer.start(); // This is only necessary for debugging purposes. Helps us keep // track of duplicate genes found above. if (log.isDebugEnabled()) { for (Gene foundGene : foundGenes.keySet()) { if (foundGenes.get(foundGene).size() > 1) { log.debug("** DUPLICATE: " + foundGene.getOfficialSymbol() + " found multiple times. Gene2Genes objects are: "); for (Gene2GeneCoexpression g1g : foundGenes.get(foundGene)) { log.debug(" ============ Gene2Gene Id: " + g1g.getId() + " 1st gene: " + g1g.getFirstGene().getOfficialSymbol() + " 2nd gene: " + g1g.getSecondGene().getOfficialSymbol() + " Source Analysis: " + g1g.getSourceAnalysis().getId() + " # of dataSets: " + g1g.getNumDataSets()); } } } } CoexpressionSummaryValueObject summary = makeSummary(eevos, allTestedDataSets, allDatasetsWithSpecificProbes, linksMetPositiveStringency, linksMetNegativeStringency); result.getSummary().put(queryGene.getOfficialSymbol(), summary); generateDatasetSummary(eevos, result, supportCount, allSupportingDatasets, queryGene); /* * FIXME I'm lazy and rushed, so I'm using an existing field for this info; probably better to add another * field to the value object... */ for (ExpressionExperimentValueObject eevo : eevos) { eevo.setExternalUri(AnchorTagUtil.getExpressionExperimentUrl(eevo.getId())); } Collections.sort(ecvos); getGoOverlap(ecvos, queryGene); timer.stop(); if (timer.getTime() > 1000) { log.info("Postprocess " + g2gs.size() + " results for " + queryGene.getOfficialSymbol() + " PhaseIII: " + timer.getTime() + "ms"); } timer.reset(); } // Over results. result.getKnownGeneResults().addAll(ecvos); return result; } /** * @param baseSet * @param eeIds * @param queryGenes * @param stringency * @param maxResults * @param queryGenesOnly * @return */ private Collection<CoexpressionValueObjectExt> getFilteredCannedAnalysisResults2( ExpressionExperimentSet baseSet, Collection<Long> eeIds, Collection<Gene> queryGenes, int stringency, int maxResults, boolean queryGenesOnly, boolean skipDetails) { if (queryGenes.isEmpty()) { throw new IllegalArgumentException("No genes in query"); } List<ExpressionExperimentValueObject> eevos = null; List<Long> filteredEeIds = null; eevos = getSortedEEvos(eeIds); if (eevos.isEmpty()) { throw new IllegalArgumentException("There are no usable experiments in the selected set"); } filteredEeIds = (List<Long>) EntityUtils.getIds(eevos); /* * We get this prior to filtering so it matches the vectors stored with the analysis. */ expressionExperimentSetService.thaw(baseSet); List<Long> positionToIDMap = Gene2GenePopulationServiceImpl .getPositionToIdMap(EntityUtils.getIds(baseSet.getExperiments())); /* * This set of links must be filtered to include those in the data sets being analyzed. */ Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = getRawCoexpression(queryGenes, stringency, maxResults, queryGenesOnly); List<CoexpressionValueObjectExt> ecvos = new ArrayList<CoexpressionValueObjectExt>(); Collection<Long> seenGene2Gene = new HashSet<Long>(); Collection<Long> queryGeneIds = gg2gs.keySet(); // return empty collection if no coexpression results if (queryGeneIds.isEmpty()) { return ecvos; } Collection<Long> gidsNeeded = new HashSet<Long>(); StopWatch timerGeneLoad = new StopWatch(); for (Long gid : queryGeneIds) { Element e = this.getGeneLightWeightCache().getCache().get(gid); if (e == null) { gidsNeeded.add(gid); } } if (!gidsNeeded.isEmpty()) { Collection<Gene> justLoadedGenes = geneService.loadThawedLiter(gidsNeeded); for (Gene g : justLoadedGenes) { this.getGeneLightWeightCache().getCache().put(new Element(g.getId(), g)); } } if (timerGeneLoad.getTime() > 100) { log.info("Loading and caching query genes took " + timerGeneLoad.getTime() + "ms"); } timerGeneLoad.reset(); timerGeneLoad.start(); gidsNeeded = new HashSet<Long>(); // load all genes first for (Long queryGid : queryGeneIds) { Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGid); for (Gene2GeneCoexpression g2g : g2gs) { Gene foundGene = g2g.getFirstGene().getId().equals(queryGid) ? g2g.getSecondGene() : g2g.getFirstGene(); if (this.getGeneLightWeightCache().getCache().get(foundGene.getId()) == null) { gidsNeeded.add(foundGene.getId()); } } } // Put all needed genes in Cache to that they are guaranteed to be there // for this method call if (!gidsNeeded.isEmpty()) { Collection<Gene> forCache = geneService.loadThawedLiter(gidsNeeded); for (Gene g : forCache) { this.getGeneLightWeightCache().getCache().put(new Element(g.getId(), g)); } } if (timerGeneLoad.getTime() > 100) { log.info("Loading and caching found genes took " + timerGeneLoad.getTime() + "ms"); } StopWatch timer = new StopWatch(); Collection<Long> allUsedGenes = new HashSet<Long>(); for (Long queryGid : queryGeneIds) { timer.reset(); timer.start(); Gene qGene = (Gene) this.getGeneLightWeightCache().getCache().get(queryGid).getValue(); if (!qGene.getTaxon().equals(baseSet.getTaxon())) { throw new IllegalArgumentException( "Mismatch between taxon for expression experiment set selected and gene queries"); } allUsedGenes.add(queryGid); /* * For summary statistics */ CountingMap<Long> supportCount = new CountingMap<Long>(); Collection<Long> allDatasetsWithSpecificProbes = new HashSet<Long>(); Collection<Long> allTestedDataSets = new HashSet<Long>(); Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGid); assert g2gs != null; List<Long> relevantEEIdList = null; if (!skipDetails) { relevantEEIdList = getRelevantEEidsForBitVector(positionToIDMap, g2gs); relevantEEIdList.retainAll(filteredEeIds); } GeneValueObject queryGeneValueObject = new GeneValueObject(qGene); Map<Long, Collection<Gene2GeneCoexpression>> foundGenes = new HashMap<Long, Collection<Gene2GeneCoexpression>>(); if (timer.getTime() > 100) { log.info("Postprocess " + qGene.getOfficialSymbol() + " Phase I: " + timer.getTime() + "ms"); } timer.stop(); timer.reset(); timer.start(); for (Gene2GeneCoexpression g2g : g2gs) { Gene foundGene = g2g.getFirstGene().getId().equals(queryGid) ? g2g.getSecondGene() : g2g.getFirstGene(); allUsedGenes.add(foundGene.getId()); // use this flag to test for a duplicate link with opposite // stringency support (positive/negative) boolean testForDuplicateFlag = false; // duplicate found genes can occur when there is both positive // and negative support for the same link // Keep track of the found genes that we can correctly identify // duplicates. // All keep the g2g object for debugging purposes. if (foundGenes.containsKey(foundGene.getId())) { testForDuplicateFlag = true; } else { foundGenes.put(foundGene.getId(), new ArrayList<Gene2GeneCoexpression>()); } foundGenes.get(foundGene.getId()).add(g2g); CoexpressionValueObjectExt cvo = new CoexpressionValueObjectExt(); Long idToGet = foundGene.getId(); foundGene = (Gene) this.getGeneLightWeightCache().getCache().get(idToGet).getValue(); if (foundGene == null) { log.error("Gene id:" + idToGet + " Not in the GeneLightWeightCache, something is wrong"); continue; } cvo.setQueryGene(queryGeneValueObject); cvo.setFoundGene(new GeneValueObject(foundGene)); List<Long> supportingDatasets = Gene2GenePopulationServiceImpl.getSupportingExperimentIds(g2g, positionToIDMap); // necessary in case any were filtered out. supportingDatasets.retainAll(filteredEeIds); cvo.setSupportingExperiments(supportingDatasets); List<Long> testingDatasets; List<Long> specificDatasets; if (!skipDetails) { testingDatasets = Gene2GenePopulationServiceImpl.getTestedExperimentIds(g2g, positionToIDMap); testingDatasets.retainAll(filteredEeIds); /* * necesssary in case any were filtered out (for example, if this is a virtual analysis; or there * were 'troubled' ees. Note that 'supporting' includes 'non-specific' if they were recorded by the * analyzer. */ specificDatasets = Gene2GenePopulationServiceImpl.getSpecificExperimentIds(g2g, positionToIDMap); /* * Specific probe EEids contains 1 even if the data set wasn't supporting. */ specificDatasets.retainAll(supportingDatasets); int numTestingDatasets = testingDatasets.size(); int numSupportingDatasets = supportingDatasets.size(); /* * SANITY CHECKS */ assert specificDatasets.size() <= numSupportingDatasets; assert numTestingDatasets >= numSupportingDatasets; assert numTestingDatasets <= eevos.size(); cvo.setDatasetVector(getDatasetVector(supportingDatasets, testingDatasets, specificDatasets, relevantEEIdList)); if (testForDuplicateFlag) { testAndModifyDuplicateResultForOppositeStringency(ecvos, qGene, foundGene, g2g.getEffect(), numSupportingDatasets, specificDatasets.size(), numTestingDatasets); continue; } else if (numSupportingDatasets < stringency) {// check in case any data sets were filtered // out.(i.e., we're not // interested in the full set of data sets that // were used in the // original analysis. continue; } allTestedDataSets.addAll(testingDatasets); int supportFromSpecificProbes = specificDatasets.size(); if (g2g.getEffect() < 0) { cvo.setPosSupp(0); cvo.setNegSupp(numSupportingDatasets); if (numSupportingDatasets != supportFromSpecificProbes) cvo.setNonSpecNegSupp(numSupportingDatasets - supportFromSpecificProbes); } else { cvo.setPosSupp(numSupportingDatasets); if (numSupportingDatasets != supportFromSpecificProbes) cvo.setNonSpecPosSupp(numSupportingDatasets - supportFromSpecificProbes); cvo.setNegSupp(0); } cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp())); cvo.setNumTestedIn(numTestingDatasets); for (Long id : supportingDatasets) { supportCount.increment(id); } allDatasetsWithSpecificProbes.addAll(specificDatasets); } else { int numSupportingDatasets = supportingDatasets.size(); if (testForDuplicateFlag) { testAndModifyDuplicateResultForOppositeStringency(ecvos, qGene, foundGene, g2g.getEffect(), numSupportingDatasets, null, null); continue; } else if (numSupportingDatasets < stringency) { continue; } if (g2g.getEffect() < 0) { cvo.setPosSupp(0); cvo.setNegSupp(numSupportingDatasets); } else { cvo.setPosSupp(numSupportingDatasets); cvo.setNegSupp(0); } cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp())); } cvo.setSortKey(); /* * This check prevents links from being shown twice when we do "among query genes". We don't skip * entirely so we get the counts for the summary table populated correctly. */ if (!seenGene2Gene.contains(g2g.getId())) { ecvos.add(cvo); } seenGene2Gene.add(g2g.getId()); } if (timer.getTime() > 300) { log.info("Postprocess " + g2gs.size() + " results for " + qGene.getOfficialSymbol() + " Phase II: " + timer.getTime() + "ms"); } timer.stop(); Collections.sort(ecvos); } // Over querygenes populateNodeDegree(ecvos, allUsedGenes, filteredEeIds); return ecvos; } /** * @param ecvos (sorted) * @param queryGene */ private void getGoOverlap(List<CoexpressionValueObjectExt> ecvos, Gene queryGene) { if (!geneOntologyService.isGeneOntologyLoaded()) { return; } /* * get GO overlap info for this query gene... */ StopWatch timer = new StopWatch(); timer.start(); int numQueryGeneGoTerms = geneOntologyService.getGOTerms(queryGene).size(); Collection<Long> overlapIds = new HashSet<Long>(); for (CoexpressionValueObjectExt ecvo : ecvos) { overlapIds.add(ecvo.getFoundGene().getId()); if (overlapIds.size() >= NUM_GENES_TO_DETAIL) break; } Map<Long, Collection<OntologyTerm>> goOverlap = geneOntologyService.calculateGoTermOverlap(queryGene, overlapIds); int i = 0; for (CoexpressionValueObjectExt ecvo : ecvos) { ecvo.setMaxGoSim(numQueryGeneGoTerms); Collection<OntologyTerm> overlap = goOverlap.get(ecvo.getFoundGene().getId()); ecvo.setGoSim(overlap == null ? null : overlap.size()); if (++i >= NUM_GENES_TO_DETAIL) break; } if (timer.getTime() > 1000) { log.info("GO stats for " + queryGene.getName() + " + " + overlapIds.size() + " coexpressed genes :" + timer.getTime() + "ms"); } } /** * @param contributingEEs * @param nonSpecificEEs * @return */ private int getNonSpecificLinkCount(Collection<Long> contributingEEs, Collection<Long> nonSpecificEEs) { int n = 0; for (Long id : contributingEEs) { if (nonSpecificEEs.contains(id)) ++n; } return n; } /** * Retrieve all gene2gene coexpression information for the genes at the specified stringency, using methods that * don't filter by experiment. * * @param queryGenes * @param stringency * @param maxResults * @param queryGenesOnly * @return */ private Map<Long, Collection<Gene2GeneCoexpression>> getRawCoexpression(Collection<Gene> queryGenes, int stringency, int maxResults, boolean queryGenesOnly) { Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = new HashMap<Long, Collection<Gene2GeneCoexpression>>(); if (queryGenes.size() == 0) { return gg2gs; } StopWatch timer = new StopWatch(); timer.start(); GeneCoexpressionAnalysis gA = findEnabledCoexpressionAnalysis(queryGenes); timer.stop(); if (timer.getTime() > 100) { log.info("Get analysis: " + timer.getTime() + "ms"); } timer.reset(); timer.start(); if (queryGenesOnly) { if (queryGenes.size() < 2) { throw new IllegalArgumentException("Must have at least two genes to do 'my genes only'"); } gg2gs = gene2GeneCoexpressionService.findInterCoexpressionRelationship(queryGenes, stringency, gA); } else { gg2gs = gene2GeneCoexpressionService.findCoexpressionRelationships(queryGenes, stringency, maxResults, gA); } if (timer.getTime() > 1000) { log.info("Get raw coexpression: " + timer.getTime() + "ms"); } return gg2gs; } /** * @param positionToIDMap * @param g2gs * @return */ private List<Long> getRelevantEEidsForBitVector(List<Long> positionToIDMap, Collection<Gene2GeneCoexpression> g2gs) { Collection<Long> relevantEEIds = new HashSet<Long>(); List<Long> relevantEEIdList = new ArrayList<Long>(); for (Gene2GeneCoexpression g2g : g2gs) { relevantEEIds.addAll(Gene2GenePopulationServiceImpl.getTestedExperimentIds(g2g, positionToIDMap)); } relevantEEIdList.addAll(relevantEEIds); Collections.sort(relevantEEIdList); return relevantEEIdList; } /** * @param eeIds * @return */ private List<ExpressionExperimentValueObject> getSortedEEvos(Collection<Long> eeIds) { // expressionExperimentService.loadValueObjects is secured List<ExpressionExperimentValueObject> securityFilteredEevos = new ArrayList<ExpressionExperimentValueObject>( expressionExperimentService.loadValueObjects(eeIds, false)); List<ExpressionExperimentValueObject> eevos = new ArrayList<ExpressionExperimentValueObject>(); StopWatch timerFilterTroubled = new StopWatch(); timerFilterTroubled.start(); // only keep untroubled experiments for (ExpressionExperimentValueObject eevo : securityFilteredEevos) { if (!eevo.getTroubled()) { eevos.add(eevo); } } if (timerFilterTroubled.getTime() > 100) { log.info("Filtering troubled eevos took " + timerFilterTroubled.getTime() + "ms"); } Collections.sort(eevos, new Comparator<ExpressionExperimentValueObject>() { @Override public int compare(ExpressionExperimentValueObject eevo1, ExpressionExperimentValueObject eevo2) { return eevo1.getId().compareTo(eevo2.getId()); } }); return eevos; } /** * @param genes * @param eevos * @param isCanned * @return */ private CoexpressionMetaValueObject initValueObject(Collection<Gene> genes, List<ExpressionExperimentValueObject> eevos, boolean isCanned) { CoexpressionMetaValueObject result = new CoexpressionMetaValueObject(); result.setQueryGenes(new ArrayList<GeneValueObject>(GeneValueObject.convert2ValueObjects(genes))); result.setDatasets(eevos); result.setKnownGeneDatasets(new ArrayList<CoexpressionDatasetValueObject>()); result.setKnownGeneResults(new ArrayList<CoexpressionValueObjectExt>()); result.setPredictedGeneDatasets(new ArrayList<CoexpressionDatasetValueObject>()); result.setPredictedGeneResults(new ArrayList<CoexpressionValueObjectExt>()); result.setProbeAlignedRegionDatasets(new ArrayList<CoexpressionDatasetValueObject>()); result.setProbeAlignedRegionResults(new ArrayList<CoexpressionValueObjectExt>()); result.setSummary(new HashMap<String, CoexpressionSummaryValueObject>()); return result; } /** * @param eevos * @param datasetsTested * @param datasetsWithSpecificProbes * @param linksMetPositiveStringency * @param linksMetNegativeStringency * @return */ private CoexpressionSummaryValueObject makeSummary(List<ExpressionExperimentValueObject> eevos, Collection<Long> datasetsTested, Collection<Long> datasetsWithSpecificProbes, int linksMetPositiveStringency, int linksMetNegativeStringency) { CoexpressionSummaryValueObject summary = new CoexpressionSummaryValueObject(); summary.setDatasetsAvailable(eevos.size()); summary.setDatasetsTested(datasetsTested.size()); summary.setDatasetsWithSpecificProbes(datasetsWithSpecificProbes.size()); summary.setLinksFound(linksMetPositiveStringency + linksMetNegativeStringency); summary.setLinksMetPositiveStringency(linksMetPositiveStringency); summary.setLinksMetNegativeStringency(linksMetNegativeStringency); return summary; } /** * @param proteinInteractionMap * @param regulatedBy * @param regulates * @param foundGene * @param cvo */ private void populateInteractions(Map<Long, Gene2GeneProteinAssociation> proteinInteractionMap, Map<Long, TfGeneAssociation> regulatedBy, Map<Long, TfGeneAssociation> regulates, Gene foundGene, CoexpressionValueObjectExt cvo) { StopWatch timer = new StopWatch(); timer.start(); // set the interaction if none null will be put if (proteinInteractionMap != null && !(proteinInteractionMap.isEmpty())) { Gene2GeneProteinAssociation association = proteinInteractionMap.get(foundGene.getId()); if (association != null) this.addProteinDetailsToValueObject(association, cvo); } if (regulatedBy != null && !regulatedBy.isEmpty()) { TfGeneAssociation tfGeneAssociation = regulatedBy.get(foundGene.getId()); if (tfGeneAssociation != null) this.addTfInteractionToValueObject(tfGeneAssociation, cvo); } if (regulates != null && !regulates.isEmpty()) { TfGeneAssociation tfGeneAssociation = regulates.get(foundGene.getId()); if (tfGeneAssociation != null) this.addTfInteractionToValueObject(tfGeneAssociation, cvo); } if (timer.getTime() > 10) log.info("Iteraction population:" + timer.getTime() + "ms"); } /** * @param ecvos * @param allUsedGenes * @param ees */ private void populateNodeDegree(List<CoexpressionValueObjectExt> ecvos, Collection<Long> allUsedGenes, Collection<Long> ees) { StopWatch timer = new StopWatch(); timer.start(); Map<Long, GeneCoexpressionNodeDegree> geneNodeDegrees = geneService .getGeneIdCoexpressionNodeDegree(allUsedGenes); for (CoexpressionValueObjectExt coexp : ecvos) { GeneCoexpressionNodeDegree queryGeneNodeDegree = geneNodeDegrees.get(coexp.getQueryGene().getId()); if (queryGeneNodeDegree == null) { coexp.setQueryGeneNodeDegree(-1.0); } else { coexp.setQueryGeneNodeDegree(queryGeneNodeDegree.getRankNumLinks()); } GeneCoexpressionNodeDegree foundGeneNodeDegree = geneNodeDegrees.get(coexp.getFoundGene().getId()); if (foundGeneNodeDegree == null) { coexp.setFoundGeneNodeDegree(-1.0); } else { coexp.setFoundGeneNodeDegree(foundGeneNodeDegree.getRankNumLinks()); } } /* * Old, slower way */ // if ( !ees.isEmpty() ) { // Map<Gene, Double> geneNodeDegrees = // geneService.getGeneCoexpressionNodeDegree( allUsedGenes, // expressionExperimentService.loadMultiple( ees ) ); // Map<Long, Gene> idMap = EntityUtils.getIdMap( // geneNodeDegrees.keySet() ); // for ( CoexpressionValueObjectExt coexp : ecvos ) { // coexp.setQueryGeneNodeDegree( geneNodeDegrees.get( idMap.get( // coexp.getQueryGene().getId() ) ) ); // coexp.setFoundGeneNodeDegree( geneNodeDegrees.get( idMap.get( // coexp.getFoundGene().getId() ) ) ); // } // } else { // for ( CoexpressionValueObjectExt coexp : ecvos ) { // coexp.setQueryGeneNodeDegree( 0d ); // coexp.setFoundGeneNodeDegree( 0d ); // } // } if (timer.getTime() > 10) log.info("Node degree population:" + timer.getTime() + "ms"); } /** * Sometimes a coexpression link can have both negative and positive support. When this happens two results are * returned and need to be merged. If there is a duplicate(same coexpression link between two genes seen again) * result, this method merges the duplicate result with the result in the List<CoexpressionValueObjectExt> ecvos * passed in */ private boolean testAndModifyDuplicateResultForOppositeStringency(List<CoexpressionValueObjectExt> ecvos, Gene queryGene, Gene foundGene, Double effect, Integer numSupportingDatasets, Integer supportFromSpecificProbes, Integer numTestedIn) { for (CoexpressionValueObjectExt ecvo : ecvos) { if (ecvo.getFoundGene().getId().equals(foundGene.getId()) && ecvo.getQueryGene().getId().equals(queryGene.getId())) { if (ecvo.getNegSupp() > 0 && effect > 0) { ecvo.setPosSupp(numSupportingDatasets); if (supportFromSpecificProbes != null && numSupportingDatasets != supportFromSpecificProbes) { ecvo.setNonSpecPosSupp(numSupportingDatasets - supportFromSpecificProbes); } // this may not be necessary, putting in just in case of a // difference if (numTestedIn != null) { ecvo.setNumTestedIn(Math.max(ecvo.getNumTestedIn(), numTestedIn)); } ecvo.setSupportKey(Math.max(ecvo.getPosSupp(), ecvo.getNegSupp())); return true; } else if (ecvo.getPosSupp() > 0 && effect < 0) { ecvo.setNegSupp(numSupportingDatasets); if (supportFromSpecificProbes != null && numSupportingDatasets != supportFromSpecificProbes) { ecvo.setNonSpecNegSupp(numSupportingDatasets - supportFromSpecificProbes); } // this may not be necessary, putting in just in case of a // difference if (numTestedIn != null) { ecvo.setNumTestedIn(Math.max(ecvo.getNumTestedIn(), numTestedIn)); } ecvo.setSupportKey(Math.max(ecvo.getPosSupp(), ecvo.getNegSupp())); return true; } } } return false; } }