ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisServiceTest.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisServiceTest.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2010 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.analysis.expression.coexpression.links;

import org.apache.commons.dbcp.BasicDataSource;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowCallbackHandler;
import ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisConfig.SingularThreshold;
import ubic.gemma.core.analysis.preprocess.filter.FilterConfig;
import ubic.gemma.core.genome.gene.service.GeneService;
import ubic.gemma.core.util.test.BaseSpringContextTest;
import ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis;
import ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegreeValueObject;
import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.service.TableMaintenanceUtil;
import ubic.gemma.persistence.service.association.coexpression.CoexpressionCache;
import ubic.gemma.persistence.service.association.coexpression.CoexpressionService;
import ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject;
import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import ubic.gemma.persistence.util.EntityUtils;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;

import static org.junit.Assert.*;

/**
 * Integration test that runs link (coexpression) analysis on test expression experiments, persists the results,
 * and then checks what comes back from the coexpression query service.
 *
 * @author paul
 */
public class LinkAnalysisServiceTest extends BaseSpringContextTest {

    private final FilterConfig filterConfig = new FilterConfig();
    private final LinkAnalysisConfig linkAnalysisConfig = new LinkAnalysisConfig();

    @Autowired
    private BasicDataSource dataSource;

    /**
     * The first experiment created by the test; {@link #checkResults(Collection, int)} reads it directly.
     */
    private ExpressionExperiment ee;

    @Autowired
    private ExpressionExperimentService eeService;

    @Autowired
    private CoexpressionCache gene2GeneCoexpressionCache;

    @Autowired
    private CoexpressionService geneCoexpressionService;

    @Autowired
    private GeneService geneService;

    @Autowired
    private LinkAnalysisPersister linkAnalysisPersisterService;

    @Autowired
    private LinkAnalysisService linkAnalysisService;

    @Autowired
    private ProcessedExpressionDataVectorService processedExpressionDataVectorService;

    @Autowired
    private TableMaintenanceUtil tableMaintenanceUtil;

    /**
     * Sets the test collection size to 100 and shuts down the coexpression cache.
     */
    @Before
    public void setup() {
        super.setTestCollectionSize(100);
        // NOTE(review): presumably so query results are not served from the cache during the test - confirm.
        gene2GeneCoexpressionCache.shutdown();
    }

    /**
     * Restores the test collection size changed in {@link #setup()}.
     */
    @After
    public void tearDown() {
        super.resetTestCollectionSize();
    }

    /**
     * Runs the analysis, deletes it, re-runs it (twice), and finally runs it on a second experiment carrying the
     * same raw data, checking the stored links after each stage.
     */
    @Test
    public void testLoadAnalyzeSaveAndCoexpSearch() {
        ee = this.getTestPersistentCompleteExpressionExperimentWithSequences();
        processedExpressionDataVectorService.computeProcessedExpressionData(ee);

        tableMaintenanceUtil.disableEmail();
        tableMaintenanceUtil.updateGene2CsEntries();
        linkAnalysisConfig.setCdfCut(0.1);
        linkAnalysisConfig.setSingularThreshold(SingularThreshold.cdfcut);
        linkAnalysisConfig.setProbeDegreeThreshold(25);
        linkAnalysisConfig.setCheckCorrelationDistribution(false);
        linkAnalysisConfig.setCheckForBatchEffect(false);
        filterConfig.setIgnoreMinimumSampleThreshold(true);

        // First time: the returned analysis is not needed, we only want the links persisted so they can be deleted.
        linkAnalysisService.process(ee, filterConfig, linkAnalysisConfig);

        // test remove is clean; to check this properly requires checking the db.
        linkAnalysisPersisterService.deleteAnalyses(ee);

        this.checkUnsupportedLinksHaveNoSupport();
        assertEquals(0, geneCoexpressionService.getCoexpression(ee, true).size());

        LinkAnalysis la = linkAnalysisService.process(ee, filterConfig, linkAnalysisConfig);

        CoexpressionAnalysis analysisObj = la.getAnalysisObj();
        assertEquals(151, analysisObj.getNumberOfElementsAnalyzed().intValue());
        assertTrue(analysisObj.getNumberOfLinks() > 0);

        assertNotNull(analysisObj.getCoexpCorrelationDistribution());

        Collection<BioAssaySet> ees = new HashSet<>();
        ees.add(ee);

        this.updateNodeDegree();
        int totalLinksFirstPass = this.checkResults(ees, 1);

        // should be ~1140.
        assertTrue(totalLinksFirstPass > 1000);

        // test redo: re-processing the same experiment must yield the same number of links.
        linkAnalysisService.process(ee, filterConfig, linkAnalysisConfig);
        this.updateNodeDegree();

        int totalLinksRedo = this.checkResults(ees, 1);
        assertEquals(totalLinksFirstPass, totalLinksRedo);

        // now add another experiment that has overlapping links (same data).
        Map<CompositeSequence, byte[]> dataMap = new HashMap<>();
        ee = eeService.thaw(ee);
        for (RawExpressionDataVector v : ee.getRawExpressionDataVectors()) {
            dataMap.put(v.getDesignElement(), v.getData());
        }

        ExpressionExperiment ee2 = this.getTestPersistentCompleteExpressionExperimentWithSequences(ee);
        for (RawExpressionDataVector v : ee2.getRawExpressionDataVectors()) {
            byte[] data = dataMap.get(v.getDesignElement());
            // A JUnit assertion rather than the 'assert' keyword, so the check runs even without -ea.
            assertNotNull(data);
            v.setData(data);
        }

        eeService.update(ee2);

        processedExpressionDataVectorService.computeProcessedExpressionData(ee2);
        linkAnalysisService.process(ee2, filterConfig, linkAnalysisConfig);

        this.updateNodeDegree();

        // expect to get at least one link with support > 1
        ees.add(ee2);
        this.checkResults(ees, 2);

    }

    /**
     * Collects the ids of support-details rows whose bytes match the pattern used below, then fails if any
     * coexpression row pointing at one of them still reports a support greater than zero.
     */
    private void checkUnsupportedLinksHaveNoSupport() {
        JdbcTemplate jt = new JdbcTemplate(dataSource);

        // see SupportDetailsTest for validation that these strings represent empty byte arrays. I think the 1 at
        // position 12 is important.
        final Collection<Long> checkme = new HashSet<>();
        // maybe these patterns aren't this reproducible.
        // Example of a matching value: 000002BB00000001000000000000001600000000
        jt.query("SELECT ID FROM MOUSE_LINK_SUPPORT_DETAILS WHERE HEX(BYTES) LIKE '00000___0000000100000000000000%'",
                new RowCallbackHandler() {
                    @Override
                    public void processRow(ResultSet rs) throws SQLException {
                        checkme.add(rs.getLong(1));
                    }
                });

        // we should definitely have some of these
        assertTrue(checkme.size() > 0);

        // One placeholder per id: a single '?' bound to an array is not expanded into an IN list, which would make
        // this check match nothing and pass vacuously. checkme is known to be non-empty at this point.
        String sql = "SELECT SUPPORT FROM MOUSE_GENE_COEXPRESSION WHERE SUPPORT_DETAILS_FK IN ("
                + placeholders(checkme.size()) + ") AND SUPPORT > 0";
        jt.query(sql, checkme.toArray(), new RowCallbackHandler() {
            @Override
            public void processRow(ResultSet rs) {
                fail("Should not have had any rows");
            }
        });

    }

    /**
     * Builds a comma-separated list of JDBC placeholders.
     *
     * @param count number of placeholders wanted
     * @return {@code "?,?,...,?"} with {@code count} question marks; empty if {@code count} is not positive
     */
    private static String placeholders(int count) {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < count; i++) {
            if (i > 0) {
                buf.append(',');
            }
            buf.append('?');
        }
        return buf.toString();
    }

    /**
     * Checks that a single coexpression result is populated and internally consistent: ids are present, the
     * supporting-dataset count is positive and matches the supporting-dataset collection, and support does not
     * exceed the number of datasets tested in.
     *
     * @param coex the result to check
     */
    private void checkResult(CoexpressionValueObject coex) {
        assertNotNull(coex.toString(), coex.getQueryGeneId());
        assertNotNull(coex.toString(), coex.getCoexGeneId());
        assertNotNull(coex.toString(), coex.getSupportDetailsId());
        assertNotNull(coex.toString(), coex.getSupportingDatasets());
        assertTrue(coex.toString(), coex.getNumDatasetsSupporting() > 0);
        assertTrue(coex.toString(), coex.getNumDatasetsTestedIn() != 0);

        // The tested-in details are only compared when the count is positive.
        if (coex.getNumDatasetsTestedIn() > 0) {
            assertEquals(coex.toString(), coex.getNumDatasetsTestedIn().intValue(),
                    coex.getTestedInDatasets().size());
            assertTrue(
                    coex.toString() + " testedin: " + coex.getTestedInDatasets() + " supportedin: "
                            + coex.getSupportingDatasets(),
                    coex.getNumDatasetsSupporting() <= coex.getNumDatasetsTestedIn());
        }

        assertEquals(coex.toString(), coex.getSupportingDatasets().size(),
                coex.getNumDatasetsSupporting().intValue());

        assertTrue(coex.toString(), !coex.getSupportingDatasets().isEmpty());
    }

    /**
     * Applies {@link #checkResult(CoexpressionValueObject)} to every result in a per-gene result map.
     *
     * @param resultsByGene results keyed by gene id
     */
    private void checkAllResults(Map<Long, List<CoexpressionValueObject>> resultsByGene) {
        for (List<CoexpressionValueObject> results : resultsByGene.values()) {
            for (CoexpressionValueObject coex : results) {
                this.checkResult(coex);
            }
        }
    }

    /**
     * Queries the stored links in several ways (per experiment, experiment-major, per gene, multi-gene and
     * inter-gene) and checks every result returned.
     *
     * @param ees                       the experiments to query links for
     * @param expectedMinimumMaxSupport the largest support seen across per-gene results must be at least this
     * @return the total number of links found by the per-gene queries over all mouse genes
     */
    private int checkResults(Collection<BioAssaySet> ees, int expectedMinimumMaxSupport) {
        boolean foundOne = false;

        int maxSupport = 0;
        Taxon mouse = taxonService.findByCommonName("mouse");
        Collection<Gene> genesWithLinks = new ArrayList<>();
        int totalLinks = 0;

        // numdatasetstesting will not be set so we won't bother checking.
        assertTrue(!geneCoexpressionService.getCoexpression(ee, true).isEmpty());

        Collection<CoexpressionValueObject> eeResults = geneCoexpressionService.getCoexpression(ee, false);
        assertTrue(!eeResults.isEmpty());
        for (CoexpressionValueObject coex : eeResults) {
            this.checkResult(coex);
        }

        Map<Long, GeneCoexpressionNodeDegreeValueObject> nodeDegrees = geneCoexpressionService
                .getNodeDegrees(EntityUtils.getIds(geneService.loadAll()));
        assertTrue(!nodeDegrees.isEmpty());

        // experiment-major query
        Map<Long, List<CoexpressionValueObject>> allLinks = geneCoexpressionService.findCoexpressionRelationships(
                mouse, new HashSet<Long>(), EntityUtils.getIds(ees), ees.size(), 10, false);
        assertTrue(!allLinks.isEmpty());
        this.checkAllResults(allLinks);

        for (Gene gene : geneService.loadAll(mouse)) {

            Collection<CoexpressionValueObject> links = geneCoexpressionService.findCoexpressionRelationships(gene,
                    EntityUtils.getIds(ees), 1, 0, false);

            if (links == null || links.isEmpty()) {
                continue;
            }

            assertEquals(geneCoexpressionService
                    .findCoexpressionRelationships(gene, Collections.singleton(ee.getId()), 0, false).size(),
                    geneCoexpressionService.countLinks(ee, gene).intValue());

            GeneCoexpressionNodeDegreeValueObject nodeDegree = geneCoexpressionService.getNodeDegree(gene);

            // The explicit 'if' around the assertion is kept so the node degree is logged (and a breakpoint can
            // be set) before the failure is raised.
            if (links.size() != nodeDegree.getLinksWithMinimumSupport(1)) {
                log.info(nodeDegree);
                assertEquals("Node degree check failed for gene " + gene, links.size(),
                        nodeDegree.getLinksWithMinimumSupport(1).intValue());
            }

            assertTrue(nodeDegree.getLinksWithMinimumSupport(1) >= nodeDegree.getLinksWithMinimumSupport(2));

            totalLinks += links.size();
            log.debug(links.size() + " hits for " + gene);
            for (CoexpressionValueObject coex : links) {
                this.checkResult(coex);
                if (coex.getNumDatasetsSupporting() > maxSupport) {
                    maxSupport = coex.getNumDatasetsSupporting();
                }
            }
            foundOne = true;

            // Run the multi-gene queries once, at the point where exactly five genes with links were collected
            // (the current gene is only added afterwards).
            if (genesWithLinks.size() == 5) {

                // without specifying stringency
                Map<Long, List<CoexpressionValueObject>> multiGeneResults = geneCoexpressionService
                        .findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks),
                                EntityUtils.getIds(ees), 100, false);

                if (!multiGeneResults.isEmpty()) {

                    this.checkAllResults(multiGeneResults);

                    // with stringency specified, quick.
                    Map<Long, List<CoexpressionValueObject>> multiGeneResults2 = geneCoexpressionService
                            .findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks),
                                    EntityUtils.getIds(ees), ees.size(), 100, true);
                    if (multiGeneResults.size() != multiGeneResults2.size()) {
                        assertEquals(multiGeneResults.size(), multiGeneResults2.size());
                    }

                    this.checkAllResults(multiGeneResults2);
                }
            }
            genesWithLinks.add(gene);
        }

        assertTrue(foundOne);

        Map<Long, List<CoexpressionValueObject>> mygeneresults = geneCoexpressionService
                .findInterCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks),
                        EntityUtils.getIds(ees), 1, false);
        if (mygeneresults.isEmpty()) {
            //noinspection ConstantConditions // these strange structures are to help with debugger.
            assertTrue(!mygeneresults.isEmpty());
        }
        this.checkAllResults(mygeneresults);

        assertTrue(maxSupport >= expectedMinimumMaxSupport);

        return totalLinks;
    }

    /**
     * Asks the coexpression service to update node degrees for the mouse taxon.
     */
    private void updateNodeDegree() {

        geneCoexpressionService.updateNodeDegrees(this.getTaxon("mouse"));

    }
}