ubic.gemma.core.analysis.preprocess.VectorMergingServiceTest.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.analysis.preprocess.VectorMergingServiceTest.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2011 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package ubic.gemma.core.analysis.preprocess;

import org.apache.commons.lang3.RandomStringUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.loader.expression.ExpressionExperimentPlatformSwitchService;
import ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignMergeService;
import ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest;
import ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal;
import ubic.gemma.core.loader.expression.geo.service.GeoService;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.common.quantitationtype.ScaleType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject;
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.genome.biosequence.BioSequence;
import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

/**
 * Tests loading, platform switch, vector merge, and complex deletion (in teardown)
 *
 * @author paul
 */
public class VectorMergingServiceTest extends AbstractGeoServiceTest {

    @Autowired
    private ArrayDesignMergeService arrayDesignMergeService;

    @Autowired
    private ArrayDesignService arrayDesignService;

    private ExpressionExperiment ee = null;

    @Autowired
    private ExpressionExperimentPlatformSwitchService eePlatformSwitchService;

    @Autowired
    private ExpressionExperimentService eeService;

    @Autowired
    private GeoService geoService;

    private ArrayDesign mergedAA = null;

    @Autowired
    private ProcessedExpressionDataVectorService processedExpressionDataVectorService;

    @Autowired
    private VectorMergingService vectorMergingService;

    @Autowired
    private QuantitationTypeService quantitationTypeService;

    @Before
    @After
    public void tearDown() {
        try {
            ee = eeService.findByShortName("GSE3443");

            if (ee != null) {
                mergedAA = eeService.getArrayDesignsUsed(ee).iterator().next();
                eeService.remove(ee);

                if (mergedAA != null) {
                    mergedAA.setMergees(new HashSet<ArrayDesign>());
                    arrayDesignService.update(mergedAA);

                    mergedAA = arrayDesignService.thawLite(mergedAA);
                    for (ArrayDesign arrayDesign : mergedAA.getMergees()) {
                        arrayDesign.setMergedInto(null);
                        arrayDesignService.update(arrayDesign);
                    }

                    for (ExpressionExperiment e : arrayDesignService.getExpressionExperiments(mergedAA)) {
                        eeService.remove(e);
                    }

                    arrayDesignService.remove(mergedAA);
                    for (ArrayDesign arrayDesign : mergedAA.getMergees()) {
                        for (ExpressionExperiment e : arrayDesignService.getExpressionExperiments(arrayDesign)) {
                            eeService.remove(e);
                        }
                        arrayDesignService.remove(arrayDesign);

                    }
                }

            }
        } catch (Exception e) {
            log.info("Tear-down failed: " + e.getMessage());
        }
    }

    @Test
    final public void test() throws Exception {
        /*
         * Need a persistent experiment that uses multiple array designs. Then merge the designs, switch the vectors,
         * and merge the vectors. GSE3443
         */

        /*
         * The experiment uses the following GPLs
         *
         * GPL2868, GPL2933, GPL2934, GPL2935, GPL2936, GPL2937, GPL2938
         *
         * Example of a sequence appearing on more than one platform: N57553
         */

        geoService.setGeoDomainObjectGenerator(
                new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse3443merge")));

        Collection<?> results = geoService.fetchAndLoad("GSE3443", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();

        ee = this.eeService.thawLite(ee);

        // fix for unknown log scale
        for (QuantitationType qt : ee.getQuantitationTypes()) {
            if (qt.getIsPreferred()) {
                qt.setScale(ScaleType.LOG2);
                quantitationTypeService.update(qt);
            }
        }

        Collection<ArrayDesign> aas = eeService.getArrayDesignsUsed(ee);

        assertEquals(7, aas.size());

        /*
         * Check number of sequences across all platforms. This is how many elements we need on the new platform, plus
         * extras for duplicated sequences (e.g. elements that don't have a sequence...)
         */
        Collection<ArrayDesign> taas = new HashSet<>();
        Set<BioSequence> oldbs = new HashSet<>();
        for (ArrayDesign arrayDesign : aas) {
            arrayDesign = arrayDesignService.thaw(arrayDesign);
            taas.add(arrayDesign);
            for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
                log.info(cs + " " + cs.getBiologicalCharacteristic());
                oldbs.add(cs.getBiologicalCharacteristic());
            }
        }
        assertEquals(63, oldbs.size());

        /*
         * Check total size of elements across all 7 platforms.
         */
        int totalElements = 0;
        for (ArrayDesign arrayDesign : taas) {
            totalElements += arrayDesign.getCompositeSequences().size();
        }
        assertEquals(140, totalElements);

        ArrayDesign firstaa = taas.iterator().next();
        aas.remove(firstaa);

        assertNull(firstaa.getMergedInto());

        mergedAA = arrayDesignMergeService.merge(firstaa, taas, "testMerge" + RandomStringUtils.randomAlphabetic(5),
                "merged" + RandomStringUtils.randomAlphabetic(5), false);

        assertEquals(72, mergedAA.getCompositeSequences().size());

        Set<BioSequence> seenBs = new HashSet<>();
        for (CompositeSequence cs : mergedAA.getCompositeSequences()) {
            seenBs.add(cs.getBiologicalCharacteristic());
        }
        assertEquals(63, seenBs.size());

        // just to make this explicit. The new array design has to contain all the old sequences.
        assertEquals(oldbs.size(), seenBs.size());

        ee = eeService.thaw(ee);

        ee = eePlatformSwitchService.switchExperimentToArrayDesign(ee, mergedAA);
        ee = eeService.thaw(ee);
        // check we actually got switched over.
        for (BioAssay ba : ee.getBioAssays()) {
            assertEquals(mergedAA, ba.getArrayDesignUsed());
        }
        for (RawExpressionDataVector v : ee.getRawExpressionDataVectors()) {
            assertEquals(mergedAA, v.getDesignElement().getArrayDesign());
        }

        assertEquals(16, ee.getQuantitationTypes().size());
        assertEquals(1828, ee.getRawExpressionDataVectors().size());

        ee = vectorMergingService.mergeVectors(ee);

        // check we got the right processed data
        Collection<ProcessedExpressionDataVector> pvs = processedExpressionDataVectorService
                .getProcessedDataVectors(ee);

        assertEquals(72, pvs.size());

        assertEquals(978, ee.getRawExpressionDataVectors().size());

        ee = eeService.thaw(ee);
        Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService
                .getProcessedDataArrays(ee, 50);

        assertEquals(50, processedDataArrays.size());

    }

}