gov.nih.nci.caintegrator.application.analysis.grid.GenePatternGridRunnerImplTestIntegration.java Source code

Java tutorial

Introduction

Here is the source code for gov.nih.nci.caintegrator.application.analysis.grid.GenePatternGridRunnerImplTestIntegration.java

Source

/**
 * Copyright 5AM Solutions Inc, ESAC, ScenPro & SAIC
 *
 * Distributed under the OSI-approved BSD 3-Clause License.
 * See http://ncip.github.com/caintegrator/LICENSE.txt for details.
 */
package gov.nih.nci.caintegrator.application.analysis.grid;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import gov.nih.nci.caintegrator.TestDataFiles;
import gov.nih.nci.caintegrator.application.analysis.GenePatternGridClientFactoryImpl;
import gov.nih.nci.caintegrator.application.analysis.StatusUpdateListener;
import gov.nih.nci.caintegrator.application.analysis.grid.comparativemarker.ComparativeMarkerSelectionParameters;
import gov.nih.nci.caintegrator.application.analysis.grid.gistic.GisticParameters;
import gov.nih.nci.caintegrator.application.analysis.grid.gistic.GisticRefgeneFileEnum;
import gov.nih.nci.caintegrator.application.analysis.grid.pca.PCAParameters;
import gov.nih.nci.caintegrator.application.analysis.grid.preprocess.PreprocessDatasetParameters;
import gov.nih.nci.caintegrator.application.query.InvalidCriterionException;
import gov.nih.nci.caintegrator.data.StudyHelper;
import gov.nih.nci.caintegrator.domain.application.ComparativeMarkerSelectionAnalysisJob;
import gov.nih.nci.caintegrator.domain.application.GisticAnalysisJob;
import gov.nih.nci.caintegrator.domain.application.PrincipalComponentAnalysisJob;
import gov.nih.nci.caintegrator.domain.application.StudySubscription;
import gov.nih.nci.caintegrator.domain.application.UserWorkspace;
import gov.nih.nci.caintegrator.domain.genomic.ArrayDataType;
import gov.nih.nci.caintegrator.external.ConnectionException;
import gov.nih.nci.caintegrator.external.ParameterException;
import gov.nih.nci.caintegrator.external.ServerConnectionProfile;
import gov.nih.nci.caintegrator.file.FileManager;
import gov.nih.nci.caintegrator.file.FileManagerImpl;
import gov.nih.nci.caintegrator.mockito.AbstractMockitoTest;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.genepattern.cabig.util.ZipUtils;
import org.genepattern.gistic.common.GisticUtils;
import org.genepattern.io.ParseException;
import org.genepattern.io.odf.OdfObject;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import au.com.bytecode.opencsv.CSVReader;

/**
 * Tests for evaluating integration with the gene pattern grid services.
 *
 * @author Abraham J. Evans-EL <aevansel@5amsolutions.com>
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration("classpath:integration-test-config.xml")
public class GenePatternGridRunnerImplTestIntegration extends AbstractMockitoTest {

    private static final String PREPROCESS_DATASET_URL = "http://cagrid.broadinstitute.org:6060/wsrf/services/cagrid/PreprocessDatasetMAGEService";
    private static final String COMPARATIVE_MARKER_URL = "http://cagrid.broadinstitute.org:11010/wsrf/services/cagrid/ComparativeMarkerSelMAGESvc";
    private static final String PCA_URL = "http://cagrid.broadinstitute.org:6060/wsrf/services/cagrid/PCA";
    private static final String GISTIC_URL = "http://cagrid.broadinstitute.org:10010/wsrf/services/cagrid/Gistic";

    @Autowired
    private GenePatternGridRunnerImpl genePatternGridRunner;
    private FileManager fileManager;
    private StudyHelper studyHelper;
    private StatusUpdateListener updater;

    @Before
    public void setUp() throws Exception {
        updater = mock(StatusUpdateListener.class);
        FileManagerImpl fManager = new FileManagerImpl();
        fManager.setConfigurationHelper(configurationHelper);

        fileManager = fManager;
        genePatternGridRunner = new GenePatternGridRunnerImpl();
        genePatternGridRunner.setFileManager(fileManager);
        genePatternGridRunner.setGenePatternGridClientFactory(new GenePatternGridClientFactoryImpl());
    }

    private StudySubscription setupStudySubscription(ArrayDataType arrayDataType) {
        studyHelper = new StudyHelper();
        studyHelper.setArrayDataType(arrayDataType);
        StudySubscription subscription = studyHelper.populateAndRetrieveStudy();
        UserWorkspace userWorkspace = new UserWorkspace();
        userWorkspace.setUsername("testCaseUser123");
        subscription.setUserWorkspace(userWorkspace);
        return subscription;
    }

    @Test
    public void testRunPreprocessDataset() throws ConnectionException, InvalidCriterionException, IOException {
        StudySubscription subscription = setupStudySubscription(ArrayDataType.GENE_EXPRESSION);
        ServerConnectionProfile server = new ServerConnectionProfile();
        server.setUrl(PREPROCESS_DATASET_URL);
        PreprocessDatasetParameters preprocessParameters = new PreprocessDatasetParameters();
        preprocessParameters.setServer(server);
        PrincipalComponentAnalysisJob job = new PrincipalComponentAnalysisJob();
        job.setSubscription(subscription);

        preprocessParameters.setProcessedGctFilename("preprocessTest.gct");
        File gctFile = new File(fileManager.getUserDirectory(subscription) + File.separator
                + preprocessParameters.getProcessedGctFilename());
        FileUtils.copyFile(TestDataFiles.PCA_TRAIN_FILE, gctFile);
        preprocessParameters.getDatasetParameters().setLogBaseTwo(true);
        genePatternGridRunner.runPreprocessDataset(updater, job, preprocessParameters, gctFile);
        checkGctFile(gctFile, 97, 38, "4.321928094887363");
    }

    @Test
    public void testRunPreprocessComparativeMarker()
            throws ConnectionException, IOException, InvalidCriterionException, ParseException {
        StudySubscription subscription = setupStudySubscription(ArrayDataType.GENE_EXPRESSION);
        ServerConnectionProfile server = new ServerConnectionProfile();
        server.setUrl(PREPROCESS_DATASET_URL);
        PreprocessDatasetParameters preprocessParameters = new PreprocessDatasetParameters();
        preprocessParameters.setServer(server);
        ComparativeMarkerSelectionAnalysisJob job = new ComparativeMarkerSelectionAnalysisJob();
        job.setSubscription(subscription);
        job.getForm().setPreprocessDatasetparameters(preprocessParameters);
        preprocessParameters.setProcessedGctFilename("cmsTest.gct");
        preprocessParameters.getDatasetParameters().setLogBaseTwo(false);
        preprocessParameters.getDatasetParameters().setThreshold(35);
        ComparativeMarkerSelectionParameters comparativeMarkerParameters = new ComparativeMarkerSelectionParameters();
        comparativeMarkerParameters.setClassificationFileName("cmsTest.cls");
        ServerConnectionProfile comparativeMarkerServer = new ServerConnectionProfile();
        comparativeMarkerServer.setUrl(COMPARATIVE_MARKER_URL);
        comparativeMarkerParameters.setServer(comparativeMarkerServer);
        job.getForm().setComparativeMarkerSelectionParameters(comparativeMarkerParameters);
        File gctFile = new File(fileManager.getUserDirectory(subscription) + File.separator
                + preprocessParameters.getProcessedGctFilename());
        File clsFile = new File(fileManager.getUserDirectory(subscription) + File.separator
                + comparativeMarkerParameters.getClassificationFileName());
        FileUtils.copyFile(TestDataFiles.CMS_GCT_FILE, gctFile);
        FileUtils.copyFile(TestDataFiles.CMS_CLASSIFICATIONS_FILE, clsFile);
        File results = genePatternGridRunner.runPreprocessComparativeMarkerSelection(updater, job, gctFile,
                clsFile);
        assertNotNull(results);
        List<String> filenames = ZipUtils.extractFromZip(results.getAbsolutePath());
        assertEquals(3, filenames.size());
        int validFiles = 0;
        for (String filename : filenames) {
            File file = new File(filename);
            if (filename.contains(".gct")) {
                checkGctFile(file, 41, 35, "-214.0");
                validFiles++;
            } else if (filename.contains(".cls")) {
                validFiles++;
            } else if (filename.contains(".odf")) {
                OdfObject odf = new OdfObject(filename);
                assertEquals(41, odf.getRowCount());
                validFiles++;
            }
            FileUtils.deleteQuietly(file);
        }
        assertEquals(3, validFiles);
        FileUtils.deleteQuietly(fileManager.getUserDirectory(subscription));
    }

    @Test
    public void testRunPCA() throws ConnectionException, InvalidCriterionException, IOException {
        StudySubscription subscription = setupStudySubscription(ArrayDataType.GENE_EXPRESSION);
        ServerConnectionProfile server = new ServerConnectionProfile();
        server.setUrl(PCA_URL);
        PCAParameters parameters = new PCAParameters();
        PrincipalComponentAnalysisJob job = new PrincipalComponentAnalysisJob();
        job.setSubscription(subscription);
        job.getForm().setPcaParameters(parameters);
        parameters.setClusterBy("columns");
        parameters.setGctFileName("testPca.gct");
        parameters.setServer(server);
        File zipFile = null;
        File gctFile = new File(
                fileManager.getUserDirectory(subscription) + File.separator + parameters.getGctFileName());
        FileUtils.copyFile(TestDataFiles.PCA_TRAIN_FILE, gctFile);
        zipFile = genePatternGridRunner.runPCA(updater, job, gctFile);
        assertNotNull(zipFile);
        zipFile.deleteOnExit();
        FileUtils.deleteQuietly(fileManager.getUserDirectory(subscription));
    }

    @Test
    public void testRunGistic()
            throws ConnectionException, InvalidCriterionException, IOException, ParameterException {
        StudySubscription subscription = setupStudySubscription(ArrayDataType.COPY_NUMBER);
        GisticParameters parameters = new GisticParameters();
        parameters.setRefgeneFile(GisticRefgeneFileEnum.HUMAN_HG16);
        parameters.getServer().setUrl(GISTIC_URL);
        File zipFile = null;
        GisticAnalysisJob job = new GisticAnalysisJob();
        job.setSubscription(subscription);
        job.getGisticAnalysisForm().setGisticParameters(parameters);
        File samplesFile = new File(fileManager.getUserDirectory(subscription) + File.separator
                + GisticUtils.SAMPLE_FILE_PREFIX + "gisticSamplesFile.txt");
        File markersFile = new File(fileManager.getUserDirectory(subscription) + File.separator
                + GisticUtils.MARKER_FILE_PREFIX + "gisticMarkersFile.txt");
        File cnvFile = new File(fileManager.getUserDirectory(subscription) + File.separator
                + GisticUtils.CNV_SEGMENT_FILE_PREFIX + "gisticCnvFile.txt");
        FileUtils.copyFile(TestDataFiles.GISTIC_SAMPLES_FILE, samplesFile);
        FileUtils.copyFile(TestDataFiles.GISTIC_MARKERS_FILE, markersFile);
        FileUtils.copyFile(TestDataFiles.GISTIC_CNV_FILE, cnvFile);

        zipFile = genePatternGridRunner.runGistic(updater, job, samplesFile, markersFile, cnvFile);
        assertNotNull(zipFile);
        List<String> filenames = ZipUtils.extractFromZip(zipFile.getAbsolutePath());
        // if ZIP contains 3 files or less, this typically means the GP module failed to complete.
        boolean isNumFilesGreaterThan3 = filenames.size() > 3;
        String holdStr = "ZIP archive from GISTIC contains too few files: ";
        holdStr = holdStr.concat(Integer.toString(filenames.size()));
        assertTrue(holdStr, isNumFilesGreaterThan3);
        // If ZIP contains 9 files, this is the expected output from the GISTIC module.
        assertEquals(9, filenames.size());
        zipFile.deleteOnExit();
        FileUtils.deleteQuietly(fileManager.getUserDirectory(subscription));
    }

    private void checkGctFile(File gctFile, int numSamples, int numReporters, String value) throws IOException {
        assertTrue("GCT file does not exist.", gctFile.exists());
        CSVReader reader = new CSVReader(new FileReader(gctFile), '\t');
        checkLine(reader.readNext(), "#1.2");
        checkLine(reader.readNext(), String.valueOf(numSamples), String.valueOf(numReporters));
        reader.readNext(); // Header
        checkValue(reader.readNext(), value); // Row values
        reader.close();
    }

    private void checkValue(String[] readNext, String f) {
        assertEquals(f, readNext[2]);
    }

    private void checkLine(String[] line, String... expecteds) {
        assertArrayEquals(expecteds, line);
    }

}