ubic.gemma.core.analysis.expression.diff.LinearModelAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.analysis.expression.diff.LinearModelAnalyzer.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2010 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.analysis.expression.diff;

import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import org.apache.commons.collections.Transformer;
import org.apache.commons.collections.TransformerUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.dataStructure.matrix.ObjectMatrix;
import ubic.basecode.math.MathUtil;
import ubic.basecode.math.linearmodels.*;
import ubic.gemma.core.analysis.util.ExperimentalDesignUtils;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrixUtil;
import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixColumnSort;
import ubic.gemma.core.datastructure.matrix.MatrixWriter;
import ubic.gemma.model.analysis.expression.diff.*;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Gene;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;

/**
 * Handles fitting linear models with continuous or fixed-level covariates. Data are always log-transformed.
 * Interactions can be included if a DifferentialExpressionAnalysisConfig is passed as an argument to 'run'. Currently
 * we only support interactions if there are two factors in the model (no more).
 * One factor can be constant (the same value for all samples); such a factor will be analyzed by looking at the
 * intercept in the fitted model. This is only appropriate for 'non-reference' designs on ratiometric arrays.
 * This also supports subsetting the data based on a factor. For example, a data set with "tissue" as a factor could be
 * analyzed per-tissue rather than with tissue as a covariate.
 * This only handles the analysis, not the persistence or output of the results.
 *
 * @author paul
 */
@Component
@Scope(value = "prototype")
public class LinearModelAnalyzer extends AbstractDifferentialExpressionAnalyzer {

    /**
     * Preset levels for which we will store the HitListSizes.
     */
    private static final double[] qValueThresholdsForHitLists = new double[] { 0.001, 0.005, 0.01, 0.05, 0.1 };
    private static final Log log = LogFactory.getLog(LinearModelAnalyzer.class);

    /**
     * Factors that are always excluded from analysis
     */
    private static final List<String> EXCLUDE_CHARACTERISTICS_VALUES = new ArrayList<String>() {
        {
            this.add("DE_Exclude");
        }
    };

    private static final String EXCLUDE_WARNING = "Found Factor Value with DE_Exclude characteristic. Skipping current subset.";

    public static void populateFactorValuesFromBASet(BioAssaySet ee, ExperimentalFactor f,
            Collection<FactorValue> fvs) {
        for (BioAssay ba : ee.getBioAssays()) {
            BioMaterial bm = ba.getSampleUsed();
            for (FactorValue fv : bm.getFactorValues()) {
                if (fv.getExperimentalFactor().equals(f)) {
                    fvs.add(fv);
                }
            }
        }
    }

    public static DoubleMatrix<String, String> makeDataMatrix(ObjectMatrix<String, String, Object> designMatrix,
            DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix) {
        /*
         * Convert the data into a string-keyed matrix.
         */
        DoubleMatrix<String, String> sNamedMatrix = new DenseDoubleMatrix<>(namedMatrix.asArray());
        for (int i = 0; i < namedMatrix.rows(); i++) {
            sNamedMatrix.addRowName(namedMatrix.getRowName(i).getId().toString());
        }
        sNamedMatrix.setColumnNames(designMatrix.getRowNames());
        return sNamedMatrix;
    }

    /**
     * This bioAssayDimension shouldn't get persisted; it is only for dealing with subset diff ex. analyses.
     *
     * @param columnsToUse columns to use
     * @return bio assay dimension
     */
    private static BioAssayDimension createBADMap(List<BioMaterial> columnsToUse) {
        /*
         * Indices of the biomaterials in the original matrix.
         */
        List<BioAssay> bioAssays = new ArrayList<>();
        for (BioMaterial bm : columnsToUse) {
            bioAssays.add(bm.getBioAssaysUsedIn().iterator().next());
        }

        /*
         * fix the upper level column name maps.
         */
        BioAssayDimension reorderedDim = BioAssayDimension.Factory.newInstance();
        reorderedDim.setBioAssays(bioAssays);
        reorderedDim.setName("For analysis");
        reorderedDim.setDescription(bioAssays.size() + " bioAssays");

        return reorderedDim;
    }

    /**
     * Determine if any factor should be treated as the intercept term.
     */
    @Override
    public ExperimentalFactor determineInterceptFactor(Collection<ExperimentalFactor> factors,
            QuantitationType quantitationType) {
        ExperimentalFactor interceptFactor = null;
        for (ExperimentalFactor experimentalFactor : factors) {

            /*
             * Check if we need to treat the intercept as a factor.
             */
            boolean useI = this.checkIfNeedToTreatAsIntercept(experimentalFactor, quantitationType);

            if (useI && interceptFactor != null) {
                throw new IllegalStateException("Can only deal with one constant factor (intercept)");
            } else if (useI) {
                interceptFactor = experimentalFactor;
            }
        }
        return interceptFactor;
    }

    @Override
    public Collection<HitListSize> computeHitListSizes(Collection<DifferentialExpressionAnalysisResult> results,
            Map<CompositeSequence, Collection<Gene>> probeToGeneMap) {
        Collection<HitListSize> hitListSizes = new HashSet<>();
        StopWatch timer = new StopWatch();
        timer.start();
        double maxThreshold = MathUtil.max(LinearModelAnalyzer.qValueThresholdsForHitLists);

        assert probeToGeneMap != null;

        Collection<Gene> allGenes = new HashSet<>();
        for (Collection<Gene> genes : probeToGeneMap.values()) {
            allGenes.addAll(genes);
        }

        // maps from Doubles are a bit dodgy...
        Map<Double, Integer> upCounts = new HashMap<>();
        Map<Double, Integer> downCounts = new HashMap<>();
        Map<Double, Integer> eitherCounts = new HashMap<>();

        Map<Double, Integer> upCountGenes = new HashMap<>();
        Map<Double, Integer> downCountGenes = new HashMap<>();
        Map<Double, Integer> eitherCountGenes = new HashMap<>();

        Collection<Gene> seenGenes = new HashSet<>();
        for (DifferentialExpressionAnalysisResult r : results) {

            Double corrP = r.getCorrectedPvalue();
            if (corrP == null || corrP > maxThreshold) {
                continue;
            }

            CompositeSequence probe = r.getProbe();
            Collection<Gene> genesForProbe = probeToGeneMap.get(probe);
            int numGenes = 0;
            if (genesForProbe != null) {
                numGenes = this.countNumberOfGenesNotSeenAlready(genesForProbe, seenGenes);
            }

            // if ( numGenes == 0 ) // This is okay; it might mean we already counted it as differentially expressed.

            Collection<ContrastResult> crs = r.getContrasts();
            boolean up = false;
            boolean down = false;

            /*
             * We set up and down to be true (either or both) if at least on contrast is shown.
             */
            for (ContrastResult cr : crs) {
                Double lf = cr.getLogFoldChange();
                //noinspection StatementWithEmptyBody // Better readability
                if (lf == null) {
                    /*
                     * A contrast which is actually not valid, so it won't be counted in the hit list.
                     */
                } else if (lf < 0) {
                    down = true;
                } else if (lf > 0) {
                    up = true;
                }
            }

            for (double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists) {

                if (!upCounts.containsKey(thresh)) {
                    upCounts.put(thresh, 0);
                    upCountGenes.put(thresh, 0);
                }
                if (!downCounts.containsKey(thresh)) {
                    downCounts.put(thresh, 0);
                    downCountGenes.put(thresh, 0);
                }
                if (!eitherCounts.containsKey(thresh)) {
                    eitherCounts.put(thresh, 0);
                    eitherCountGenes.put(thresh, 0);
                }

                if (corrP < thresh) {
                    if (up) {
                        upCounts.put(thresh, upCounts.get(thresh) + 1);
                        upCountGenes.put(thresh, upCountGenes.get(thresh) + numGenes);
                    }
                    if (down) {
                        downCounts.put(thresh, downCounts.get(thresh) + 1);
                        downCountGenes.put(thresh, downCountGenes.get(thresh) + numGenes);
                    }

                    eitherCounts.put(thresh, eitherCounts.get(thresh) + 1);
                    eitherCountGenes.put(thresh, eitherCountGenes.get(thresh) + numGenes);

                }
            }

        }

        for (double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists) {
            // Ensure we don't set values to null.
            Integer up = upCounts.get(thresh) == null ? 0 : upCounts.get(thresh);
            Integer down = downCounts.get(thresh) == null ? 0 : downCounts.get(thresh);
            Integer either = eitherCounts.get(thresh) == null ? 0 : eitherCounts.get(thresh);

            Integer upGenes = upCountGenes.get(thresh) == null ? 0 : upCountGenes.get(thresh);
            Integer downGenes = downCountGenes.get(thresh) == null ? 0 : downCountGenes.get(thresh);
            Integer eitherGenes = eitherCountGenes.get(thresh) == null ? 0 : eitherCountGenes.get(thresh);

            assert !(allGenes.size() < upGenes || allGenes.size() < downGenes || allGenes
                    .size() < eitherGenes) : "There were more genes differentially expressed than exist in the experiment";

            HitListSize upS = HitListSize.Factory.newInstance(thresh, up, Direction.UP, upGenes);
            HitListSize downS = HitListSize.Factory.newInstance(thresh, down, Direction.DOWN, downGenes);
            HitListSize eitherS = HitListSize.Factory.newInstance(thresh, either, Direction.EITHER, eitherGenes);

            hitListSizes.add(upS);
            hitListSizes.add(downS);
            hitListSizes.add(eitherS);

            assert upGenes <= eitherGenes : "More genes upregulated than upregulated+downregulated";
            assert downGenes <= eitherGenes : "More genes downregulated than upregulated+downregulated";

        }

        if (timer.getTime() > 1000) {
            LinearModelAnalyzer.log.info("Hitlist computation: " + timer.getTime() + "ms");
        }
        return hitListSizes;
    }

    @Override
    public int getNumberOfGenesTested(Collection<DifferentialExpressionAnalysisResult> resultList,
            Map<CompositeSequence, Collection<Gene>> probeToGeneMap) {

        Collection<Gene> gs = new HashSet<>();
        for (DifferentialExpressionAnalysisResult d : resultList) {
            CompositeSequence probe = d.getProbe();
            if (probeToGeneMap.containsKey(probe)) {
                gs.addAll(probeToGeneMap.get(probe));
            }
        }
        return gs.size();
    }

    @Override
    public DifferentialExpressionAnalysis run(ExpressionExperimentSubSet subset,
            DifferentialExpressionAnalysisConfig config) {

        /*
         * Start by setting it up like the full experiment.
         */
        ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService
                .getProcessedExpressionDataMatrix(subset.getSourceExperiment());

        ExperimentalFactor ef = config.getSubsetFactor();
        Collection<BioMaterial> bmTmp = new HashSet<>();
        for (BioAssay ba : subset.getBioAssays()) {
            bmTmp.add(ba.getSampleUsed());
        }

        List<BioMaterial> samplesInSubset = new ArrayList<>(bmTmp);

        FactorValue subsetFactorValue = null;
        for (BioMaterial bm : samplesInSubset) {
            Collection<FactorValue> fvs = bm.getFactorValues();
            for (FactorValue fv : fvs) {
                if (fv.getExperimentalFactor().equals(ef)) {
                    if (subsetFactorValue == null) {
                        subsetFactorValue = fv;
                    } else if (!subsetFactorValue.equals(fv)) {
                        throw new IllegalStateException(
                                "This subset has more than one factor value for the supposed subset factor: " + fv
                                        + " and " + subsetFactorValue);
                    }
                }
            }
        }

        samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset,
                config.getFactorsToInclude());

        // slice.
        ExpressionDataDoubleMatrix subsetMatrix = new ExpressionDataDoubleMatrix(dmatrix, samplesInSubset,
                LinearModelAnalyzer.createBADMap(samplesInSubset));

        Collection<ExperimentalFactor> subsetFactors = this.fixFactorsForSubset(dmatrix, subset,
                config.getFactorsToInclude());

        if (subsetFactors.isEmpty()) {
            LinearModelAnalyzer.log
                    .warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
            return null;
        }

        DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(config.getFactorsToInclude(),
                config, subsetFactorValue);

        DifferentialExpressionAnalysis analysis = this.doAnalysis(subset, subsetConfig, subsetMatrix,
                samplesInSubset, config.getFactorsToInclude(), subsetFactorValue);

        if (analysis == null) {
            throw new IllegalStateException("Subset could not be analyzed with config: " + config);
        }
        return analysis;
    }

    @Override
    public Collection<DifferentialExpressionAnalysis> run(ExpressionExperiment expressionExperiment,
            DifferentialExpressionAnalysisConfig config) {

        ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService
                .getProcessedExpressionDataMatrix(expressionExperiment);

        /*
         * FIXME remove flagged outlier samples ... at some point; so we don't carry NaNs around unnecessarily.
         */

        return this.run(expressionExperiment, dmatrix, config);

    }

    @Override
    public Collection<DifferentialExpressionAnalysis> run(ExpressionExperiment expressionExperiment,
            ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config) {

        /*
         * I apologize for this being so complicated. Basically there are four phases:
         *
         * 1. Get the data matrix and factors
         *
         * 2. Determine baseline groups; build model and contrasts
         *
         * 3. Run the analysis
         *
         * 4. Postprocess the analysis
         *
         * By far the most complex is #2 -- it depends on which factors and what kind they are.
         */

        /*
         * Initialize our matrix and factor lists...
         */
        List<ExperimentalFactor> factors = config.getFactorsToInclude();

        /*
         * FIXME this is the place to strip put the outliers.
         */
        List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(dmatrix, factors);

        dmatrix = new ExpressionDataDoubleMatrix(dmatrix, samplesUsed,
                LinearModelAnalyzer.createBADMap(samplesUsed)); // enforce ordering

        /*
         * Do the analysis, by subsets if requested
         */
        Collection<DifferentialExpressionAnalysis> results = new HashSet<>();

        ExperimentalFactor subsetFactor = config.getSubsetFactor();
        if (subsetFactor != null) {

            if (factors.contains(subsetFactor)) {
                throw new IllegalStateException(
                        "Subset factor cannot also be included in the analysis [ Factor was: " + subsetFactor
                                + "]");
            }

            Map<FactorValue, ExpressionDataDoubleMatrix> subsets = this.makeSubSets(config, dmatrix, samplesUsed,
                    subsetFactor);

            LinearModelAnalyzer.log.info("Total number of subsets: " + subsets.size());

            /*
             * Now analyze each subset
             */
            for (FactorValue subsetFactorValue : subsets.keySet()) {

                LinearModelAnalyzer.log.info("Analyzing subset: " + subsetFactorValue);

                /*
                 * Checking for DE_Exclude characteristics, which should not be included in the analysis.
                 * As requested in issue #4458 (bugzilla)
                 */
                boolean include = true;
                for (Characteristic c : subsetFactorValue.getCharacteristics()) {
                    if (LinearModelAnalyzer.EXCLUDE_CHARACTERISTICS_VALUES.contains(c.getValue())) {
                        include = false;
                        break;
                    }
                }
                if (!include) {
                    LinearModelAnalyzer.log.warn(LinearModelAnalyzer.EXCLUDE_WARNING);
                    continue;
                }

                List<BioMaterial> bioMaterials = ExperimentalDesignUtils
                        .getOrderedSamples(subsets.get(subsetFactorValue), factors);

                /*
                 * make a EESubSet
                 */
                ExpressionExperimentSubSet eeSubSet = ExpressionExperimentSubSet.Factory.newInstance();
                eeSubSet.setSourceExperiment(expressionExperiment);
                eeSubSet.setName("Subset for " + subsetFactorValue);
                Collection<BioAssay> bioAssays = new HashSet<>();
                for (BioMaterial bm : bioMaterials) {
                    bioAssays.addAll(bm.getBioAssaysUsedIn());
                }
                eeSubSet.getBioAssays().addAll(bioAssays);

                Collection<ExperimentalFactor> subsetFactors = this
                        .fixFactorsForSubset(subsets.get(subsetFactorValue), eeSubSet, factors);

                DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(factors, config,
                        subsetFactorValue);

                if (subsetFactors.isEmpty()) {
                    LinearModelAnalyzer.log.warn(
                            "Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
                    continue;
                }

                /*
                 * Run analysis on the subset.
                 */
                DifferentialExpressionAnalysis analysis = this.doAnalysis(eeSubSet, subsetConfig,
                        subsets.get(subsetFactorValue), bioMaterials, new ArrayList<>(subsetFactors),
                        subsetFactorValue);

                if (analysis == null) {
                    LinearModelAnalyzer.log
                            .warn("No analysis results were obtained for subset: " + subsetFactorValue);
                    continue;
                }

                results.add(analysis);

            }

        } else {

            /*
             * Analyze the whole thing as one
             */
            DifferentialExpressionAnalysis analysis = this.doAnalysis(expressionExperiment, config, dmatrix,
                    samplesUsed, factors, null);
            if (analysis == null) {
                LinearModelAnalyzer.log.warn("No analysis results were obtained");
            } else {
                results.add(analysis);
            }
        }
        return results;

    }

    /*
     * FIXME Really we should compute library sizes from the data _after_ filtering. But because filtering removes lowly
     * expressed genes, and we don't filter very much, the effect on the total should be minor
     */
    public DoubleMatrix1D getLibrarySizes(DifferentialExpressionAnalysisConfig config,
            ExpressionDataDoubleMatrix dmatrix) {

        DoubleMatrix1D librarySize = new DenseDoubleMatrix1D(dmatrix.columns());
        if (config.getUseWeights()) {
            for (int i = 0; i < dmatrix.columns(); i++) {
                Collection<BioAssay> bas = dmatrix.getBioAssaysForColumn(i);
                assert bas.size() == 1;
                BioAssay ba = bas.iterator().next();
                Integer sequenceReadCount = ba.getSequenceReadCount();
                if (sequenceReadCount == null) {
                    throw new IllegalStateException("stored read count was null for " + ba);
                }
                if (sequenceReadCount <= 0) {
                    throw new IllegalStateException("stored read count was non-positive for " + ba);
                }
                librarySize.set(i, sequenceReadCount);
            }
        }
        return librarySize;
    }

    /**
     * @return boolean true if we need the intercept.
     */
    private boolean checkIfNeedToTreatAsIntercept(ExperimentalFactor experimentalFactor,
            QuantitationType quantitationType) {
        if (experimentalFactor.getFactorValues().size() == 1) {
            if (quantitationType.getIsRatio()) {
                return true;
            }
            throw new IllegalArgumentException(
                    "Cannot deal with constant factors unless the data are ratiometric non-reference design");
        }
        return false;
    }

    /**
     * Create files that can be used in R to check the results.
     */
    private void outputForDebugging(ExpressionDataDoubleMatrix dmatrix,
            ObjectMatrix<String, String, Object> designMatrix) {
        MatrixWriter mw = new MatrixWriter();
        try (FileWriter writer = new FileWriter(File.createTempFile("data.", ".txt"));
                FileWriter out = new FileWriter(File.createTempFile("design.", ".txt"))) {

            mw.write(writer, dmatrix, null, true, false);

            ubic.basecode.io.writer.MatrixWriter<String, String> dem = new ubic.basecode.io.writer.MatrixWriter<>(
                    out);
            dem.writeMatrix(designMatrix, true);

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Populate the interactionFactorLists and label2Factors
     *
     * @param interactionFactorLists gets populated
     */
    private void buildModelFormula(final DifferentialExpressionAnalysisConfig config,
            final Map<String, Collection<ExperimentalFactor>> label2Factors,
            final List<String[]> interactionFactorLists) {

        /*
         * Add interaction terms
         */
        boolean hasInteractionTerms = config.getInteractionsToInclude() != null
                && !config.getInteractionsToInclude().isEmpty();

        if (hasInteractionTerms) {
            for (Collection<ExperimentalFactor> interactionTerms : config.getInteractionsToInclude()) {

                List<String> interactionFactorNames = new ArrayList<>();
                for (ExperimentalFactor factor : interactionTerms) {
                    interactionFactorNames.add(ExperimentalDesignUtils.nameForR(factor));
                }

                interactionFactorLists.add(interactionFactorNames.toArray(new String[] {}));

                // In the ANOVA table.
                String factTableLabel = StringUtils.join(interactionFactorNames, ":");
                label2Factors.put(factTableLabel, new HashSet<ExperimentalFactor>());
                label2Factors.get(factTableLabel).addAll(interactionTerms);
            }
        }
    }

    private int countNumberOfGenesNotSeenAlready(Collection<Gene> genesForProbe, Collection<Gene> seenGenes) {
        int numGenes = 0;
        if (genesForProbe != null) {
            for (Gene g : genesForProbe) {
                if (seenGenes.contains(g)) {
                    continue;
                }
                numGenes++;
            }
            seenGenes.addAll(genesForProbe);
        }

        return numGenes;
    }

    /**
     * @param bioAssaySet       source data, could be a SubSet
     * @param dmatrix           data (for the subset, if it's a subset)
     * @param samplesUsed       analyzed
     * @param factors           included in the model
     * @param subsetFactorValue null unless analyzing a subset (only used for book-keeping)
     * @return analysis, or null if there was a problem.
     */
    private DifferentialExpressionAnalysis doAnalysis(BioAssaySet bioAssaySet,
            DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix,
            List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors, FactorValue subsetFactorValue) {

        // We may want to change this to fall back to running normally, though the real fix is to just finish the ebayes implementation.
        if (config.getModerateStatistics() && dmatrix.hasMissingValues()) {
            throw new UnsupportedOperationException(
                    "Ebayes cannot be used when there are values missing in the data");
        }

        if (factors.isEmpty()) {
            LinearModelAnalyzer.log.error("Must provide at least one factor");
            return null;
        }

        if (samplesUsed.size() <= factors.size()) {
            LinearModelAnalyzer.log.error("Must have more samples than factors");
            return null;
        }

        final Map<String, Collection<ExperimentalFactor>> label2Factors = this.getRNames(factors);

        Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils
                .getBaselineConditions(samplesUsed, factors);

        this.dropIncompleteFactors(samplesUsed, factors, baselineConditions);

        if (factors.isEmpty()) {
            LinearModelAnalyzer.log
                    .error("Must provide at least one factor; they were all removed due to incomplete values");
            return null;
        }

        QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();

        ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);

        /*
         * Build our factor terms, with interactions handled specially
         */
        List<String[]> interactionFactorLists = new ArrayList<>();
        ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors,
                samplesUsed, baselineConditions);

        config.setBaseLineFactorValues(baselineConditions);

        boolean oneSampleTTest = interceptFactor != null && factors.size() == 1;
        if (!oneSampleTTest) {
            this.buildModelFormula(config, label2Factors, interactionFactorLists);
        }

        /*
         * FIXME: remove columns that are marked as outliers.
         */
        dmatrix = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(quantitationType, dmatrix);
        DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();

        DoubleMatrix1D librarySize = getLibrarySizes(config, dmatrix);

        if (LinearModelAnalyzer.log.isDebugEnabled())
            this.outputForDebugging(dmatrix, designMatrix);

        /*
         * PREPARATION FOR 'NATIVE' FITTING
         */
        DoubleMatrix<String, String> sNamedMatrix = LinearModelAnalyzer.makeDataMatrix(designMatrix, namedMatrix);
        DesignMatrix properDesignMatrix = this.makeDesignMatrix(designMatrix, interactionFactorLists,
                baselineConditions);

        /*
         * Run the analysis NOTE this can be simplified if we strip out R code.
         */
        final Map<String, LinearModelSummary> rawResults = this.runAnalysis(namedMatrix, sNamedMatrix,
                properDesignMatrix, librarySize, config);

        if (rawResults.size() == 0) {
            LinearModelAnalyzer.log.error("Got no results from the analysis");
            return null;
        }

        /*
         * Initialize data structures we need to hold results.
         */

        // this used to be a Set, but a List is much faster.
        Map<String, List<DifferentialExpressionAnalysisResult>> resultLists = new HashMap<>();
        Map<String, List<Double>> pvaluesForQvalue = new HashMap<>();

        for (String factorName : label2Factors.keySet()) {
            resultLists.put(factorName, new ArrayList<DifferentialExpressionAnalysisResult>());
            pvaluesForQvalue.put(factorName, new ArrayList<Double>());
        }

        for (String[] fs : interactionFactorLists) {
            String intF = StringUtils.join(fs, ":");
            resultLists.put(intF, new ArrayList<DifferentialExpressionAnalysisResult>());
            pvaluesForQvalue.put(intF, new ArrayList<Double>());
        }

        if (pvaluesForQvalue.isEmpty()) {
            LinearModelAnalyzer.log.warn("No results were obtained for the current stage of analysis.");
            return null;
        }

        /*
         * Create result objects for each model fit. Keeping things in order is important.
         */
        final Transformer rowNameExtractor = TransformerUtils.invokerTransformer("getId");
        boolean warned = false;
        int notUsable = 0;
        int processed = 0;
        for (CompositeSequence el : namedMatrix.getRowNames()) {

            if (++processed % 15000 == 0) {
                LinearModelAnalyzer.log.info("Processed results for " + processed + " elements ...");
            }

            LinearModelSummary lm = rawResults.get(rowNameExtractor.transform(el).toString());

            if (LinearModelAnalyzer.log.isDebugEnabled())
                LinearModelAnalyzer.log.debug(el.getName() + "\n" + lm);

            if (lm == null) {
                if (!warned) {
                    LinearModelAnalyzer.log.warn("No result for " + el + ", further warnings suppressed");
                    warned = true;
                }
                notUsable++;
                continue;
            }

            for (String factorName : label2Factors.keySet()) {

                if (!pvaluesForQvalue.containsKey(factorName)) {
                    // was dropped.
                    continue;
                }

                Double overallPValue;
                DifferentialExpressionAnalysisResult probeAnalysisResult = DifferentialExpressionAnalysisResult.Factory
                        .newInstance();
                probeAnalysisResult.setProbe(el);

                if (lm.getCoefficients() == null) {
                    // probeAnalysisResult.setPvalue( null );
                    // pvaluesForQvalue.get( factorName ).add( overallPValue );
                    // resultLists.get( factorName ).add( probeAnalysisResult );
                    notUsable++;
                    continue;
                }

                Collection<ExperimentalFactor> factorsForName = label2Factors.get(factorName);

                if (factorsForName.size() > 1) {
                    /*
                     * Interactions
                     */
                    if (factorsForName.size() > 2) {
                        LinearModelAnalyzer.log.error("Handling more than two-way interactions is not implemented");
                        return null;
                    }

                    assert factorName.contains(":");
                    String[] factorNames = StringUtils.split(factorName, ":");
                    assert factorNames.length == factorsForName.size();
                    overallPValue = lm.getInteractionEffectP(factorNames);

                    if (overallPValue != null && !Double.isNaN(overallPValue)) {

                        Map<String, Double> interactionContrastTStats = lm.getContrastTStats(factorName);
                        Map<String, Double> interactionContrastCoeffs = lm.getContrastCoefficients(factorName);
                        Map<String, Double> interactionContrastPValues = lm.getContrastPValues(factorName);

                        for (String term : interactionContrastPValues.keySet()) {
                            Double contrastPvalue = interactionContrastPValues.get(term);

                            this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue,
                                    interactionContrastTStats, interactionContrastCoeffs);

                        }
                    } else {
                        if (!warned) {
                            LinearModelAnalyzer.log.warn("Interaction could not be computed for " + el
                                    + ", further warnings suppressed");
                            warned = true;
                        }

                        if (LinearModelAnalyzer.log.isDebugEnabled())
                            LinearModelAnalyzer.log.debug("Interaction could not be computed for " + el
                                    + ", further warnings suppressed");

                        notUsable++; // will over count?
                        continue;
                    }

                } else {

                    /*
                     * Main effect
                     */
                    assert factorsForName.size() == 1;
                    ExperimentalFactor experimentalFactor = factorsForName.iterator().next();

                    if (factorsForName.size() == 1 && experimentalFactor.equals(interceptFactor)) {
                        overallPValue = lm.getInterceptP();
                    } else {
                        overallPValue = lm.getMainEffectP(factorName);
                    }

                    /*
                     * Add contrasts unless overall pvalue is NaN
                     */
                    if (overallPValue != null && !Double.isNaN(overallPValue)) {

                        Map<String, Double> mainEffectContrastTStats = lm.getContrastTStats(factorName);
                        Map<String, Double> mainEffectContrastPvalues = lm.getContrastPValues(factorName);
                        Map<String, Double> mainEffectContrastCoeffs = lm.getContrastCoefficients(factorName);

                        for (String term : mainEffectContrastPvalues.keySet()) {

                            Double contrastPvalue = mainEffectContrastPvalues.get(term);

                            this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue,
                                    mainEffectContrastTStats, mainEffectContrastCoeffs);

                        }
                    } else {
                        if (!warned) {
                            LinearModelAnalyzer.log.warn("ANOVA could not be done for " + experimentalFactor
                                    + " on " + el + ", further warnings suppressed");
                            warned = true;
                        }

                        if (LinearModelAnalyzer.log.isDebugEnabled())
                            LinearModelAnalyzer.log
                                    .debug("ANOVA could not be done for " + experimentalFactor + " on " + el);

                        notUsable++; // will over count?
                        continue;
                    }
                }

                assert !Double.isNaN(overallPValue) : "We should not be keeping non-number pvalues (null or NaNs)";

                probeAnalysisResult.setPvalue(this.nan2Null(overallPValue));
                pvaluesForQvalue.get(factorName).add(overallPValue);
                resultLists.get(factorName).add(probeAnalysisResult);
            } // over terms

        } // over probes

        if (notUsable > 0) {
            LinearModelAnalyzer.log
                    .info(notUsable + " elements or results were not usable - model could not be fit, etc.");
        }

        this.getRanksAndQvalues(resultLists, pvaluesForQvalue);

        DifferentialExpressionAnalysis expressionAnalysis = this.makeAnalysisEntity(bioAssaySet, config,
                label2Factors, baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest,
                resultLists, subsetFactorValue);

        LinearModelAnalyzer.log.info("Analysis processing phase done ...");

        return expressionAnalysis;
    }

    private void dropIncompleteFactors(List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors,
            Map<ExperimentalFactor, FactorValue> baselineConditions) {
        Collection<ExperimentalFactor> toDrop = new HashSet<>();
        for (ExperimentalFactor f : factors) {
            if (!ExperimentalDesignUtils.isComplete(f, samplesUsed, baselineConditions)) {
                toDrop.add(f);
                LinearModelAnalyzer.log.info("Dropping " + f + ", missing values");
            }
        }
        factors.removeAll(toDrop);

    }

    /**
     * Remove all configurations that have to do with factors that aren't in the selected factors.
     *
     * @param factors the factors that will be included
     * @return an updated config; the baselines are cleared; subset is cleared; interactions are only kept if they only
     * involve the given factors.
     */
    private DifferentialExpressionAnalysisConfig fixConfigForSubset(List<ExperimentalFactor> factors,
            DifferentialExpressionAnalysisConfig config, FactorValue subsetFactorValue) {

        DifferentialExpressionAnalysisConfig newConfig = new DifferentialExpressionAnalysisConfig();

        newConfig.setBaseLineFactorValues(null);

        if (!config.getInteractionsToInclude().isEmpty()) {
            Collection<Collection<ExperimentalFactor>> newInteractionsToInclude = new HashSet<>();
            for (Collection<ExperimentalFactor> interactors : config.getInteractionsToInclude()) {
                if (factors.containsAll(interactors)) {
                    newInteractionsToInclude.add(interactors);
                }
            }

            newConfig.setInteractionsToInclude(newInteractionsToInclude);
        }

        newConfig.setSubsetFactor(null);
        newConfig.setSubsetFactorValue(subsetFactorValue);
        newConfig.setFactorsToInclude(factors);

        return newConfig;

    }

    /**
     * Remove factors which are no longer usable, based on the subset.
     */
    private Collection<ExperimentalFactor> fixFactorsForSubset(ExpressionDataDoubleMatrix dmatrix,
            ExpressionExperimentSubSet eesubSet, List<ExperimentalFactor> factors) {

        List<ExperimentalFactor> result = new ArrayList<>();

        QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
        ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);

        /*
         * Remove any constant factors, unless they are that intercept.
         */
        for (ExperimentalFactor f : factors) {

            if (f.getType().equals(FactorType.CONTINUOUS)) {
                result.add(f);
            } else if (interceptFactor != null && interceptFactor.equals(f)) {
                result.add(f);
            } else {

                Collection<FactorValue> levels = new HashSet<>();
                LinearModelAnalyzer.populateFactorValuesFromBASet(eesubSet, f, levels);

                if (levels.size() > 1) {
                    result.add(f);
                } else {
                    LinearModelAnalyzer.log.info("Dropping " + f + " from subset");
                }

            }

        }

        return result;

    }

    /**
     * Needed to compute the number of genes tested/detected.
     */
    private Map<CompositeSequence, Collection<Gene>> getProbeToGeneMap(
            Map<String, ? extends Collection<DifferentialExpressionAnalysisResult>> resultLists) {
        Map<CompositeSequence, Collection<Gene>> result = new HashMap<>();

        for (Collection<DifferentialExpressionAnalysisResult> resultList : resultLists.values()) {
            for (DifferentialExpressionAnalysisResult d : resultList) {
                CompositeSequence probe = d.getProbe();
                result.put(probe, new HashSet<Gene>());
            }
        }

        // testing environment, etc.
        if (result.isEmpty()) {
            return new HashMap<>();
        }

        return compositeSequenceService.getGenes(result.keySet());

    }

    /**
     * Fill in the ranks and qvalues in the results.
     *
     * @param pvaluesForQvalue Map of factorName to results.
     */
    private void getRanksAndQvalues(
            Map<String, ? extends Collection<DifferentialExpressionAnalysisResult>> resultLists,
            Map<String, List<Double>> pvaluesForQvalue) {
        /*
         * qvalues and ranks, requires second pass over the result objects.
         */
        for (String fName : pvaluesForQvalue.keySet()) {
            Collection<Double> pvals = pvaluesForQvalue.get(fName);

            if (pvals.isEmpty()) {
                LinearModelAnalyzer.log.warn("No pvalues for " + fName + ", ignoring.");
                continue;
            }
            LinearModelAnalyzer.log.info(pvals.size() + " pvalues for " + fName);

            Double[] pvalArray = pvals.toArray(new Double[] {});
            for (Double aPvalArray : pvalArray) {

                assert aPvalArray != null;
                assert !Double.isNaN(aPvalArray);

            }

            double[] ranks = super.computeRanks(ArrayUtils.toPrimitive(pvalArray));

            if (ranks == null) {
                LinearModelAnalyzer.log.error("Ranks could not be computed " + fName);
                continue;
            }

            assert pvalArray.length == resultLists.get(fName).size() : pvalArray.length + " != "
                    + resultLists.get(fName).size();

            assert pvalArray.length == ranks.length;

            double[] qvalues = super.benjaminiHochberg(pvalArray);

            assert qvalues.length == resultLists.get(fName).size();

            int i = 0;
            for (DifferentialExpressionAnalysisResult pr : resultLists.get(fName)) {
                pr.setCorrectedPvalue(this.nan2Null(qvalues[i]));
                pr.setRank(this.nan2Null(ranks[i]));
                i++;
            }
        }
    }

    private Map<String, Collection<ExperimentalFactor>> getRNames(List<ExperimentalFactor> factors) {
        final Map<String, Collection<ExperimentalFactor>> label2Factors = new LinkedHashMap<>();
        for (ExperimentalFactor experimentalFactor : factors) {
            label2Factors.put(ExperimentalDesignUtils.nameForR(experimentalFactor),
                    new HashSet<ExperimentalFactor>());
            label2Factors.get(ExperimentalDesignUtils.nameForR(experimentalFactor)).add(experimentalFactor);
        }
        return label2Factors;
    }

    /**
     * @return Analysis (no-npersistent)
     */
    private DifferentialExpressionAnalysis makeAnalysisEntity(BioAssaySet bioAssaySet,
            DifferentialExpressionAnalysisConfig config,
            final Map<String, Collection<ExperimentalFactor>> label2Factors,
            Map<ExperimentalFactor, FactorValue> baselineConditions, ExperimentalFactor interceptFactor,
            List<String[]> interactionFactorLists, boolean oneSampleTtest,
            Map<String, ? extends Collection<DifferentialExpressionAnalysisResult>> resultLists,
            FactorValue subsetFactorValue) {

        DifferentialExpressionAnalysis expressionAnalysis = super.initAnalysisEntity(bioAssaySet, config);

        /*
         * Complete analysis config
         */
        expressionAnalysis.setName(this.getClass().getSimpleName());
        expressionAnalysis.setDescription("Linear model with " + config.getFactorsToInclude().size() + " factors"
                + (interceptFactor == null ? "" : " with intercept treated as factor")
                + (interactionFactorLists.isEmpty() ? "" : " with interaction") + (subsetFactorValue == null ? ""
                        : "Using subset " + bioAssaySet + " subset value= " + subsetFactorValue));
        expressionAnalysis.setSubsetFactorValue(subsetFactorValue);

        Collection<ExpressionAnalysisResultSet> resultSets = this.makeResultSets(label2Factors, baselineConditions,
                oneSampleTtest, expressionAnalysis, resultLists);

        expressionAnalysis.setResultSets(resultSets);

        return expressionAnalysis;
    }

    /**
     * Add a contrast to the given result.
     */
    private void makeContrast(DifferentialExpressionAnalysisResult probeAnalysisResult,
            Collection<ExperimentalFactor> experimentalFactors, String term, String factorName,
            Double contrastPvalue, Map<String, Double> tstats, Map<String, Double> coeffs) {

        assert experimentalFactors.size() == 1 || experimentalFactors.size() == 2;

        Double contrastTstat = tstats.get(term);
        Double coefficient = coeffs.get(term);

        // no reason to store this. Note: Tiny coefficient could also be treated as a missing value, but hopefully such
        // situations would have been dealt with at a lower level.
        if (coefficient == null || contrastTstat == null) {
            return;
        }

        ContrastResult contrast = ContrastResult.Factory.newInstance();
        contrast.setPvalue(this.nan2Null(contrastPvalue));
        contrast.setTstat(this.nan2Null(contrastTstat));
        contrast.setCoefficient(this.nan2Null(coefficient));

        List<ExperimentalFactor> factorList = new ArrayList<>(experimentalFactors);
        boolean isInteraction = false;
        if (factorList.size() == 2) {
            isInteraction = true;
        }

        /*
         * The coefficient can be treated as fold-change if the data are log-transformed. This is because the
         * coefficient in the contrast is the (fitted;estimated) difference between the means, and log(x) - log(y) =
         * log(x/y). Limma uses this same trick.
         */
        contrast.setLogFoldChange(this.nan2Null(coefficient));

        if (term.contains(ExperimentalDesignUtils.FACTOR_VALUE_RNAME_PREFIX)) {
            // otherwise, it's continuous, and
            // we don't put in a
            // factorvalue.

            String[] terms = new String[2];
            String[] factorNames = new String[2];
            if (term.contains(":")) {
                terms = StringUtils.split(term, ":");
                factorNames = StringUtils.split(factorName, ":");
            } else {
                terms[0] = term;
                factorNames[0] = factorName;
            }

            String firstTerm = terms[0];
            String secondTerm = terms[1];

            Long factorValueId;

            try {
                factorValueId = Long.parseLong(
                        firstTerm.replace(factorNames[0] + ExperimentalDesignUtils.FACTOR_VALUE_RNAME_PREFIX, ""));
            } catch (NumberFormatException e) {
                throw new RuntimeException("Failed to parse: " + firstTerm + " into a factorvalue id");
            }

            for (ExperimentalFactor f : factorList) {
                for (FactorValue fv : f.getFactorValues()) {
                    if (fv.getId().equals(factorValueId)) {
                        contrast.setFactorValue(fv);
                        break;
                    }
                }
            }

            assert contrast.getFactorValue() != null;

            if (isInteraction) {
                LinearModelAnalyzer.log.debug("Interaction term");
                assert secondTerm != null;

                try {
                    factorValueId = Long.parseLong(secondTerm
                            .replace(factorNames[1] + ExperimentalDesignUtils.FACTOR_VALUE_RNAME_PREFIX, ""));
                } catch (NumberFormatException e) {
                    throw new RuntimeException("Failed to parse: " + secondTerm + " into a factorvalue id");
                }

                for (ExperimentalFactor f : factorList) {
                    for (FactorValue fv : f.getFactorValues()) {
                        if (fv.getId().equals(factorValueId)) {
                            contrast.setSecondFactorValue(fv);
                            break;
                        }
                    }
                }

                if (contrast.getSecondFactorValue() == null) {
                    throw new IllegalStateException("Failed to get interaction contrast second factorvalue");
                }
            }

            if (contrast.getFactorValue() == null) {
                throw new IllegalStateException("Failed to get contrast factorvalue");
            }

            if (contrast.getSecondFactorValue() != null
                    && contrast.getSecondFactorValue().equals(contrast.getFactorValue())) {
                throw new IllegalStateException(
                        "Contrast for interactions must be for two different factor values, got the same one twice");
            }
        }

        probeAnalysisResult.getContrasts().add(contrast);

    }

    private DesignMatrix makeDesignMatrix(ObjectMatrix<String, String, Object> designMatrix,
            List<String[]> interactionFactorLists, Map<ExperimentalFactor, FactorValue> baselineConditions) {
        /*
         * Determine the factors and interactions to include.
         */
        DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);

        if (!(interactionFactorLists == null) && !interactionFactorLists.isEmpty()) {
            for (String[] in : interactionFactorLists) {
                // we actually only support (tested etc.) one interaction at a time, but this should actually work for
                // multiple
                properDesignMatrix.addInteraction(in);
            }
        }

        for (ExperimentalFactor ef : baselineConditions.keySet()) {
            if (ExperimentalDesignUtils.isContinuous(ef)) {
                continue;
            }
            String factorName = ExperimentalDesignUtils.nameForR(ef);
            String baselineFactorValue = ExperimentalDesignUtils.nameForR(baselineConditions.get(ef), true);

            /*
             * If this is a subset, it is possible the baseline chosen is not eligible for the subset.
             */
            LinearModelAnalyzer.log.info(ef);

            assert baselineConditions.get(ef).getExperimentalFactor().equals(ef) : baselineConditions.get(ef)
                    + " is not a value of " + ef;
            properDesignMatrix.setBaseline(factorName, baselineFactorValue);
        }
        return properDesignMatrix;
    }

    private Collection<ExpressionAnalysisResultSet> makeResultSets(
            final Map<String, Collection<ExperimentalFactor>> label2Factors,
            Map<ExperimentalFactor, FactorValue> baselineConditions, boolean oneSampleTtest,
            DifferentialExpressionAnalysis expressionAnalysis,
            Map<String, ? extends Collection<DifferentialExpressionAnalysisResult>> resultLists) {

        Map<CompositeSequence, Collection<Gene>> probeToGeneMap = this.getProbeToGeneMap(resultLists);

        /*
         * Result sets
         */
        LinearModelAnalyzer.log.info("Processing " + resultLists.size() + " resultSets");
        // StopWatch timer = new StopWatch();
        // timer.start();
        Collection<ExpressionAnalysisResultSet> resultSets = new HashSet<>();
        for (String fName : resultLists.keySet()) {
            Collection<DifferentialExpressionAnalysisResult> results = resultLists.get(fName);

            Collection<ExperimentalFactor> factorsUsed = new HashSet<>(label2Factors.get(fName));

            FactorValue baselineGroup = null;
            if (!oneSampleTtest && factorsUsed.size() == 1 /* not interaction */ ) {
                ExperimentalFactor factor = factorsUsed.iterator().next();
                assert baselineConditions.containsKey(factor);
                baselineGroup = baselineConditions.get(factor);
            }

            Collection<HitListSize> hitListSizes = this.computeHitListSizes(results, probeToGeneMap);

            int numberOfProbesTested = results.size();
            int numberOfGenesTested = this.getNumberOfGenesTested(results, probeToGeneMap);

            // make List into Set
            ExpressionAnalysisResultSet resultSet = ExpressionAnalysisResultSet.Factory.newInstance(factorsUsed,
                    numberOfProbesTested, numberOfGenesTested, baselineGroup, new HashSet<>(results),
                    expressionAnalysis, null, hitListSizes);
            resultSets.add(resultSet);

            LinearModelAnalyzer.log.info("Finished with result set for " + fName);

        }
        return resultSets;
    }

    private Map<FactorValue, ExpressionDataDoubleMatrix> makeSubSets(DifferentialExpressionAnalysisConfig config,
            ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, ExperimentalFactor subsetFactor) {
        if (subsetFactor.getType().equals(FactorType.CONTINUOUS)) {
            throw new IllegalArgumentException("You cannot subset on a continuous factor (has a Measurement)");
        }

        if (config.getFactorsToInclude().contains(subsetFactor)) {
            throw new IllegalArgumentException(
                    "You cannot analyze a factor and use it for subsetting at the same time.");
        }

        Map<FactorValue, List<BioMaterial>> subSetSamples = new HashMap<>();
        for (FactorValue fv : subsetFactor.getFactorValues()) {
            assert fv.getMeasurement() == null;
            subSetSamples.put(fv, new ArrayList<BioMaterial>());
        }

        for (BioMaterial sample : samplesUsed) {
            boolean ok = false;
            for (FactorValue fv : sample.getFactorValues()) {
                if (fv.getExperimentalFactor().equals(subsetFactor)) {
                    subSetSamples.get(fv).add(sample);
                    ok = true;
                    break;
                }
            }
            if (!ok) {
                throw new IllegalArgumentException(
                        "Cannot subset on a factor unless each sample has a value for it. Missing value for: "
                                + sample + " " + sample.getBioAssaysUsedIn());
            }
        }

        Map<FactorValue, ExpressionDataDoubleMatrix> subMatrices = new HashMap<>();
        for (FactorValue fv : subSetSamples.keySet()) {
            List<BioMaterial> samplesInSubset = subSetSamples.get(fv);

            if (samplesInSubset.isEmpty()) {
                throw new IllegalArgumentException("The subset was empty for fv: " + fv);
            }
            assert samplesInSubset.size() < samplesUsed.size();

            samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset,
                    config.getFactorsToInclude());
            ExpressionDataDoubleMatrix subMatrix = new ExpressionDataDoubleMatrix(dmatrix, samplesInSubset,
                    LinearModelAnalyzer.createBADMap(samplesInSubset));
            subMatrices.put(fv, subMatrix);
        }

        return subMatrices;

    }

    /**
     * Important bit. Run the analysis
     *
     * @return results
     */
    private Map<String, LinearModelSummary> runAnalysis(
            final DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix,
            final DoubleMatrix<String, String> sNamedMatrix, DesignMatrix designMatrix,
            final DoubleMatrix1D librarySize, final DifferentialExpressionAnalysisConfig config) {

        final Map<String, LinearModelSummary> rawResults = new ConcurrentHashMap<>();

        Future<?> f = this.runAnalysisFuture(designMatrix, sNamedMatrix, rawResults, librarySize, config);

        StopWatch timer = new StopWatch();
        timer.start();
        long lastTime = 0;

        // this analysis should take just 10 or 20 seconds for most data sets.
        double MAX_ANALYSIS_TIME = 60 * 1000 * 30; // 30 minutes.
        double updateIntervalMillis = 60 * 1000;// 1 minute
        while (!f.isDone()) {
            try {
                Thread.sleep(1000);

                if (timer.getTime() - lastTime > updateIntervalMillis) {
                    LinearModelAnalyzer.log.info(String.format("Analysis running, %.1f minutes elapsed ...",
                            timer.getTime() / 60000.00));
                    lastTime = timer.getTime();
                }

            } catch (InterruptedException e) {
                LinearModelAnalyzer.log.warn("Analysis interrupted!");
                return rawResults;
            }

            if (timer.getTime() > MAX_ANALYSIS_TIME) {
                LinearModelAnalyzer.log
                        .error("Analysis is taking too long, something bad must have happened; cancelling");
                f.cancel(true);
                throw new RuntimeException("Analysis was taking too long, it was cancelled");
            }
        }

        if (timer.getTime() > updateIntervalMillis) {
            LinearModelAnalyzer.log
                    .info(String.format("Analysis finished in %.1f minutes.", timer.getTime() / 60000.00));
        }

        try {
            f.get();
        } catch (InterruptedException e) {
            LinearModelAnalyzer.log.warn("Job was interrupted");
            return rawResults;
        } catch (ExecutionException e) {
            throw new RuntimeException(e);
        }

        assert rawResults.size() == namedMatrix.rows() : "expected " + namedMatrix.rows() + " results, got "
                + rawResults.size();
        return rawResults;
    }

    /**
     * Linear models solved
     */
    private Future<?> runAnalysisFuture(final DesignMatrix designMatrix, final DoubleMatrix<String, String> data,
            final Map<String, LinearModelSummary> rawResults, final DoubleMatrix1D librarySize,
            final DifferentialExpressionAnalysisConfig config) {
        ExecutorService service = Executors.newSingleThreadExecutor();

        Future<?> f = service.submit(new Runnable() {
            @Override
            public void run() {
                StopWatch timer = new StopWatch();
                timer.start();
                LeastSquaresFit fit;
                if (config.getUseWeights()) {
                    MeanVarianceEstimator mv = new MeanVarianceEstimator(designMatrix, data, librarySize);
                    LinearModelAnalyzer.log
                            .info("Model weights from mean-variance model: " + timer.getTime() + "ms");
                    timer.reset();
                    timer.start();
                    fit = new LeastSquaresFit(designMatrix, data, mv.getWeights());
                } else {
                    fit = new LeastSquaresFit(designMatrix, data);
                }
                LinearModelAnalyzer.log.info("Model fit data matrix " + data.rows() + " x " + data.columns() + ": "
                        + timer.getTime() + "ms");
                timer.reset();

                timer.start();
                if (config.getModerateStatistics()) {
                    if (fit.isHasMissing()) {
                        // not implemented yet.
                        throw new UnsupportedOperationException(
                                "Ebayes cannot be run as there are missing values in the data");
                    }
                    ModeratedTstat.ebayes(fit);
                    LinearModelAnalyzer.log.info("Moderate test statistics: " + timer.getTime() + "ms");

                }

                timer.reset();

                timer.start();
                Map<String, LinearModelSummary> res = fit.summarizeByKeys(true);
                LinearModelAnalyzer.log.info("Model summarize/ANOVA: " + timer.getTime() + "ms");
                rawResults.putAll(res);
                LinearModelAnalyzer.log.info("Analysis phase done ...");
            }
        });

        service.shutdown();
        return f;
    }
}