ubic.gemma.persistence.service.expression.experiment.GeeqServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.persistence.service.expression.experiment.GeeqServiceImpl.java

Source

/*
 * The gemma project
 *
 * Copyright (c) 2018 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.gemma.persistence.service.expression.experiment;

import com.google.common.base.Stopwatch;

import cern.colt.list.DoubleArrayList;
import cern.jet.stat.Descriptive;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.math3.stat.StatUtils;
import org.openjena.atlas.logging.Log;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.gemma.core.analysis.preprocess.OutlierDetectionService;
import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails;
import ubic.gemma.core.analysis.service.ExpressionDataMatrixService;
import ubic.gemma.core.analysis.util.ExperimentalDesignUtils;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.common.auditAndSecurity.AuditEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.GeeqEvent;
import ubic.gemma.model.common.description.BibliographicReference;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.service.AbstractVoEnabledService;
import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.genome.taxon.TaxonService;
import ubic.gemma.persistence.util.EntityUtils;

import java.util.*;
import java.util.concurrent.TimeUnit;

@Service
public class GeeqServiceImpl extends AbstractVoEnabledService<Geeq, GeeqValueObject> implements GeeqService {

    /**
     * If there are fewer than this number of replicates per condition, but more than GEEQ_WORST_REPLICATION_THRESHOLD,
     * a medium score is given for replicates.
     */
    private static final int GEEQ_MEDIUM_REPLICATION_THRESHOLD = 5;

    /**
     * If there are fewer than this number of replicates per condition, the worst score is given for replicates.
     */
    private static final int GEEQ_WORST_REPLICATION_THRESHOLD = 2;

    /**
     * How many factors to look at to determine conditions that have very few replicates. Since we routinely only do
     * differential expression analysis for up to 3 factors, that value makes sense. (batch and continuous factors not
     * included)
     */
    private static final int MAX_EFS_REPLICATE_CHECK = 3;

    private static final String LOG_PREFIX = "|G|E|E|Q| ";
    private static final String ERR_MSG_MISSING_VALS = "Can not calculate missing values: ";
    private static final String ERR_MSG_CORMAT = "Can not create cormat: ";
    private static final String ERR_MSG_CORMAT_MISSING_VALS = "Cormat retrieval failed because of missing missing values for ee id ";
    private static final String ERR_W_MEAN_BAD_ARGS = "Can not calculate weighted arithmetic mean from null or unequal length arrays.";
    private static final String ERR_B_EFFECT_BAD_STATE = "Batch effect scoring in odd state - null batch effect, but batch info should be present."
            + "The same problem will be present for batch confound as well.";

    private static final double P_00 = 0.0;
    public static final double BATCH_EFF_WEAK = GeeqServiceImpl.P_00;
    private static final double P_03 = 0.3;
    private static final double P_05 = 0.5;
    private static final double P_10 = 1.0;
    public static final double BATCH_CONF_NO_HAS = GeeqServiceImpl.P_10;
    public static final double BATCH_EFF_NONE = GeeqServiceImpl.P_10;
    private static final double N_03 = -GeeqServiceImpl.P_03;
    private static final double N_05 = -GeeqServiceImpl.P_05;
    private static final double N_10 = -GeeqServiceImpl.P_10;
    public static final double BATCH_CONF_HAS = GeeqServiceImpl.N_10;
    public static final double BATCH_EFF_STRONG = GeeqServiceImpl.N_10;
    private static final String DE_EXCLUDE = "DE_Exclude";
    private final ExpressionExperimentService expressionExperimentService;
    private final ArrayDesignService arrayDesignService;
    private final ExpressionDataMatrixService expressionDataMatrixService;
    private final OutlierDetectionService outlierDetectionService;
    private final AuditTrailService auditTrailService;
    private final SampleCoexpressionAnalysisService sampleCoexpressionAnalysisService;
    private final TaxonService taxonService;

    @Autowired
    public GeeqServiceImpl(GeeqDao geeqDao, ExpressionExperimentService expressionExperimentService,
            ArrayDesignService arrayDesignService, ExpressionDataMatrixService expressionDataMatrixService,
            OutlierDetectionService outlierDetectionService, AuditTrailService auditTrailService,
            SampleCoexpressionAnalysisService sampleCoexpressionAnalysisService, TaxonService taxonService) {
        super(geeqDao);
        this.expressionExperimentService = expressionExperimentService;
        this.arrayDesignService = arrayDesignService;
        this.expressionDataMatrixService = expressionDataMatrixService;
        this.outlierDetectionService = outlierDetectionService;
        this.auditTrailService = auditTrailService;
        this.sampleCoexpressionAnalysisService = sampleCoexpressionAnalysisService;
        this.taxonService = taxonService;
    }

    @Override
    public void calculateScore(Long eeId, String mode) {
        this.doScoring(eeId, mode);
    }

    @Override
    public void setManualOverrides(Long eeId, GeeqAdminValueObject gqVo) {
        ExpressionExperiment ee = expressionExperimentService.load(eeId);
        ee = expressionExperimentService.thawLiter(ee);
        Geeq gq = ee.getGeeq();

        // Update manual quality score value
        if (gq.getManualQualityScore() != gqVo.getManualQualityScore()) {
            gq.setLastManualOverride(this.createGeeqEvent(ee, "Manual quality score value changed",
                    this.fromTo(gq.getManualQualityScore(), gqVo.getManualQualityScore())));
            gq.setManualQualityScore(gqVo.getManualQualityScore());
        }
        // Update manual quality score override
        if (gq.isManualQualityOverride() != gqVo.isManualQualityOverride()) {
            gq.setLastManualOverride(this.createGeeqEvent(ee, "Manual quality score override changed",
                    this.fromTo(gq.isManualQualityOverride(), gqVo.isManualQualityOverride())));
            gq.setManualQualityOverride(gqVo.isManualQualityOverride());
        }

        // Update manual suitability score value
        if (gq.getManualSuitabilityScore() != gqVo.getManualSuitabilityScore()) {
            gq.setLastManualOverride(this.createGeeqEvent(ee, "Manual suitability score value changed",
                    this.fromTo(gq.getManualSuitabilityScore(), gqVo.getManualSuitabilityScore())));
            gq.setManualSuitabilityScore(gqVo.getManualSuitabilityScore());
        }
        // Update manual suitability score override
        if (gq.isManualSuitabilityOverride() != gqVo.isManualSuitabilityOverride()) {
            gq.setLastManualOverride(this.createGeeqEvent(ee, "Manual suitability score override changed",
                    this.fromTo(gq.isManualSuitabilityOverride(), gqVo.isManualSuitabilityOverride())));
            gq.setManualSuitabilityOverride(gqVo.isManualSuitabilityOverride());
        }

        // Update manual batch confound value
        if (gq.isManualHasBatchConfound() != gqVo.isManualHasBatchConfound()) {
            gq.setLastBatchConfoundChange(this.createGeeqEvent(ee, "Manual batch confound value changed",
                    this.fromTo(gq.isManualHasBatchConfound(), gqVo.isManualHasBatchConfound())));
            gq.setManualHasBatchConfound(gqVo.isManualHasBatchConfound());
        }
        // Update manual batch confound override
        if (gq.isManualBatchConfoundActive() != gqVo.isManualBatchConfoundActive()) {
            gq.setLastBatchConfoundChange(this.createGeeqEvent(ee, "Manual batch confound override changed",
                    this.fromTo(gq.isManualBatchConfoundActive(), gqVo.isManualBatchConfoundActive())));
            gq.setManualBatchConfoundActive(gqVo.isManualBatchConfoundActive());
        }

        // Update manual batch effect strong value
        if (gq.isManualHasStrongBatchEffect() != gqVo.isManualHasStrongBatchEffect()) {
            gq.setLastBatchEffectChange(this.createGeeqEvent(ee, "Manual strong batch effect value changed",
                    this.fromTo(gq.isManualHasStrongBatchEffect(), gqVo.isManualHasStrongBatchEffect())));
            gq.setManualHasStrongBatchEffect(gqVo.isManualHasStrongBatchEffect());
        }
        // Update manual batch effect no value
        if (gq.isManualHasNoBatchEffect() != gqVo.isManualHasNoBatchEffect()) {
            gq.setLastBatchEffectChange(this.createGeeqEvent(ee, "Manual no batch effect value changed",
                    this.fromTo(gq.isManualHasNoBatchEffect(), gqVo.isManualHasNoBatchEffect())));
            gq.setManualHasNoBatchEffect(gqVo.isManualHasNoBatchEffect());
        }
        // Update manual batch effect override
        if (gq.isManualBatchEffectActive() != gqVo.isManualBatchEffectActive()) {
            gq.setLastBatchEffectChange(this.createGeeqEvent(ee, "Manual batch effect override changed",
                    this.fromTo(gq.isManualBatchEffectActive(), gqVo.isManualBatchEffectActive())));
            gq.setManualBatchEffectActive(gqVo.isManualBatchEffectActive());
        }

        this.update(gq);
        Log.info(this.getClass(),
                GeeqServiceImpl.LOG_PREFIX + " Updated manual override settings for ee id " + eeId);
    }

    /**
     * Does all the preparations and calls the appropriate scoring methods.
     *
     * @param eeId the id of experiment to be scored.
     * @param mode the mode of scoring. All will redo all scores, batchEffect and batchConfound will only recalculate
     *             scores relevant to batch effect and batch confound, respectively.
     *             Scoring batch effect and confound is fairly fast, especially compared to the 'all' mode, which goes
     *             through almost all information associated with the experiment, and can therefore be very slow,
     *             depending on the experiment.
     */
    private void doScoring(Long eeId, String mode) {
        ExpressionExperiment ee = expressionExperimentService.load(eeId);

        if (ee == null) {
            return;
        }

        this.ensureEeHasGeeq(ee);
        Geeq gq = ee.getGeeq();

        Stopwatch stopwatch = new Stopwatch();
        stopwatch.start();

        try {
            // Update score values
            switch (mode) {
            case GeeqService.OPT_MODE_ALL:
                Log.info(this.getClass(),
                        GeeqServiceImpl.LOG_PREFIX + " Starting full geeq scoring for ee id " + eeId);
                gq = this.scoreAll(ee);
                break;
            case GeeqService.OPT_MODE_BATCH:
                Log.info(this.getClass(), GeeqServiceImpl.LOG_PREFIX
                        + " Starting batch info, confound and batch effect geeq re-scoring for ee id " + eeId);
                gq = this.scoreOnlyBatchArtifacts(ee);
                break;
            case GeeqService.OPT_MODE_REPS:
                Log.info(this.getClass(),
                        GeeqServiceImpl.LOG_PREFIX + " Starting replicates geeq re-scoring for ee id " + eeId);
                gq = this.scoreOnlyReplicates(ee);
                break;
            case GeeqService.OPT_MODE_PUB:
                Log.info(this.getClass(),
                        GeeqServiceImpl.LOG_PREFIX + " Starting publication geeq re-scoring for ee id " + eeId);
                gq = this.scoreOnlyPublication(ee);
                break;
            default:
                Log.warn(this.getClass(), GeeqServiceImpl.LOG_PREFIX + " Did not recognize the given mode " + mode
                        + " for ee id " + eeId);
            }
            Log.info(this.getClass(), GeeqServiceImpl.LOG_PREFIX + " Finished geeq re-scoring for ee id " + eeId
                    + ", saving results...");
        } catch (Exception e) {
            Log.info(this.getClass(), GeeqServiceImpl.LOG_PREFIX
                    + " Major problem encountered, scoring did not finish for ee id " + eeId + ".");
            e.printStackTrace();
            gq.addOtherIssues(e.getMessage());
        }

        // Recalculate final scores
        gq = this.updateQualityScore(gq);
        gq = this.updateSuitabilityScore(gq);

        // Add note if experiment curation not finished
        if (ee.getCurationDetails().getNeedsAttention()) {
            gq.addOtherIssues("Experiment was not fully curated when the score was calculated.");
        }

        stopwatch.stop();
        gq.setLastRun(this.createGeeqEvent(ee, "Re-ran geeq scoring (mode: " + mode + ")", "Took "
                + stopwatch.elapsedMillis() + "ms.\nUnexpected problems encountered: \n" + gq.getOtherIssues()));

        this.update(gq);
        Log.info(this.getClass(), GeeqServiceImpl.LOG_PREFIX + " took "
                + Math.round(stopwatch.elapsedTime(TimeUnit.SECONDS) / 60.0) + " minutes to process ee id " + eeId);

    }

    private Geeq updateSuitabilityScore(Geeq gq) {
        double[] suitability = gq.getSuitabilityScoreArray();
        double[] weights = gq.getSuitabilityScoreWeightsArray();
        double score = this.getWeightedMean(suitability, weights);
        gq.setDetectedSuitabilityScore(score);
        return gq;
    }

    private Geeq updateQualityScore(Geeq gq) {
        double[] quality = gq.getQualityScoreArray();
        double[] weights = gq.getQualityScoreWeightsArray();
        double score = this.getWeightedMean(quality, weights);
        gq.setDetectedQualityScore(score);
        return gq;
    }

    private Geeq scoreAll(ExpressionExperiment ee) {
        ee = expressionExperimentService.thaw(ee);
        Geeq gq = ee.getGeeq();
        Collection<ArrayDesign> ads = expressionExperimentService.getArrayDesignsUsed(ee);

        // Reset description of scoring problems
        gq.setOtherIssues("");

        // Suitability score calculation
        this.scorePublication(ee, gq);
        this.scorePlatformAmount(ads, gq);
        this.scorePlatformsTechMulti(ads, gq);
        this.scoreAvgPlatformPopularity(ads, gq);
        this.scoreAvgPlatformSize(ads, gq);
        this.scoreSampleSize(ee, gq);
        boolean hasRawData = this.scoreRawData(ee, gq);
        this.scoreMissingValues(ee, gq, hasRawData);

        // Quality score calculation
        DoubleMatrix<BioAssay, BioAssay> cormat = this.getCormat(ee, gq);
        double[] cormatLTri = this.getLowerTriCormat(cormat);

        this.scoreOutliers(gq, cormat);
        this.scoreSampleMeanCorrelation(gq, cormatLTri);
        this.scoreSampleMedianCorrelation(gq, cormatLTri);
        this.scoreSampleCorrelationVariance(gq, cormatLTri);
        this.scorePlatformsTech(ads, gq);
        this.scoreReplicates(ee, gq);
        boolean hasBatchInfo = this.scoreBatchInfo(ee, gq);
        boolean hasBatchConfound = this.scoreBatchConfound(ee, gq, hasBatchInfo);
        this.scoreBatchEffect(ee, gq, hasBatchInfo, hasBatchConfound);

        return gq;
    }

    private Geeq scoreOnlyBatchArtifacts(ExpressionExperiment ee) {
        ee = expressionExperimentService.thawLiter(ee);
        Geeq gq = ee.getGeeq();
        boolean info = this.scoreBatchInfo(ee, gq);
        boolean confound = this.scoreBatchConfound(ee, gq, info);
        this.scoreBatchEffect(ee, gq, info, confound);
        return gq;
    }

    private Geeq scoreOnlyReplicates(ExpressionExperiment ee) {
        ee = expressionExperimentService.thaw(ee);
        Geeq gq = ee.getGeeq();
        this.scoreReplicates(ee, gq);
        return gq;
    }

    private Geeq scoreOnlyPublication(ExpressionExperiment ee) {
        ee = expressionExperimentService.thawLiter(ee);
        Geeq gq = ee.getGeeq();
        this.scorePublication(ee, gq);
        return gq;
    }

    private void ensureEeHasGeeq(ExpressionExperiment ee) {
        Geeq gq = ee.getGeeq();
        if (gq == null) {
            gq = new Geeq();
            gq = this.create(gq);
            ee.setGeeq(gq);
            expressionExperimentService.update(ee);
        }
    }

    /*
     * Suitability scoring methods
     */

    private void scorePublication(ExpressionExperiment ee, Geeq gq) {
        double score;
        boolean hasBib;
        BibliographicReference bib = null;

        if (ee.getPrimaryPublication() != null) {
            bib = ee.getPrimaryPublication();
        } else if (ee.getOtherRelevantPublications() != null && ee.getOtherRelevantPublications().size() > 0) {
            bib = ee.getOtherRelevantPublications().iterator().next();
        }

        hasBib = bib != null;

        score = !hasBib ? GeeqServiceImpl.N_10 : GeeqServiceImpl.P_10;
        gq.setsScorePublication(score);

    }

    private void scorePlatformAmount(Collection<ArrayDesign> ads, Geeq gq) {
        double score;
        score = ads.size() > 2 ? GeeqServiceImpl.N_10
                : ads.size() > 1 ? GeeqServiceImpl.N_05 : GeeqServiceImpl.P_10;
        gq.setsScorePlatformAmount(score);
    }

    private void scorePlatformsTechMulti(Collection<ArrayDesign> ads, Geeq gq) {
        double score;
        boolean mismatch = false;

        ArrayDesign prev = null;
        for (ArrayDesign ad : ads) {
            if (prev == null) {
                prev = ad;
            } else {
                mismatch = !ad.getTechnologyType().equals(prev.getTechnologyType());
            }
        }

        score = mismatch ? GeeqServiceImpl.N_10 : GeeqServiceImpl.P_10;
        gq.setsScorePlatformsTechMulti(score);
    }

    private void scoreAvgPlatformPopularity(Collection<ArrayDesign> ads, Geeq gq) {
        double score;
        double scores[] = new double[ads.size()];

        // FIXME factor out magic numbers. Rationale: rarely used platforms are less favored
        int i = 0;
        for (ArrayDesign ad : ads) {
            int cnt = arrayDesignService.numExperiments(ad);
            scores[i++] = cnt < 10 ? GeeqServiceImpl.N_10
                    : cnt < 20 ? GeeqServiceImpl.N_05
                            : cnt < 50 ? GeeqServiceImpl.P_00
                                    : cnt < 100 ? GeeqServiceImpl.P_05 : GeeqServiceImpl.P_10;
        }

        score = this.getMean(scores);
        gq.setsScoreAvgPlatformPopularity(score);
    }

    /**
     * 
     * @param ads
     * @param gq
     */
    private void scoreAvgPlatformSize(Collection<ArrayDesign> ads, Geeq gq) {
        double score;
        double scores[] = new double[ads.size()];

        int i = 0;
        for (ArrayDesign ad : ads) {

            Taxon taxon = arrayDesignService.getTaxon(ad.getId());
            taxonService.thaw(taxon);
            long cnt = arrayDesignService.numGenes(ad);

            /*
             * FIXME we don't deal with miRNA platforms correctly
             */

            // human, rat, mouse, zebrafish and worm all have on the order 20k protein-coding genes.
            if (taxon.getCommonName().equals("human") || taxon.getCommonName().equals("rat")
                    || taxon.getCommonName().equals("mouse") || taxon.getCommonName().equals("zebrafish")
                    || taxon.getCommonName().equals("worm")) {
                scores[i++] = cnt < 5000 ? GeeqServiceImpl.N_10
                        : cnt < 10000 ? GeeqServiceImpl.N_05
                                : cnt < 15000 ? GeeqServiceImpl.P_00
                                        : cnt < 18000 ? GeeqServiceImpl.P_05 : GeeqServiceImpl.P_10;
            } else if (taxon.getCommonName().equals("yeast")) {
                // Yeast has about 6k protein-coding genes
                scores[i++] = cnt < 1000 ? GeeqServiceImpl.N_10
                        : cnt < 2500 ? GeeqServiceImpl.N_05
                                : cnt < 4000 ? GeeqServiceImpl.P_00
                                        : cnt < 5000 ? GeeqServiceImpl.P_05 : GeeqServiceImpl.P_10;
            } else if (taxon.getCommonName().equals("fly")) {
                // Fly has about 14k protein coding genes
                scores[i++] = cnt < 2000 ? GeeqServiceImpl.N_10
                        : cnt < 5000 ? GeeqServiceImpl.N_05
                                : cnt < 8000 ? GeeqServiceImpl.P_00
                                        : cnt < 10000 ? GeeqServiceImpl.P_05 : GeeqServiceImpl.P_10;
            }

        }

        score = this.getMean(scores);
        gq.setsScoreAvgPlatformSize(score);
    }

    /**
     * 
     * @param ee
     * @param gq
     */
    private void scoreSampleSize(ExpressionExperiment ee, Geeq gq) {
        double score;

        int cnt = ee.getBioAssays().size();

        // FIXME factor out these magic numbers. Rationale: >500 is "too big"; 5 is "very small" and 20-500 is just fine.
        if (cnt > 500) {
            score = GeeqServiceImpl.N_10;
        } else {
            if (cnt < 6) {
                score = GeeqServiceImpl.N_10;
            } else if (cnt < 10) {
                score = GeeqServiceImpl.N_03;
            } else if (cnt < 20) {
                score = GeeqServiceImpl.P_00;
            } else {
                score = GeeqServiceImpl.P_10;
            }
        }
        gq.setsScoreSampleSize(score);
    }

    private boolean scoreRawData(ExpressionExperiment ee, Geeq gq) {
        double score;

        Collection<QuantitationType> quantitationTypes = expressionExperimentService.getQuantitationTypes(ee);

        boolean dataReprocessedFromRaw = false;
        for (QuantitationType qt : quantitationTypes) {
            if (qt.getIsRecomputedFromRawData()) {
                dataReprocessedFromRaw = true;
            }
        }

        score = dataReprocessedFromRaw ? GeeqServiceImpl.P_10 : GeeqServiceImpl.N_10;
        gq.setsScoreRawData(score);
        return dataReprocessedFromRaw;
    }

    private void scoreMissingValues(ExpressionExperiment ee, Geeq gq, boolean hasRawData) {
        double score;
        boolean hasProcessedVectors = true;
        boolean hasMissingValues = false;
        String problems = "";

        if (!hasRawData) {
            try {
                ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService
                        .getProcessedExpressionDataMatrix(ee);
                hasMissingValues = dmatrix.hasMissingValues();
            } catch (IllegalArgumentException e) {
                hasProcessedVectors = false;
            } catch (Exception e) {
                hasProcessedVectors = false;
                problems = GeeqServiceImpl.ERR_MSG_MISSING_VALS + e.getMessage();
            }
        }

        score = hasRawData || (!hasMissingValues && hasProcessedVectors) ? GeeqServiceImpl.P_10
                : GeeqServiceImpl.N_10;
        gq.setNoVectors(!hasProcessedVectors);
        gq.addOtherIssues(problems);
        gq.setsScoreMissingValues(score);
    }

    /*
     * Quality scoring methods
     */

    private void scoreOutliers(Geeq gq, DoubleMatrix<BioAssay, BioAssay> cormat) {
        double score;
        boolean hasCorrMat = true;
        boolean hasNaNs = false;
        boolean outliers = true;

        if (cormat == null || cormat.rows() == 0) {
            hasCorrMat = false;
        } else {
            // Check if cormat has NaNs (diagonal is not checked, but there really should not be NaNs on the diagonal)
            Double[] doubleArray = ArrayUtils.toObject(this.getLowerTriangle(cormat.getRawMatrix()));
            List<Double> list = new ArrayList<>(Arrays.asList(doubleArray));
            hasNaNs = list.contains(Double.NaN);

            outliers = outlierDetectionService.identifyOutliersByMedianCorrelation(cormat).size() > 0;
        }

        score = outliers ? GeeqServiceImpl.N_10 : GeeqServiceImpl.P_10; //
        gq.setCorrMatIssues((byte) (!hasCorrMat ? 1 : hasNaNs ? 2 : 0));
        gq.setqScoreOutliers(score);
    }

    private void scoreSampleMeanCorrelation(Geeq gq, double[] cormatLTri) {
        this.cormatOps(gq, cormatLTri, CormatOpsType.mean);
    }

    private void scoreSampleMedianCorrelation(Geeq gq, double[] cormatLTri) {
        this.cormatOps(gq, cormatLTri, CormatOpsType.median);
    }

    private void scoreSampleCorrelationVariance(Geeq gq, double[] cormatLTri) {
        this.cormatOps(gq, cormatLTri, CormatOpsType.variance);
    }

    private void scorePlatformsTech(Collection<ArrayDesign> ads, Geeq gq) {
        double score;
        boolean twoColor = false;

        for (ArrayDesign ad : ads) {
            if (ad.getTechnologyType().getValue().equals(TechnologyType.TWOCOLOR.getValue())) {
                twoColor = true;
                break;
            }
        }

        score = twoColor ? GeeqServiceImpl.N_10 : GeeqServiceImpl.P_10;
        gq.setqScorePlatformsTech(score);
    }

    private void scoreReplicates(ExpressionExperiment ee, Geeq gq) {
        double score;
        int replicates = -1;
        if (ee.getExperimentalDesign() != null && !ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
            replicates = this.leastReplicates(ee);

            if (replicates < GEEQ_WORST_REPLICATION_THRESHOLD) {
                score = GeeqServiceImpl.N_10;
            } else if (replicates < GEEQ_MEDIUM_REPLICATION_THRESHOLD) {
                score = GeeqServiceImpl.P_00;
            } else {
                score = GeeqServiceImpl.P_10;
            }
        } else { // no information, so we give no penalty or bonus
            score = GeeqServiceImpl.P_00;
            gq.setReplicatesIssues((byte) 1); // no factors
        }

        // extra details
        if (replicates == -1) {
            gq.setReplicatesIssues((byte) 2); // somewhat redundant with no factors
        } else if (replicates == -2) {
            gq.setReplicatesIssues((byte) 3); // ALL values have only one sample (no replication at all)
        } else if (replicates == 0) { // shouldn't happen
            gq.setReplicatesIssues((byte) 4);
        }

        gq.setqScoreReplicates(score);
    }

    private boolean scoreBatchInfo(ExpressionExperiment ee, Geeq gq) {
        double score;
        boolean hasInfo = expressionExperimentService.checkHasBatchInfo(ee);

        score = !hasInfo ? GeeqServiceImpl.N_10 : GeeqServiceImpl.P_10;
        gq.setqScoreBatchInfo(score);
        return hasInfo;
    }

    private void scoreBatchEffect(ExpressionExperiment ee, Geeq gq, boolean infoDetected, boolean confound) {
        double score;
        boolean hasInfo = true;
        boolean hasStrong = false;
        boolean hasNone = false;
        boolean corrected = false;

        if (infoDetected && !confound) {
            boolean manual = gq.isManualBatchEffectActive();
            if (!manual) {
                BatchEffectDetails be = expressionExperimentService.getBatchEffect(ee);
                if (be == null) {
                    Log.warn(this.getClass(), GeeqServiceImpl.ERR_B_EFFECT_BAD_STATE);
                    hasInfo = false;
                } else {
                    hasStrong = be.getPvalue() < 0.0001;
                    hasNone = be.getPvalue() > 0.1;
                    corrected = be.getDataWasBatchCorrected();
                }
            } else {
                hasStrong = gq.isManualHasStrongBatchEffect();
                hasNone = gq.isManualHasNoBatchEffect();
            }
        }

        score = !infoDetected || !hasInfo || confound ? GeeqServiceImpl.P_00
                : hasStrong ? GeeqServiceImpl.BATCH_EFF_STRONG
                        : hasNone ? GeeqServiceImpl.BATCH_EFF_NONE : GeeqServiceImpl.BATCH_EFF_WEAK;
        gq.setBatchCorrected(corrected);
        gq.setqScoreBatchEffect(score);
    }

    private boolean scoreBatchConfound(ExpressionExperiment ee, Geeq gq, boolean infoDetected) {
        double score;
        boolean hasConfound = false;

        if (infoDetected) {
            boolean manual = gq.isManualBatchConfoundActive();
            if (!manual) {
                String confInfo = expressionExperimentService.getBatchConfound(ee);
                if (confInfo != null) {
                    // null can mean no confound but also no batch info, which is ok since both should result in score 0
                    hasConfound = true;
                }
            } else {
                hasConfound = gq.isManualHasBatchConfound();
            }
        }

        score = !infoDetected ? GeeqServiceImpl.P_00
                : hasConfound ? GeeqServiceImpl.BATCH_CONF_HAS : GeeqServiceImpl.BATCH_CONF_NO_HAS;
        gq.setqScoreBatchConfound(score);

        return hasConfound;
    }

    /*
     * Support methods and other stuff
     */

    private AuditEvent createGeeqEvent(ExpressionExperiment ee, String note, String details) {
        return auditTrailService.addUpdateEvent(ee, GeeqEvent.class, note, details);
    }

    private String fromTo(Object from, Object to) {
        return "From: " + from + " To: " + to;
    }

    /**
     * Checks for all combinations of factor values in the experiments bio assays, and counts the amount of
     * their occurrences, then checks what the lowest amount is. The method only combines factor values from
     * first (up to) MAX_EFS_REPLICATE_CHECK categorical experimental factors it encounters, and always disregards
     * values from batch factors.
     *
     * @param  ee an expression experiment to get the count for.
     * @return    the lowest number of replicates (ignoring factor value combinations with only one replicate),
     *            or -2 if <em>all</em> factor value combinations were present only once, or -1, if there were no usable
     *            factors
     *            to begin with.
     */
    private int leastReplicates(ExpressionExperiment ee) {
        HashMap<String, Integer> factors = new HashMap<>();
        Collection<BioAssay> bas = ee.getBioAssays();
        List<ExperimentalFactor> keepEfs = new ArrayList<>(GeeqServiceImpl.MAX_EFS_REPLICATE_CHECK);

        for (BioAssay ba : bas) {
            Collection<FactorValue> fvs = ba.getSampleUsed().getFactorValues();

            //only keep up to MAX_EFS_REPLICATE_CHECK categorical factors, ignoring batch factor and DE_EXCLUDE
            Collection<FactorValue> removeFvs = new LinkedList<>();
            for (FactorValue fv : fvs) {
                ExperimentalFactor ef = fv.getExperimentalFactor();
                if (ExperimentalDesignUtils.isBatch(ef) || DE_EXCLUDE.equalsIgnoreCase(fv.getDescriptiveString())
                        || ef.getType().equals(FactorType.CONTINUOUS)) {
                    removeFvs.add(fv); // always remove batch factor values and DE_EXCLUDE values
                } else {
                    if (!keepEfs.contains(ef) && keepEfs.size() <= GeeqServiceImpl.MAX_EFS_REPLICATE_CHECK) {
                        keepEfs.add(ef); // keep first MAX_EFS_REPLICATE_CHECK encountered factors
                    } else if (!keepEfs.contains(ef)) {
                        removeFvs.add(fv); // if from different factor, remove the value
                    }
                }
            }
            fvs.removeAll(removeFvs);

            // sort so the keys in the hash map are consistent
            Collection<Long> ids = EntityUtils.getIds(fvs);
            Long[] arr = ids.toArray(new Long[0]);
            Arrays.sort(arr);
            String key = Arrays.toString(arr);

            // add new key or increment counter of existing one
            Integer cnt = factors.get(key);
            factors.put(key, cnt == null ? 1 : ++cnt);
        }

        List<Integer> counts = new ArrayList<>(factors.values());
        Collections.sort(counts);

        if (counts.isEmpty()) {
            return -1;
        } else if (counts.get(counts.size() - 1) == 1) {
            return -2; // all conditions have only one replicate
        } else {
            return counts.get(0);
        }

    }

    private DoubleMatrix<BioAssay, BioAssay> getCormat(ExpressionExperiment ee, Geeq gq) {
        DoubleMatrix<BioAssay, BioAssay> cormat = null;
        try {
            cormat = sampleCoexpressionAnalysisService.loadTryRegressedThenFull(ee);
        } catch (IllegalStateException e) {
            Log.warn(this.getClass(),
                    GeeqServiceImpl.LOG_PREFIX + GeeqServiceImpl.ERR_MSG_CORMAT_MISSING_VALS + ee.getId());
        } catch (Exception e) {
            String err = GeeqServiceImpl.ERR_MSG_CORMAT + e.getMessage();
            Log.warn(this.getClass(), GeeqServiceImpl.LOG_PREFIX + err);
            gq.addOtherIssues(err);
        }
        return cormat;
    }

    private double[] getLowerTriCormat(DoubleMatrix<BioAssay, BioAssay> cormat) {
        if (cormat == null || cormat.rows() == 0) {
            return new double[] {};
        }
        double[] corTri = this.getLowerTriangle(cormat.getRawMatrix());

        // We have to remove NaNs, some cormats have them (we notify user about this in the outlier score)
        // this is not very efficient, but the DoubleMatrix does not have a method to get an array of Doubles (not doubles)
        Double[] doubleArray = ArrayUtils.toObject(corTri);
        List<Double> list = new ArrayList<>(Arrays.asList(doubleArray));
        //noinspection StatementWithEmptyBody // because java standard libraries suck, we have to iterate like this to remove all NaNs, not just the first one.
        while (list.remove(Double.NaN)) {
        }

        return ArrayUtils.toPrimitive(list.toArray(new Double[0]));
    }

    private void cormatOps(Geeq gq, double[] cormatLTri, CormatOpsType type) {
        double score;
        double value = 0;
        boolean hasCorrMat = true;

        if (cormatLTri == null || cormatLTri.length == 0) {
            hasCorrMat = false;
        } else {
            switch (type) {
            case mean:
                value = this.getMean(cormatLTri);
                break;
            case median:
                value = this.getMedian(cormatLTri);
                break;
            case variance:
                value = this.getVariance(cormatLTri);
                break;
            default:
                throw new IllegalStateException();
            }
        }

        score = !hasCorrMat ? GeeqServiceImpl.P_00 : value;
        switch (type) {
        case mean:
            gq.setqScoreSampleMeanCorrelation(score);
            break;
        case median:
            gq.setqScoreSampleMedianCorrelation(score);
            break;
        case variance:
            gq.setqScoreSampleCorrelationVariance(score);
            break;
        default:
            throw new IllegalStateException();
        }
    }

    private double getWeightedMean(double[] vals, double[] weights) {
        if (vals == null || weights == null || vals.length != weights.length) {
            throw new IllegalArgumentException(GeeqServiceImpl.ERR_W_MEAN_BAD_ARGS);
        }
        double sum = GeeqServiceImpl.P_00;
        double wSum = GeeqServiceImpl.P_00;
        for (int i = 0; i < vals.length; i++) {
            sum += weights[i] * vals[i];
            wSum += weights[i];
        }
        return sum / wSum;

    }

    private double getMean(double[] arr) {
        return StatUtils.mean(arr);
    }

    private double getMedian(double[] arr) {
        return Descriptive.median(new DoubleArrayList(arr));
    }

    private double getVariance(double[] arr) {
        return StatUtils.variance(arr);
    }

    private double[] getLowerTriangle(double[][] mat) {
        // half of the square, minus half of one row (the diagonal)
        double[] tri = new double[((mat.length * mat[0].length) / 2) - (mat.length / 2)];

        int k = 0;
        for (int i = 0; i < mat.length; i++) {
            for (int j = 0; j < mat[i].length; j++) {
                if (i > j) {
                    tri[k] = mat[i][j];
                    k++;
                }
            }
        }

        return tri;
    }

    private enum CormatOpsType {
        mean, median, variance
    }

}