ubc.pavlab.aspiredb.server.service.BurdenAnalysisServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubc.pavlab.aspiredb.server.service.BurdenAnalysisServiceImpl.java

Source

/*
 * The aspiredb project
 * 
 * Copyright (c) 2013 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubc.pavlab.aspiredb.server.service;

import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.math3.stat.StatUtils;
import org.apache.commons.math3.stat.inference.ChiSquareTest;
import org.apache.commons.math3.stat.inference.MannWhitneyUTest;
import org.directwebremoting.annotations.RemoteMethod;
import org.directwebremoting.annotations.RemoteProxy;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import ubc.pavlab.aspiredb.server.dao.LabelDao;
import ubc.pavlab.aspiredb.server.dao.ProjectDao;
import ubc.pavlab.aspiredb.server.dao.SubjectDao;
import ubc.pavlab.aspiredb.server.dao.VariantDao;
import ubc.pavlab.aspiredb.server.exceptions.BioMartServiceException;
import ubc.pavlab.aspiredb.server.exceptions.NeurocartaServiceException;
import ubc.pavlab.aspiredb.server.exceptions.NotLoggedInException;
import ubc.pavlab.aspiredb.server.model.CNV;
import ubc.pavlab.aspiredb.server.model.Characteristic;
import ubc.pavlab.aspiredb.server.model.CnvType;
import ubc.pavlab.aspiredb.server.model.Indel;
import ubc.pavlab.aspiredb.server.model.Label;
import ubc.pavlab.aspiredb.server.model.Project;
import ubc.pavlab.aspiredb.server.model.SNV;
import ubc.pavlab.aspiredb.server.model.Subject;
import ubc.pavlab.aspiredb.server.model.Variant;
import ubc.pavlab.aspiredb.shared.BurdenAnalysisValueObject;
import ubc.pavlab.aspiredb.shared.GeneValueObject;
import ubc.pavlab.aspiredb.shared.LabelValueObject;
import ubc.pavlab.aspiredb.shared.PhenotypeEnrichmentValueObject;
import ubc.pavlab.aspiredb.shared.query.CharacteristicProperty;
import ubc.pavlab.aspiredb.shared.query.PropertyValue;

/**
 * Compares the "burden" between two subject labels. "burden" can be number of genes affected, CNV length, number of
 * subjects with a pathogenic variant, etc.
 * 
 * @author ptan
 */
@Service("burdenAnalysisService")
@RemoteProxy(name = "BurdenAnalysisService")
public class BurdenAnalysisServiceImpl implements BurdenAnalysisService {

    @Autowired
    private ProjectDao projectDao;
    @Autowired
    private SubjectService subjectService;
    @Autowired
    private VariantService variantService;
    @Autowired
    private PhenotypeService phenotypeService;
    @Autowired
    private SubjectDao subjectDao;
    @Autowired
    private VariantDao variantDao;
    @Autowired
    private LabelDao labelDao;
    @Autowired
    private LabelService labelService;
    @Autowired
    private GeneService geneService;

    DecimalFormat dformat = new DecimalFormat("#.#####");

    enum CnvBurdenAnalysisPerSubject {
        PATIENT_ID, LABEL_NAME, NUM_SAMPLES, NUM_CNV_LOSS, NUM_CNV_GAIN, NUM_CNV_UNKNOWN, TOTAL_CNV, TOTAL_SIZE, AVG_SIZE, NUM_GENES, NUM_VARIANTS_WITH_GENE, AVG_GENES_PER_CNV, TOTAL_SNV, TOTAL_INDEL
    }

    final Map<CnvBurdenAnalysisPerSubject, String> CnvBurdenAnalysisPerSubjectDesc = new HashMap<>();

    {
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.NUM_CNV_LOSS, "Mean number of CNV loss");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.NUM_CNV_GAIN, "Mean number of CNV gains");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.NUM_CNV_UNKNOWN,
                "Mean number of unknown CNV types");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.TOTAL_CNV, "Mean number of CNVs");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.TOTAL_SIZE, "Mean total CNV size (bp)");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.AVG_SIZE, "Mean average CNV size (bp)");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.NUM_GENES,
                "Mean number of genes overlapping a variant");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.NUM_VARIANTS_WITH_GENE,
                "Mean number of variants overlapping a gene");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.AVG_GENES_PER_CNV,
                "Mean number of genes per CNV");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.TOTAL_SNV, "Mean number of SNVs");
        CnvBurdenAnalysisPerSubjectDesc.put(CnvBurdenAnalysisPerSubject.TOTAL_INDEL, "Mean number of Indels");
    }

    protected static Log log = LogFactory.getLog(BurdenAnalysisServiceImpl.class);

    private Map<CnvBurdenAnalysisPerSubject, String> statsToString(Map<CnvBurdenAnalysisPerSubject, ?> stats) {
        HashMap<CnvBurdenAnalysisPerSubject, String> ret = new HashMap<>();
        for (CnvBurdenAnalysisPerSubject key : stats.keySet()) {
            if (key.equals(CnvBurdenAnalysisPerSubject.AVG_SIZE)
                    || key.equals(CnvBurdenAnalysisPerSubject.AVG_GENES_PER_CNV)) {
                ret.put(key, String.format("%.1f", stats.get(key)));
            } else if (key.equals(CnvBurdenAnalysisPerSubject.PATIENT_ID)) {
                ret.put(key, String.format("%s", stats.get(key).toString()));
            } else if (key.equals(CnvBurdenAnalysisPerSubject.LABEL_NAME)) {
                ret.put(key, String.format("%s", stats.get(key).toString()));
            } else {
                ret.put(key, dformat.format(stats.get(key)));
            }
        }
        return ret;
    }

    /**
     * @param variants
     * @return Map<Subject.patientId, Map<statsName, statsDoubleValue>>
     * @throws NotLoggedInException
     * @throws BioMartServiceException
     */
    private Map<String, Map<CnvBurdenAnalysisPerSubject, Double>> getVariantStatsBySubject(
            Collection<Variant> variants) throws BioMartServiceException, NotLoggedInException {
        Map<String, Collection<Long>> subjectVariants = new HashMap<>();
        for (Variant v : variants) {
            Subject subject = subjectDao.load(v.getSubject().getId());

            // group variants by patient id
            Collection<Long> variantsAdded = subjectVariants.get(subject.getPatientId());
            if (variantsAdded == null) {
                variantsAdded = new ArrayList<>();
                subjectVariants.put(subject.getPatientId(), variantsAdded);
            }
            variantsAdded.add(v.getId());

        }

        // store stats by patientId
        Map<String, Map<CnvBurdenAnalysisPerSubject, Double>> patientIdStats = new HashMap<>();
        for (String patientId : subjectVariants.keySet()) {
            Map<CnvBurdenAnalysisPerSubject, Double> stats = getCnvBurdenAnalysisPerSubject(
                    subjectVariants.get(patientId));
            patientIdStats.put(patientId, stats);
        }

        return patientIdStats;
    }

    /**
     * @param statName
     * @param patientIds
     * @return double[] array of the statName values for all of the patientIds
     */
    private double[] getPatientStats(CnvBurdenAnalysisPerSubject statName, Collection<String> patientIds,
            Map<String, Map<CnvBurdenAnalysisPerSubject, Double>> patientIdStats) {
        Collection<Double> result = new ArrayList<>();
        for (String patientId : patientIds) {
            Double val = patientIdStats.get(patientId).get(statName);
            if (val == null) {
                continue;
            }
            result.add(val);
        }
        return ArrayUtils.toPrimitive(result.toArray(new Double[0]));
    }

    @SuppressWarnings("boxing")
    private double[] removeNaNs(double[] d) {
        Collection<Double> ret = new ArrayList<>();
        for (int i = 0; i < d.length; i++) {
            if (Double.isInfinite(d[i]) || Double.isNaN(d[i])) {
                continue;
            }
            ret.add(d[i]);
        }
        return ArrayUtils.toPrimitive(ret.toArray(new Double[ret.size()]));
    }

    @SuppressWarnings("boxing")
    @Override
    @RemoteMethod
    @Transactional(readOnly = true)
    public Collection<BurdenAnalysisValueObject> getBurdenAnalysisPerSubjectLabel(LabelValueObject group1,
            LabelValueObject group2, Collection<Long> variantIds)
            throws NotLoggedInException, BioMartServiceException {

        final String NO_LABEL = "<NO_LABEL>";

        Collection<BurdenAnalysisValueObject> ret = new ArrayList<>();

        // if ( group1 == null ) {
        // log.warn( "Labels can't be null! Group1 is " + group1 );
        // return ret;
        // }

        if (variantIds == null) {
            log.warn("No variants found.");
            return ret;
        }

        Collection<Subject> subjects = new HashSet<>();
        Collection<String> allPatientIds = new HashSet<>();
        @SuppressWarnings("unchecked")
        Collection<Variant> variants = (Collection<Variant>) variantDao.load(variantIds);
        Map<String, Collection<CNV>> cnvGainMap = new HashMap<>();
        Map<String, Collection<CNV>> cnvLossMap = new HashMap<>();
        Map<String, Collection<CNV>> cnvUnknownMap = new HashMap<>();
        Map<String, Collection<SNV>> snvMap = new HashMap<>();
        Map<String, Collection<Indel>> indelMap = new HashMap<>();
        for (Variant v : variants) {
            Subject subject = subjectDao.load(v.getSubject().getId());
            subjects.add(subject);
            allPatientIds.add(subject.getPatientId());

            if (v instanceof CNV) {
                CNV cnv = (CNV) v;
                if (cnv.getType() == CnvType.GAIN) {
                    if (!cnvGainMap.containsKey(subject.getPatientId())) {
                        cnvGainMap.put(subject.getPatientId(), new ArrayList<CNV>());
                    }
                    cnvGainMap.get(subject.getPatientId()).add((CNV) v);
                } else if (cnv.getType() == CnvType.LOSS) {
                    if (!cnvLossMap.containsKey(subject.getPatientId())) {
                        cnvLossMap.put(subject.getPatientId(), new ArrayList<CNV>());
                    }
                    cnvLossMap.get(subject.getPatientId()).add((CNV) v);
                } else {
                    if (!cnvUnknownMap.containsKey(subject.getPatientId())) {
                        cnvUnknownMap.put(subject.getPatientId(), new ArrayList<CNV>());
                    }
                    cnvUnknownMap.get(subject.getPatientId()).add((CNV) v);
                }

            } else if (v instanceof SNV) {
                if (!snvMap.containsKey(subject.getPatientId())) {
                    snvMap.put(subject.getPatientId(), new ArrayList<SNV>());
                }
                snvMap.get(subject.getPatientId()).add((SNV) v);
            } else if (v instanceof Indel) {
                if (!indelMap.containsKey(subject.getPatientId())) {
                    indelMap.put(subject.getPatientId(), new ArrayList<Indel>());
                }
                indelMap.get(subject.getPatientId()).add((Indel) v);
            }
        }

        Map<String, Collection<String>> labelPatientId = null;

        if (group1 != null && group2 != null) {
            labelPatientId = subjectService.groupPatientIdsBySubjectLabel(subjects);
            labelPatientId = getMutuallyExclusivePatientIds(labelPatientId, group1.getName(), group2.getName());
        } else if (group1 == null) {
            group1 = new LabelValueObject(NO_LABEL);
            labelPatientId = new HashMap<>();
            labelPatientId.put(group1.getName(), allPatientIds);
        } else {
            log.warn("Either select both Group 1 and Group 2 labels or none at all");
            return ret;
        }

        if (group1 != null && !labelPatientId.containsKey(group1.getName())) {
            log.warn("Subject label " + group1.getName() + " not found");
            return ret;
        }

        if (group2 != null && !labelPatientId.containsKey(group2.getName())) {
            log.warn("Subject label " + group2.getName() + " not found");
            return ret;
        }

        // count total number of CNVs and SNVs by group
        Map<String, Collection<CNV>> cnvGainMapByGroup = new HashMap<>();
        Map<String, Collection<CNV>> cnvLossMapByGroup = new HashMap<>();
        Map<String, Collection<CNV>> cnvUnknownMapByGroup = new HashMap<>();
        Map<String, Collection<SNV>> snvMapByGroup = new HashMap<>();
        Map<String, Collection<Indel>> indelMapByGroup = new HashMap<>();
        Collection<String> groupNames = new ArrayList<>();
        groupNames.add(group1.getName());
        if (group2 != null) {
            groupNames.add(group2.getName());
        }
        for (String groupName : groupNames) {
            cnvGainMapByGroup.put(groupName, new ArrayList<CNV>());
            cnvLossMapByGroup.put(groupName, new ArrayList<CNV>());
            cnvUnknownMapByGroup.put(groupName, new ArrayList<CNV>());
            snvMapByGroup.put(groupName, new ArrayList<SNV>());
            indelMapByGroup.put(groupName, new ArrayList<Indel>());
            for (String patientId : labelPatientId.get(groupName)) {
                if (cnvGainMap.containsKey(patientId)) {
                    cnvGainMapByGroup.get(groupName).addAll(cnvGainMap.get(patientId));
                }
                if (cnvLossMap.containsKey(patientId)) {
                    cnvLossMapByGroup.get(groupName).addAll(cnvLossMap.get(patientId));
                }
                if (cnvUnknownMap.containsKey(patientId)) {
                    cnvUnknownMapByGroup.get(groupName).addAll(cnvUnknownMap.get(patientId));
                }
                if (snvMap.containsKey(patientId)) {
                    snvMapByGroup.get(groupName).addAll(snvMap.get(patientId));
                }
                if (indelMap.containsKey(patientId)) {
                    indelMapByGroup.get(groupName).addAll(indelMap.get(patientId));
                }
            }
        }

        Double group1SubjectCount = 1.0 * labelPatientId.get(group1.getName()).size();
        Double group2SubjectCount = group2 != null ? 1.0 * labelPatientId.get(group2.getName()).size() : null;
        ret.add(new BurdenAnalysisValueObject("Total number of subjects", group1SubjectCount, group2SubjectCount,
                null));
        Double group1CNVGainCount = 1.0 * cnvGainMapByGroup.get(group1.getName()).size();
        Double group2CNVGainCount = group2 != null ? 1.0 * cnvGainMapByGroup.get(group2.getName()).size() : null;
        Double group1CNVLossCount = 1.0 * cnvLossMapByGroup.get(group1.getName()).size();
        Double group2CNVLossCount = group2 != null ? 1.0 * cnvLossMapByGroup.get(group2.getName()).size() : null;
        Double group1CNVUnknownCount = 1.0 * cnvUnknownMapByGroup.get(group1.getName()).size();
        Double group2CNVUnknownCount = group2 != null ? 1.0 * cnvUnknownMapByGroup.get(group2.getName()).size()
                : null;
        Double group1SNVCount = 1.0 * snvMapByGroup.get(group1.getName()).size();
        Double group2SNVCount = group2 != null ? 1.0 * snvMapByGroup.get(group2.getName()).size() : null;
        Double group1IndelCount = 1.0 * indelMapByGroup.get(group1.getName()).size();
        Double group2IndelCount = group2 != null ? 1.0 * indelMapByGroup.get(group2.getName()).size() : null;
        // if ( group1CNVCount + group2CNVCount > 0 ) {
        ret.add(new BurdenAnalysisValueObject("Total number of CNV gains", group1CNVGainCount, group2CNVGainCount,
                null));
        // }
        ret.add(new BurdenAnalysisValueObject("Total number of CNV losses", group1CNVLossCount, group2CNVLossCount,
                null));
        ret.add(new BurdenAnalysisValueObject("Total number of unknown CNV types", group1CNVUnknownCount,
                group2CNVUnknownCount, null));
        // if ( group1SNVCount + group2SNVCount > 0 ) {
        ret.add(new BurdenAnalysisValueObject("Total number of SNVs", group1SNVCount, group2SNVCount, null));
        // }
        // if ( group1IndelCount + group2IndelCount > 0 ) {
        ret.add(new BurdenAnalysisValueObject("Total number of Indels", group1IndelCount, group2IndelCount, null));
        // }

        Map<String, Map<CnvBurdenAnalysisPerSubject, Double>> patientIdStats = getVariantStatsBySubject(variants);

        for (CnvBurdenAnalysisPerSubject statName : CnvBurdenAnalysisPerSubject.values()) {

            double[] label1Stats = getPatientStats(statName, labelPatientId.get(group1.getName()), patientIdStats);

            if (label1Stats.length == 0) {
                log.warn("No values found for " + statName);
                continue;
            }

            // remove NaNs
            label1Stats = removeNaNs(label1Stats);

            Double mean1 = StatUtils.mean(label1Stats);
            Double mean2 = null;
            Double pval = null;

            if (group2 != null) {
                double[] label2Stats = getPatientStats(statName, labelPatientId.get(group2.getName()),
                        patientIdStats);
                label2Stats = removeNaNs(label2Stats);

                mean2 = StatUtils.mean(label2Stats);

                if (mean1 + mean2 == 0) {
                    log.warn("No " + statName + " stats was found");
                    continue;
                }

                pval = new MannWhitneyUTest().mannWhitneyUTest(label1Stats, label2Stats);
            }

            ret.add(new BurdenAnalysisValueObject(CnvBurdenAnalysisPerSubjectDesc.get(statName), mean1, mean2,
                    pval));
        }

        return ret;
    }

    /**
     * Excludes those IDs that have both label1 and label2.
     * 
     * @param labelPatientId
     * @param name
     * @param name2
     * @return
     */
    private Map<String, Collection<String>> getMutuallyExclusivePatientIds(
            Map<String, Collection<String>> labelPatientId, String label1, String label2) {

        if (!labelPatientId.containsKey(label1) || !labelPatientId.containsKey(label2)) {
            log.warn("Label not found");
            return labelPatientId;
        }
        Collection<String> label1IdsOld = new ArrayList<>(labelPatientId.get(label1));
        Collection<String> label2IdsOld = new ArrayList<>(labelPatientId.get(label2));
        Collection<String> label1Ids = labelPatientId.get(label1);
        Collection<String> label2Ids = labelPatientId.get(label2);
        boolean removed = label1Ids.removeAll(label2IdsOld);
        label2Ids.removeAll(label1IdsOld);

        // for logging
        if (removed) {
            label1IdsOld.removeAll(label1Ids);
            label2IdsOld.removeAll(label2Ids);
            label1IdsOld.addAll(label2IdsOld);
            String overlappedIds = StringUtils.join(label1IdsOld, ", ");
            log.info("Ignoring " + label1IdsOld.size() + " ("
                    + overlappedIds.substring(0, Math.min(50, overlappedIds.length()))
                    + "... ) subjects with labels " + label1 + " and " + label2 + ". After filtering, " + label1
                    + " has " + label1Ids.size() + " and " + label2 + " has " + label2Ids.size());
        }

        return labelPatientId;
    }

    /**
     * Excludes those IDs that have both label1 and label2.
     * 
     * @param labelSubjectId
     * @param name
     * @param name2
     * @return
     */
    private Map<String, Collection<Long>> getMutuallyExclusiveSubjectIds(
            Map<String, Collection<Long>> labelSubjectId, String label1, String label2) {

        if (!labelSubjectId.containsKey(label1) || !labelSubjectId.containsKey(label2)) {
            log.warn("Label not found");
            return labelSubjectId;
        }
        Collection<Long> label1IdsOld = new ArrayList<>(labelSubjectId.get(label1));
        Collection<Long> label2IdsOld = new ArrayList<>(labelSubjectId.get(label2));
        Collection<Long> label1Ids = labelSubjectId.get(label1);
        Collection<Long> label2Ids = labelSubjectId.get(label2);
        boolean removed = label1Ids.removeAll(label2IdsOld);
        label2Ids.removeAll(label1IdsOld);

        // for logging
        if (removed) {
            label1IdsOld.removeAll(label1Ids);
            label2IdsOld.removeAll(label2Ids);
            label1IdsOld.addAll(label2IdsOld);
            log.info("Ignoring " + label1IdsOld.size() + " (" + StringUtils.join(label1IdsOld, ",")
                    + ") subjects with labels " + label1 + " and " + label2 + ". After filtering, " + label1
                    + " has " + label1Ids.size() + " and " + label2 + " has " + label2Ids.size());
        }

        return labelSubjectId;
    }

    /**
     * Returns the BurdenAnalysis per Subject (See Bug 4129).
     * 
     * @param subjectIds
     * @return a map with the patientID as index, e.g. { PATIENT_ID : 'Patient_01', NUM_DELETION : 2, NUM_DUPLICATION :
     *         4, }'
     * @throws NotLoggedInException
     * @throws BioMartServiceException
     */
    @Override
    @RemoteMethod
    public Collection<Map<CnvBurdenAnalysisPerSubject, String>> getBurdenAnalysisPerSubject(
            Collection<Long> variantIds) throws NotLoggedInException, BioMartServiceException {
        Collection<Map<CnvBurdenAnalysisPerSubject, String>> results = new ArrayList<>();

        // group variants by patient id
        Map<String, Collection<Long>> subjectVariants = new HashMap<>();
        for (Variant v : variantDao.load(variantIds)) {

            // skip non-CNV
            if (!(v instanceof CNV)) {
                continue;
            }

            Collection<Long> variantsAdded = subjectVariants.get(v.getSubject().getPatientId());
            if (variantsAdded == null) {
                variantsAdded = new ArrayList<>();
                subjectVariants.put(v.getSubject().getPatientId(), variantsAdded);
            }
            variantsAdded.add(v.getId());
        }

        for (String patientId : subjectVariants.keySet()) {
            Map<CnvBurdenAnalysisPerSubject, Double> stats = getCnvBurdenAnalysisPerSubject(
                    subjectVariants.get(patientId));

            Map<CnvBurdenAnalysisPerSubject, String> statsStr = statsToString(stats);

            statsStr.put(CnvBurdenAnalysisPerSubject.PATIENT_ID, patientId);

            results.add(statsStr);
        }

        return results;
    }

    /**
     * @param variantIds
     * @return
     * @throws NotLoggedInException
     * @throws BioMartServiceException
     */
    @SuppressWarnings("boxing")
    private Map<CnvBurdenAnalysisPerSubject, Double> getCnvBurdenAnalysisPerSubject(Collection<Long> variantIds)
            throws NotLoggedInException, BioMartServiceException {

        // Initialize
        Map<CnvBurdenAnalysisPerSubject, Double> results = new HashMap<>();
        for (CnvBurdenAnalysisPerSubject ba : CnvBurdenAnalysisPerSubject.values()) {
            // don't add PATIENT_ID and LABEL_NAME
            if (ba.equals(CnvBurdenAnalysisPerSubject.PATIENT_ID)
                    || ba.equals(CnvBurdenAnalysisPerSubject.LABEL_NAME)
                    || ba.equals(CnvBurdenAnalysisPerSubject.NUM_SAMPLES)) {
                continue;
            }
            results.put(ba, 0.0);
        }

        // Gene overlap
        Map<Long, Collection<GeneValueObject>> genesPerVariant = geneService.getGenesPerVariant(variantIds);

        // Calculate some statistics
        for (Long variantId : genesPerVariant.keySet()) {

            Variant v = variantDao.load(variantId);

            // skip non-CNV
            // if ( !( v instanceof CNV ) ) {
            // continue;
            // }

            Collection<GeneValueObject> genes = genesPerVariant.get(variantId);

            if (v instanceof CNV) {
                CNV cnv = (CNV) v;

                if (cnv.getType().equals(CnvType.GAIN)) {
                    results.put(CnvBurdenAnalysisPerSubject.NUM_CNV_GAIN,
                            results.get(CnvBurdenAnalysisPerSubject.NUM_CNV_GAIN) + 1);
                } else if (cnv.getType().equals(CnvType.LOSS)) {
                    results.put(CnvBurdenAnalysisPerSubject.NUM_CNV_LOSS,
                            results.get(CnvBurdenAnalysisPerSubject.NUM_CNV_LOSS) + 1);
                } else {
                    results.put(CnvBurdenAnalysisPerSubject.NUM_CNV_UNKNOWN,
                            results.get(CnvBurdenAnalysisPerSubject.NUM_CNV_UNKNOWN) + 1);
                }

                results.put(CnvBurdenAnalysisPerSubject.TOTAL_CNV,
                        results.get(CnvBurdenAnalysisPerSubject.TOTAL_CNV) + 1);

                results.put(CnvBurdenAnalysisPerSubject.TOTAL_SIZE,
                        results.get(CnvBurdenAnalysisPerSubject.TOTAL_SIZE) + cnv.getCnvLength());
            } else if (v instanceof SNV) {
                results.put(CnvBurdenAnalysisPerSubject.TOTAL_SNV,
                        results.get(CnvBurdenAnalysisPerSubject.TOTAL_SNV) + 1);
            } else if (v instanceof Indel) {
                results.put(CnvBurdenAnalysisPerSubject.TOTAL_INDEL,
                        results.get(CnvBurdenAnalysisPerSubject.TOTAL_INDEL) + 1);
            }

            if (genes.size() > 0) {
                results.put(CnvBurdenAnalysisPerSubject.NUM_GENES,
                        results.get(CnvBurdenAnalysisPerSubject.NUM_GENES) + genes.size());

                results.put(CnvBurdenAnalysisPerSubject.NUM_VARIANTS_WITH_GENE,
                        results.get(CnvBurdenAnalysisPerSubject.NUM_VARIANTS_WITH_GENE) + 1);
            }

        }

        if (results.get(CnvBurdenAnalysisPerSubject.TOTAL_SIZE) > 0
                && results.get(CnvBurdenAnalysisPerSubject.NUM_VARIANTS_WITH_GENE) > 0) {
            results.put(CnvBurdenAnalysisPerSubject.AVG_SIZE, results.get(CnvBurdenAnalysisPerSubject.TOTAL_SIZE)
                    / results.get(CnvBurdenAnalysisPerSubject.TOTAL_CNV) * 1.0);

            // results.put(
            // CnvBurdenAnalysisPerSubject.NUM_CNVS_WITH_GENE.toString(),
            // results.get( CnvBurdenAnalysisPerSubject.NUM_CNVS_WITH_GENE.toString() )
            // / results.get( CnvBurdenAnalysisPerSubject.TOTAL.toString() ) * 1.0 );

            results.put(CnvBurdenAnalysisPerSubject.AVG_GENES_PER_CNV,
                    results.get(CnvBurdenAnalysisPerSubject.NUM_GENES)
                            / results.get(CnvBurdenAnalysisPerSubject.NUM_VARIANTS_WITH_GENE) * 1.0);
        }

        return results;
    }

    @SuppressWarnings({ "boxing", "rawtypes" })
    @Override
    @Transactional(readOnly = true)
    @RemoteMethod
    public Collection<BurdenAnalysisValueObject> getBurdenAnalysisCharacteristic(
            CharacteristicProperty characteristic, LabelValueObject group1, LabelValueObject group2,
            Collection<Long> variantIds)
            throws NotLoggedInException, BioMartServiceException, NeurocartaServiceException {

        class YesNoCount {
            long yesCount = 0;
            long noCount = 0;

            public long incrementYes() {
                return this.yesCount++;
            }

            public long incrementNo() {
                return this.noCount++;
            }

            public long getYesCount() {
                return this.yesCount;
            }

            public long getNoCount() {
                return this.noCount;
            }
        }

        final int MAX_CHAR_VALUES = 50; // maximum number of unique characteristic values we allow

        Collection<BurdenAnalysisValueObject> ret = new ArrayList<>();

        if (characteristic == null) {
            log.warn("Characteristic can't be null!");
            return ret;
        }

        if (group1 == null || group2 == null) {
            log.warn("Labels can't be null! Group1 is " + group1 + " and group2 is " + group2);
            return ret;
        }

        if (variantIds == null) {
            log.warn("No variants found.");
            return ret;
        }

        Map<String, Subject> patientIdSubjects = new HashMap<>();
        Collection<Variant> variants = (Collection<Variant>) variantDao.load(variantIds);

        for (Variant v : variants) {
            Subject subject = subjectDao.load(v.getSubject().getId());
            patientIdSubjects.put(subject.getPatientId(), subject);
        }

        // key: labelName, values: patientIds
        Map<String, Collection<String>> labelPatientId = subjectService
                .groupPatientIdsBySubjectLabel(patientIdSubjects.values());

        labelPatientId = getMutuallyExclusivePatientIds(labelPatientId, group1.getName(), group2.getName());

        if (!labelPatientId.containsKey(group1.getName())) {
            log.warn("Subject label " + group1.getName() + " not found");
            return ret;
        }

        if (!labelPatientId.containsKey(group2.getName())) {
            log.warn("Subject label " + group2.getName() + " not found");
            return ret;
        }

        Collection<PropertyValue> charValues = variantService.suggestValues(characteristic, null);
        if (charValues.size() > MAX_CHAR_VALUES) {
            log.warn("There are too many (>" + MAX_CHAR_VALUES + ") " + characteristic.getDisplayName()
                    + " values!");
            return ret;
        }

        // keys: charValue, subjectLabel {group1, group2},
        // value: YesNoCount {subjectCountYes, subjectCountNo}
        Map<String, Map<String, YesNoCount>> counts = new HashMap<>();

        // init map
        for (PropertyValue v : charValues) {
            Map<String, YesNoCount> m = new HashMap<>();
            counts.put(v.getDisplayValue(), m);
            for (String label : labelPatientId.keySet()) {
                m.put(label, new YesNoCount());
            }
        }

        // count which subjects has a variant that matches the character name and value
        // TODO could potentially use some optimization?
        for (PropertyValue v : charValues) {
            for (String label : labelPatientId.keySet()) {
                for (String patientId : labelPatientId.get(label)) {

                    boolean found = subjectHasCharacteristicValue(patientIdSubjects.get(patientId).getVariants(),
                            characteristic.getDisplayName(), v);

                    YesNoCount yesNo = counts.get(v.getDisplayValue()).get(label);

                    if (found) {
                        yesNo.incrementYes();
                    } else {
                        yesNo.incrementNo();
                    }

                }
            }
        }

        // calculate chi-squred tests for each charValue using counts
        for (String charValue : counts.keySet()) {

            YesNoCount grp1 = counts.get(charValue).get(group1.getName());
            YesNoCount grp2 = counts.get(charValue).get(group2.getName());

            String grp1frac = grp1.getYesCount() + "/" + (grp1.getYesCount() + grp1.getNoCount());
            String grp2frac = grp2.getYesCount() + "/" + (grp2.getYesCount() + grp2.getNoCount());

            long[][] d = { { grp1.getYesCount(), grp1.getNoCount() }, { grp2.getYesCount(), grp2.getNoCount() } };

            double pval = new ChiSquareTest().chiSquareTest(d);

            if (Double.isNaN(pval)) {
                log.warn(charValue + "'s p-value is NaN, grp1 " + grp1frac + ", grp2 " + grp2frac);
                continue;
            }

            ret.add(new BurdenAnalysisValueObject(charValue, grp1frac, grp2frac, pval));
        }

        return ret;
    }

    @SuppressWarnings("rawtypes")
    private boolean subjectHasCharacteristicValue(Collection<Variant> variants, String displayName,
            PropertyValue charValue) {

        for (Variant v : variants) {
            for (Characteristic c : v.getCharacteristics()) {
                if (!c.getKey().equalsIgnoreCase(displayName)) {
                    continue;
                }
                if (c.getValue().equalsIgnoreCase(charValue.getDisplayValue())) {
                    return true;
                }
            }
        }

        return false;
    }

    private boolean subjectHasVariantLabel(Collection<Variant> variants, LabelValueObject label) {

        for (Variant v : variants) {
            for (Label l : v.getLabels()) {
                if (l.getId() == label.getId()) {
                    return true;
                }
            }
        }

        return false;
    }

    @SuppressWarnings("boxing")
    @Override
    @RemoteMethod
    @Transactional(readOnly = true)
    public Collection<BurdenAnalysisValueObject> getBurdenAnalysisPerPhenotype(Collection<Long> activeProjects,
            LabelValueObject group1, LabelValueObject group2) throws NotLoggedInException, BioMartServiceException {

        Collection<BurdenAnalysisValueObject> ret = new ArrayList<>();

        if (group1 == null || group2 == null) {
            log.warn("Labels can't be null! Group1 is " + group1 + " and group2 is " + group2);
            return ret;
        }

        Project activeProject = projectDao.load(activeProjects.iterator().next());

        Map<String, Collection<Long>> labelSubjectId = subjectService
                .groupSubjectIdsBySubjectLabel(activeProject.getSubjects());

        labelSubjectId = getMutuallyExclusiveSubjectIds(labelSubjectId, group1.getName(), group2.getName());

        if (!labelSubjectId.containsKey(group1.getName())) {
            log.warn("Subject label " + group1.getName() + " not found");
            return ret;
        }

        if (!labelSubjectId.containsKey(group2.getName())) {
            log.warn("Subject label " + group2.getName() + " not found");
            return ret;
        }

        List<PhenotypeEnrichmentValueObject> phenoVOs = phenotypeService.getPhenotypeEnrichmentValueObjects(
                activeProjects, labelSubjectId.get(group1.getName()), labelSubjectId.get(group2.getName()));

        // convert to BurdenAnalysisValueObject
        for (PhenotypeEnrichmentValueObject pvo : phenoVOs) {
            ret.add(new BurdenAnalysisValueObject(pvo));
        }

        return ret;

    }

    @SuppressWarnings({ "boxing" })
    @Override
    @Transactional(readOnly = true)
    @RemoteMethod
    public Collection<BurdenAnalysisValueObject> getBurdenAnalysisVariantLabel(LabelValueObject group1,
            LabelValueObject group2, Collection<Long> variantIds)
            throws NotLoggedInException, BioMartServiceException, NeurocartaServiceException {

        class YesNoCount {
            long yesCount = 0;
            long noCount = 0;

            public long incrementYes() {
                return this.yesCount++;
            }

            public long incrementNo() {
                return this.noCount++;
            }

            public long getYesCount() {
                return this.yesCount;
            }

            public long getNoCount() {
                return this.noCount;
            }
        }

        final int MAX_VARIANT_LABELS = 50; // maximum number of unique characteristic values we allow

        Collection<BurdenAnalysisValueObject> ret = new ArrayList<>();

        if (group1 == null || group2 == null) {
            log.warn("Labels can't be null! Group1 is " + group1 + " and group2 is " + group2);
            return ret;
        }

        if (variantIds == null) {
            log.warn("No variants found.");
            return ret;
        }

        Map<String, Subject> patientIdSubjects = new HashMap<>();
        Collection<Variant> variants = (Collection<Variant>) variantDao.load(variantIds);

        for (Variant v : variants) {
            Subject subject = subjectDao.load(v.getSubject().getId());
            patientIdSubjects.put(subject.getPatientId(), subject);
        }

        // key: labelName, values: patientIds
        Map<String, Collection<String>> labelPatientId = subjectService
                .groupPatientIdsBySubjectLabel(patientIdSubjects.values());

        labelPatientId = getMutuallyExclusivePatientIds(labelPatientId, group1.getName(), group2.getName());

        if (!labelPatientId.containsKey(group1.getName())) {
            log.warn("Subject label " + group1.getName() + " not found");
            return ret;
        }

        if (!labelPatientId.containsKey(group2.getName())) {
            log.warn("Subject label " + group2.getName() + " not found");
            return ret;
        }

        Collection<LabelValueObject> variantLabels = labelService
                .getVariantLabelsByProjectId(variants.iterator().next().getSubject().getProject().getId());
        if (variantLabels.size() > MAX_VARIANT_LABELS) {
            log.warn("There are too many (>" + MAX_VARIANT_LABELS + ") variant labels!");
            return ret;
        }

        // keys: charValue, subjectLabel {group1, group2},
        // value: YesNoCount {subjectCountYes, subjectCountNo}
        Map<String, Map<String, YesNoCount>> counts = new HashMap<>();

        // init map
        for (LabelValueObject l : variantLabels) {
            Map<String, YesNoCount> m = new HashMap<>();
            counts.put(l.getName(), m);
            for (String subjectLabel : labelPatientId.keySet()) {
                m.put(subjectLabel, new YesNoCount());
            }
        }

        // count which subjects has a variant that matches the character name and value
        // TODO could potentially use some optimization?
        for (LabelValueObject l : variantLabels) {
            for (String subjectLabel : labelPatientId.keySet()) {
                for (String patientId : labelPatientId.get(subjectLabel)) {

                    boolean found = subjectHasVariantLabel(patientIdSubjects.get(patientId).getVariants(), l);

                    YesNoCount yesNo = counts.get(l.getName()).get(subjectLabel);

                    if (found) {
                        yesNo.incrementYes();
                    } else {
                        yesNo.incrementNo();
                    }

                }
            }
        }

        // calculate chi-squred tests for each charValue using counts
        for (String charValue : counts.keySet()) {

            YesNoCount grp1 = counts.get(charValue).get(group1.getName());
            YesNoCount grp2 = counts.get(charValue).get(group2.getName());

            String grp1frac = grp1.getYesCount() + "/" + (grp1.getYesCount() + grp1.getNoCount());
            String grp2frac = grp2.getYesCount() + "/" + (grp2.getYesCount() + grp2.getNoCount());

            long[][] d = { { grp1.getYesCount(), grp1.getNoCount() }, { grp2.getYesCount(), grp2.getNoCount() } };

            double pval = new ChiSquareTest().chiSquareTest(d);

            if (Double.isNaN(pval)) {
                log.warn(charValue + "'s p-value is NaN, grp1 " + grp1frac + ", grp2 " + grp2frac);
                continue;
            }

            ret.add(new BurdenAnalysisValueObject(charValue, grp1frac, grp2frac, pval));
        }

        return ret;
    }
}