org.opencb.opencga.storage.mongodb.variant.load.variants.MongoDBVariantMerger.java Source code

Here is the source code for org.opencb.opencga.storage.mongodb.variant.load.variants.MongoDBVariantMerger.java

Source

/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.mongodb.variant.load.variants;

import com.mongodb.MongoExecutionTimeoutException;
import org.apache.commons.lang3.StringUtils;
import org.bson.BsonArray;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.types.Binary;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantNormalizer;
import org.opencb.biodata.models.variant.avro.FileEntry;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.biodata.tools.variant.merge.VariantMerger;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.commons.run.ParallelTaskRunner;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.mongodb.variant.converters.DocumentToSamplesConverter;
import org.opencb.opencga.storage.mongodb.variant.converters.DocumentToStudyVariantEntryConverter;
import org.opencb.opencga.storage.mongodb.variant.converters.DocumentToVariantConverter;
import org.opencb.opencga.storage.mongodb.variant.converters.VariantStringIdConverter;
import org.opencb.opencga.storage.mongodb.variant.load.stage.MongoDBVariantStageLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Collectors;

import static com.mongodb.client.model.Filters.and;
import static com.mongodb.client.model.Filters.eq;
import static com.mongodb.client.model.Updates.*;
import static org.opencb.opencga.storage.mongodb.variant.MongoDBVariantStorageEngine.MongoDBVariantOptions.DEFAULT_GENOTYPE;
import static org.opencb.opencga.storage.mongodb.variant.converters.DocumentToSamplesConverter.UNKNOWN_GENOTYPE;
import static org.opencb.opencga.storage.mongodb.variant.converters.DocumentToStudyVariantEntryConverter.*;
import static org.opencb.opencga.storage.mongodb.variant.converters.DocumentToVariantConverter.IDS_FIELD;
import static org.opencb.opencga.storage.mongodb.variant.converters.DocumentToVariantConverter.STUDIES_FIELD;
import static org.opencb.opencga.storage.mongodb.variant.load.stage.MongoDBVariantStageLoader.STRING_ID_CONVERTER;
import static org.opencb.opencga.storage.mongodb.variant.load.stage.MongoDBVariantStageLoader.VARIANT_CONVERTER_DEFAULT;

/**
 * Created on 07/04/16.
 *
 * Merges data from the Stage collection into the Variant collection.
 *
 * There are different situations, depending on how the data arrives and on
 * the current configuration of the variants.
 *
 * ### Depending on the configuration of the variants
 *
 * Easy scenarios:
 *
 *                +---------------+------+
 *                |     Stage     | Vars |
 *                +-------+-------+------+
 *                | File1 | File2 | Vars |
 *  +-------------+-------+-------+------+
 *  | 1:100:A:C   | DATA  | DATA  | DATA |  <--- A1) Update variants with data from File1 and File2
 *  +-------------+-------+-------+------+
 *  | 1:125:A:C   | DATA  | DATA  | ---- |  <--- A2) New variant
 *  +-------------+-------+-------+------+
 *  | 1:150:G:T   | ----  | ----  | DATA |  <--- A3) Fill Gaps. If new study, skip.
 *  +-------------+-------+-------+------+
 *
 *  Some partial information from the stage collection. Fill gaps
 *  +-------------+-------+-------+------+
 *  | 1:200:A:C   | ----  | DATA  | DATA |  <--- B1) Update variants with data from File2 and missing from File1
 *  +-------------+-------+-------+------+
 *  | 1:225:A:C   | DATA  | ----  | ---- |  <--- B2) New variant with missing information from File2
 *  +-------------+-------+-------+------+
 *  | 1:250:G:T   | ----  | ----  | DATA |  <--- B3) Missing information. Fill Gaps. If new study, skip.
 *  +-------------+-------+-------+------+
 *
 *  Overlapping variants
 *  +-------------+-------+-------+------+
 *  | 1:300:A:C   | ----  | DATA  | DATA |  <---
 *  +-------------+-------+-------+------+      |- C1) Simple merge variants
 *  | 1:300:A:T   | DATA  | ----  | DATA |  <---
 *  +=============+=======+=======+======+
 *  | 1:400:A:C   | DATA  | DATA  | DATA |  <---
 *  +-------------+-------+-------+------+      |- C2) Simple merge variants. File1 has duplicated information for these variants.
 *  | 1:400:A:T   | DATA  | ----  | DATA |  <---   Ideally, variants from File1 have the correct secondary alternate. Discard one variant.
 *  +=============+=======+=======+======+
 *  | 1:500:A:C   | ----  | ----  | DATA |  <---
 *  +-------------+-------+-------+------+      |- C3) New overlapped region. Fetch data from the database and merge.
 *  | 1:500:A:T   | DATA  | DATA  | ---- |  <---   Fill gaps for indexed files
 *  +=============+=======+=======+======+
 *  | 1:600:A:C   | ----  | ----  | DATA |  <---
 *  +-------------+-------+-------+------+      |- C4) New overlapped region. Fetch data from the database and merge.
 *  | 1:600:A:T   | DATA  | ----  | ---- |  <---   Fill gaps for indexed files and file2
 *  +=============+=======+=======+======+
 *  | 1:700:A:C   | ----  | ----  | DATA |  <---
 *  +-------------+-------+-------+------+      |- C5) Already existing overlapped region. No extra information. No overlapping variants.
 *  | 1:700:A:T   | ----  | ----  | DATA |  <---   Fill gaps for file1 and file2
 *  +=============+=======+=======+======+
 *
 *  Duplicated variants. Do not load duplicated variants.
 *  +-------------+-------+-------+------+
 *  | 1:200:A:C   | ----  | DATA* | DATA |  <--- D1) Duplicated variants in file 2. Fill gaps for file1 and file2.
 *  +-------------+-------+-------+------+           Skip if new study (nonInserted++)
 *  | 1:250:G:T   | ----  | DATA* | ---- |  <--- D2) Duplicated variants in file 2, missing for the rest.
 *  +-------------+-------+-------+------+           Skip variant! (nonInserted++)
 *  | 1:225:A:C   | DATA  | DATA* | DATA |  <--- D3) Duplicated variants in file 2. Fill gaps for file2 and insert file1.
 *  +-------------+-------+-------+------+
 *  | 1:225:A:C   | DATA  | DATA* | ---- |  <--- D4) Duplicated variants in file 2. Fill gaps for file2 and insert file1.
 *  +-------------+-------+-------+------+
 *
 *
 * ### Depending on how the data is split into files, and how the files are sent.
 *
 * The data can come split by chromosome and/or by batches of samples. In the
 * following tables, columns are batches of samples and rows are different
 * regions, for example, chromosomes.
 * Each cell represents a file: an X if the file is already merged into the
 * database, or an O if it is being loaded right now.
 *
 * In each scenario, care must be taken when filling gaps (for new or missing
 * variants): the "indexed files" have to be selected properly for each region.
 *
 * X = File loaded and merged
 * O = To be merged
 *
 * a) Merging one file when other files from the same sample set and chromosome are already merged.
 *        +---+---+---+
 *        |S1 |S2 |S3 |
 * +------+---+---+---+
 * | Chr1 | X | X |   |
 * +------+---+---+---+
 * | Chr2 | X | O |   |
 * +------+---+---+---+
 * | Chr3 | X |   |   |
 * +------+---+---+---+
 * Indexed files = {f_21}
 *
 * a.2) Merging different files from the same sample set.
 *      Care must be taken when filling gaps (for new or missing variants).
 *      In this case, merge chromosome by chromosome in separate calls.
 *        +---+---+---+
 *        |S1 |S2 |S3 |
 * +------+---+---+---+
 * | Chr1 | X | X | X |
 * +------+---+---+---+
 * | Chr2 | X | X | O |
 * +------+---+---+---+
 * | Chr3 | X | X | O |
 * +------+---+---+---+
 * Indexed files in Chr2 = {f_21, f_22}
 * Indexed files in Chr3 = {f_31, f_32}
 *
 * b) Merging one or more files from the same chromosome
 *    Given the indexed files in this region, no special consideration is needed.
 *        +---+---+---+---+
 *        |S1 |S2 |S3 |S4 |
 * +------+---+---+---+---+
 * | Chr1 | X | X | X | X |
 * +------+---+---+---+---+
 * | Chr2 | X | X | O | O |
 * +------+---+---+---+---+
 * | Chr3 | X |   |   |   |
 * +------+---+---+---+---+
 * Indexed files = {f_21, f_22}
 *
 * c) Mix different regions and sample sets.
 *    Same situation as case a.2.
 *        +---+---+---+
 *        |S1 |S2 |S3 |
 * +------+---+---+---+
 * | Chr1 | X | X | O |
 * +------+---+---+---+
 * | Chr2 | X | X |   |
 * +------+---+---+---+
 * | Chr3 | X | O |   |
 * +------+---+---+---+
 * Indexed files in Chr1 = {f_11, f_12}
 * Indexed files in Chr3 = {f_31}
 *
 * d) Mix different region sizes: a single chromosome and a whole genome.
 *    Do not allow this!
 *        +---+---+---+
 *        |S1 |S2 |S3 |
 * +------+---+---+---+
 * | Chr1 | X | X | O |
 * +------+---+---+ O |
 * | Chr2 | X | O | O |
 * +------+---+---+ O |
 * | Chr3 | X | X | O |
 * +------+---+---+---+
 *
 * @author Jacobo Coll &lt;jacobo167@gmail.com&gt;
 */
public class MongoDBVariantMerger implements ParallelTaskRunner.Task<Document, MongoDBOperations> {

    private final VariantDBAdaptor dbAdaptor;

    /** Study to be merged. */
    private final Integer studyId;
    private final String studyIdStr;
    /** Files to be merged. */
    private final List<Integer> fileIds;
    /** Indexed files in the region that we are merging. */
    private final Set<Integer> indexedFiles;
    /**
     * Check overlapping variants.
     * Only needed when loading more than one file at the same time, or when other files were already loaded in the same region.
     **/
    private boolean checkOverlappings;
    private final DocumentToVariantConverter variantConverter;
    private final DocumentToStudyVariantEntryConverter studyConverter;
    private final StudyConfiguration studyConfiguration;
    private final boolean excludeGenotypes;
    private final boolean addUnknownGenotypes;

    // Variables that must be safe under concurrent modification
    private final Map<Integer, LinkedHashMap<String, Integer>> samplesPositionMap;
    private final List<Integer> indexedSamples;

    private final Logger logger = LoggerFactory.getLogger(MongoDBVariantMerger.class);
    private final VariantMerger variantMerger;
    private final List<String> format;
    private boolean resume;

    public MongoDBVariantMerger(VariantDBAdaptor dbAdaptor, StudyConfiguration studyConfiguration,
            List<Integer> fileIds, Set<Integer> indexedFiles, boolean resume, boolean ignoreOverlapping) {
        this.dbAdaptor = Objects.requireNonNull(dbAdaptor);
        this.studyConfiguration = Objects.requireNonNull(studyConfiguration);
        this.fileIds = Objects.requireNonNull(fileIds);
        this.indexedFiles = Objects.requireNonNull(indexedFiles);

        excludeGenotypes = getExcludeGenotypes(studyConfiguration);
        format = buildFormat(studyConfiguration);
        indexedSamples = Collections.unmodifiableList(buildIndexedSamplesList(fileIds));
        studyId = studyConfiguration.getStudyId();
        studyIdStr = String.valueOf(studyId);
        String defaultGenotype = studyConfiguration.getAttributes().getString(DEFAULT_GENOTYPE.key(), "");
        if (defaultGenotype.equals(DocumentToSamplesConverter.UNKNOWN_GENOTYPE)) {
            logger.debug("Do not need fill unknown genotype array. DefaultGenotype is UNKNOWN_GENOTYPE({}).",
                    DocumentToSamplesConverter.UNKNOWN_GENOTYPE);
            addUnknownGenotypes = false;
        } else if (excludeGenotypes) {
            logger.debug("Do not need fill unknown genotype array. Excluding genotypes.");
            addUnknownGenotypes = false;
        } else {
            addUnknownGenotypes = true;
        }

        checkOverlappings = !ignoreOverlapping && (fileIds.size() > 1 || !indexedFiles.isEmpty());
        DocumentToSamplesConverter samplesConverter = new DocumentToSamplesConverter(this.studyConfiguration);
        studyConverter = new DocumentToStudyVariantEntryConverter(false, samplesConverter);
        variantConverter = new DocumentToVariantConverter(studyConverter, null);
        samplesPositionMap = new HashMap<>();

        variantMerger = new VariantMerger();

        this.resume = resume;
    }

    @Override
    public List<MongoDBOperations> apply(List<Document> batch) {
        try {
            return Collections.singletonList(merge(batch));
        } catch (Exception e) {
            if (batch.isEmpty()) {
                logger.error("Fail loading empty batch");
            } else {
                logger.error("Fail loading batch from " + batch.get(0).get("_id") + " to "
                        + batch.get(batch.size() - 1).get("_id"));
            }
            throw e;
        }
    }

    public MongoDBOperations merge(List<Document> variants) {

        // Set of operations to be executed in the Database
        MongoDBOperations mongoDBOps = new MongoDBOperations();

        Variant previousVariant = null;
        Document previousDocument = null;
        int start = 0;
        int end = 0;
        String chromosome = null;
        List<Document> overlappedVariants = null;
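        // Stage documents are expected to be sorted by position. Keep a sliding
        // window over the current overlapping region [chromosome, start, end] and
        // flush it whenever the next variant falls outside that region.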

        Iterator<Document> iterator = variants.iterator();
        // Get first valid variant
        while (iterator.hasNext()) {
            Document document = iterator.next();
            if (document.get(studyIdStr) != null) {
                previousDocument = document;
                previousVariant = STRING_ID_CONVERTER.convertToDataModelType(previousDocument);

                chromosome = previousVariant.getChromosome();
                start = previousVariant.getStart();
                end = getEnd(previousVariant);
                break;
            }
        }

        while (iterator.hasNext()) {
            Document document = iterator.next();
            Variant variant = STRING_ID_CONVERTER.convertToDataModelType(document);
            Document study = document.get(studyIdStr, Document.class);
            if (study != null) {
                if (checkOverlappings && variant.overlapWith(chromosome, start, end, true)) {
                    // If the variant overlaps with the last one, add to the overlappedVariants list.
                    // Do not process any variant yet!
                    if (overlappedVariants == null) {
                        overlappedVariants = new ArrayList<>();
                        overlappedVariants.add(previousDocument);
                    }
                    overlappedVariants.add(document);

                    // Take min start and max end
                    start = Math.min(start, variant.getStart());
                    end = Math.max(end, getEnd(variant));
                } else {
                    // If the current variant does not overlap with the previous variant, we can load the previous variant (or region)
                    processVariants(overlappedVariants, previousDocument, previousVariant, mongoDBOps);
                    overlappedVariants = null;

                    // Reset region
                    chromosome = variant.getChromosome();
                    start = variant.getStart();
                    end = getEnd(variant);

                }

                previousDocument = document;
                previousVariant = variant;
            }
        }

        // Process remaining variants
        processVariants(overlappedVariants, previousDocument, previousVariant, mongoDBOps);

        //        // Execute MongoDB Operations
        //        return executeMongoDBOperations(mongoDBOps);

        return mongoDBOps;
    }

    public void processVariants(List<Document> overlappedVariants, Document document, Variant variant,
            MongoDBOperations mongoDBOps) {
        try {
            if (overlappedVariants != null) {
                for (Document overlappedVariant : overlappedVariants) {
                    if (alreadyProcessedStageDocument(overlappedVariant)) {
                        // Skip this batch if any of the documents is already processed
                        mongoDBOps.setMissingVariantsNoFillGaps(
                                mongoDBOps.getMissingVariantsNoFillGaps() + overlappedVariants.size());
                        return;
                    }
                }
                processOverlappedVariants(overlappedVariants, mongoDBOps);
            } else if (document != null) {
                if (alreadyProcessedStageDocument(document)) {
                    mongoDBOps.setMissingVariantsNoFillGaps(mongoDBOps.getMissingVariantsNoFillGaps() + 1);
                    return;
                }
                processVariant(document, variant, mongoDBOps);
            }
        } catch (Exception e) {
            logger.error("Error processing variant " + variant, e);
            throw e;
        }
    }

    public boolean alreadyProcessedStageDocument(Document overlappedVariant) {
        Document study = overlappedVariant.get(studyIdStr, Document.class);
        for (Integer fileId : fileIds) {
            if (study.containsKey(fileId.toString())) {
                // If any of the files is null, the document is already processed.
                return study.get(fileId.toString()) == null;
            }
        }
        return false;
    }

    public Integer getEnd(Variant variant) {
        //        if (variant.getType().equals(VariantType.SYMBOLIC) || variant.getType().equals(VariantType.NO_VARIATION)) {
        //            return variant.getEnd();
        //        } else {
        //            return variant.getStart() + Math.max(variant.getReference().length() - 1, -1 /* 0 */);
        //        }
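        // For SYMBOLIC and CNV variants, use the start as the end: structural
        // variants may span huge regions, and using the real end would (presumably)
        // make everything within that span count as overlapping.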
        if (EnumSet.of(VariantType.SYMBOLIC, VariantType.CNV).contains(variant.getType())) {
            return variant.getStart();
        } else {
            return variant.getEnd();
        }
    }

    /**
     * Given a document from the stage collection, transforms the document into a set of MongoDB operations.
     *
     * It may be a new variant document in the database, a new study in the document, or just an update of an existing study variant.
     *
     * @param document          Document to load
     * @param emptyVar          Parsed empty variant of the document. Only chr, pos, ref, alt
     * @param mongoDBOps        Set of MongoDB operations to update
     */
    protected void processVariant(Document document, Variant emptyVar, MongoDBOperations mongoDBOps) {
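        // Stage document layout (sketch): { "_id": <string variant id>, "end", "ref", "alt",
        //   "<studyId>": { <NEW_STUDY_FIELD>: boolean, "<fileId>": [ <Binary-encoded variant>, ... ] } }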
        Document study = document.get(studyIdStr, Document.class);

        // New variant in the study.
        boolean newStudy = isNewStudy(study);
        // New variant in the collection if it is new in this study and in every other study present in the stage document
        boolean newVariant = isNewVariant(document, newStudy);

        Set<String> ids = new HashSet<>();
        List<Document> fileDocuments = new LinkedList<>();
        List<Document> alternateDocuments = new LinkedList<>();
        Document gts = new Document();

        // Loop over each file that has to be merged
        int missing = 0;
        int skipped = 0;
        int duplicated = 0;
        for (Integer fileId : fileIds) {

            // Different actions if the file is present or missing in the document.
            if (study.containsKey(fileId.toString())) {
                // Duplicated documents are treated as missing. Increment the number of duplicated variants
                List<Binary> duplicatedVariants = getListFromDocument(study, fileId.toString());
                if (duplicatedVariants.size() > 1) {
                    mongoDBOps.setNonInserted(mongoDBOps.getNonInserted() + duplicatedVariants.size());
                    if (addUnknownGenotypes) {
                        addSampleIdsGenotypes(gts, UNKNOWN_GENOTYPE, getSamplesInFile(fileId));
                    }
                    logDuplicatedVariant(emptyVar, duplicatedVariants.size(), fileId);
                    duplicated++;
                    continue;
                }

                Binary file = duplicatedVariants.get(0);
                Variant variant = VARIANT_CONVERTER_DEFAULT.convertToDataModelType(file);
                if (variant.getType().equals(VariantType.NO_VARIATION)
                        || variant.getType().equals(VariantType.SYMBOLIC)) {
                    mongoDBOps.setSkipped(mongoDBOps.getSkipped() + 1);
                    skipped++;
                    continue;
                }
                if (StringUtils.isNotEmpty(variant.getId()) && !variant.getId().equals(variant.toString())) {
                    ids.add(variant.getId());
                }
                if (variant.getNames() != null) {
                    ids.addAll(variant.getNames());
                }
                emptyVar.setType(variant.getType());
                variant.getStudies().get(0).setSamplesPosition(getSamplesPosition(fileId));
                Document newDocument = studyConverter.convertToStorageType(variant, variant.getStudies().get(0));
                fileDocuments.add((Document) getListFromDocument(newDocument, FILES_FIELD).get(0));
                alternateDocuments = getListFromDocument(newDocument, ALTERNATES_FIELD);

                if (newDocument.containsKey(GENOTYPES_FIELD)) {
                    for (Map.Entry<String, Object> entry : newDocument.get(GENOTYPES_FIELD, Document.class)
                            .entrySet()) {
                        addSampleIdsGenotypes(gts, entry.getKey(), (List<Integer>) entry.getValue());
                    }
                }

            } else if (addUnknownGenotypes) {
                //                logger.debug("File {} not in variant {}", fileId, emptyVar);
                addSampleIdsGenotypes(gts, UNKNOWN_GENOTYPE, getSamplesInFile(fileId));
                missing++;
            }

        }

        if (newStudy && addUnknownGenotypes) {
            // If it is a new variant for the study, add the already loaded samples as UNKNOWN
            addSampleIdsGenotypes(gts, UNKNOWN_GENOTYPE, getIndexedSamples());
        }

        addCleanStageOperations(document, mongoDBOps, newStudy, missing, skipped, duplicated);

        updateMongoDBOperations(emptyVar, new ArrayList<>(ids), fileDocuments, alternateDocuments, gts, newStudy,
                newVariant, mongoDBOps);
    }

    protected void processOverlappedVariants(List<Document> overlappedVariants, MongoDBOperations mongoDBOps) {
        for (Document document : overlappedVariants) {
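            // Each document of the overlapped region takes a turn as the "main"
            // variant, so every variant of the region gets merged and updated.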
            try {
                processOverlappedVariants(document, overlappedVariants, mongoDBOps);
            } catch (Exception e) {
                Variant mainVariant = STRING_ID_CONVERTER.convertToDataModelType(document);
                List<Variant> variants = overlappedVariants.stream()
                        .map(STRING_ID_CONVERTER::convertToDataModelType).collect(Collectors.toList());
                logger.error("Error processing variant " + mainVariant + " in overlapped variants " + variants);
                throw e;
            }
        }
    }

    /**
     * Given a list of documents from the stage collection, and one variant from that list,
     * merges them into the main variant and transforms the result into a set of MongoDB operations.
     *
     * It may be a new variant document in the database, a new study in the document, or just an update of an existing study variant.
     *
     * @param mainDocument          Main document to add.
     * @param overlappedVariants    Overlapping documents from Stage collection.
     * @param mongoDBOps            Set of MongoDB operations to update
     */
    protected void processOverlappedVariants(Document mainDocument, List<Document> overlappedVariants,
            MongoDBOperations mongoDBOps) {

        Variant mainVariant = STRING_ID_CONVERTER.convertToDataModelType(mainDocument);

        int variantsWithValidData = getVariantsWithValidData(mainVariant, overlappedVariants);

        Document study = mainDocument.get(studyId.toString(), Document.class);

        // New variant in the study.
        boolean newStudy = isNewStudy(study);
        // New variant in the collection if it is new in this study and in every other study present in the stage document
        boolean newVariant = isNewVariant(mainDocument, newStudy);

        // A variant counts as duplicated if it is duplicated or missing for all the files.
        int duplicatedVariants = 0;
        List<String> duplicatedVariantsList = new ArrayList<>();
        int duplicatedFiles = 0;
        int missingFiles = 0;
        for (Integer fileId : fileIds) {
            List<Binary> files = getListFromDocument(study, fileId.toString());
            if (files == null || files.isEmpty()) {
                missingFiles++;
            } else if (files.size() > 1) {
                duplicatedVariants += files.size();
                duplicatedFiles++;
                // If there is more than one variant for this file, increment the
                // number of nonInserted variants: duplicated variant.
                logDuplicatedVariant(mainVariant, files.size(), fileId);
                for (Binary binary : files) {
                    Variant duplicatedVariant = VARIANT_CONVERTER_DEFAULT.convertToDataModelType(binary);
                    String call = duplicatedVariant.getStudies().get(0).getFiles().get(0).getCall();
                    if (call == null) {
                        call = duplicatedVariant.toString();
                    }
                    duplicatedVariantsList.add(call);
                }
            }
        }

        addCleanStageOperations(mainDocument, mongoDBOps, newStudy, missingFiles, 0, duplicatedFiles);

        // An overlapping variant will be considered missing if it is missing or duplicated for all the files.
        final boolean missingOverlappingVariant;

        if (duplicatedFiles + missingFiles == fileIds.size()) {
            // C3.1), C4.1), C5), B3), D1), D2)
            missingOverlappingVariant = true;
            if (duplicatedFiles > 0) {
                // D1), D2)
                logger.error("Duplicated! " + mainVariant + " " + duplicatedVariantsList);
                mongoDBOps.setNonInserted(mongoDBOps.getNonInserted() + duplicatedVariants);
            }
            // No information for this variant
            if (newStudy) {
                // B3), D1), D2)
                return;
            }
            // else {
            //      Do not skip. Fill gaps.
            //      No new overlapped variants.
            // }

            if (variantsWithValidData != 0) {
                // Scenarios C3.1), C4.1)
                logger.debug("Missing overlapped variant! {}, {}", fileIds, mainVariant);
                mongoDBOps.setOverlappedVariants(mongoDBOps.getOverlappedVariants() + 1);
            }
            // else {
            //      If the files to be loaded were not present in the current variant, there is no overlapped variant.
            //      See scenario C5)
            // }
        } else {
            missingOverlappingVariant = false;
        }

        // Merge documents
        Variant variant = mergeOverlappedVariants(mainVariant, overlappedVariants);

        Document gts = new Document();
        List<Document> fileDocuments = new LinkedList<>();
        List<Document> alternateDocuments = null;
        StudyEntry studyEntry = variant.getStudies().get(0);

        // For all the files that are being merged in this load
        for (Integer fileId : fileIds) {
            FileEntry file = studyEntry.getFile(fileId.toString());
            if (file == null) {
                file = studyEntry.getFile(String.valueOf(-fileId));
            }
            if (file != null) {
                Document studyDocument = studyConverter.convertToStorageType(variant, studyEntry, file,
                        getSampleNamesInFile(fileId));
                if (studyDocument.containsKey(GENOTYPES_FIELD)) {
                    studyDocument.get(GENOTYPES_FIELD, Document.class).forEach(
                            (gt, sampleIds) -> addSampleIdsGenotypes(gts, gt, (Collection<Integer>) sampleIds));
                }
                fileDocuments.addAll(getListFromDocument(studyDocument, FILES_FIELD));
                alternateDocuments = getListFromDocument(studyDocument, ALTERNATES_FIELD);
            } else if (addUnknownGenotypes) {
                addSampleIdsGenotypes(gts, UNKNOWN_GENOTYPE, getSamplesInFile(fileId));
            }
        }

        // For the already indexed files (not part of this load), and only if this variant
        // is new in this study, add their information if present in this variant.
        if (newStudy) {
            for (Integer fileId : indexedFiles) {
                FileEntry file = studyEntry.getFile(fileId.toString());
                if (file == null) {
                    file = studyEntry.getFile(String.valueOf(-fileId));
                }
                if (file != null) {
                    Document studyDocument = studyConverter.convertToStorageType(variant, studyEntry, file,
                            getSampleNamesInFile(fileId));
                    if (studyDocument.containsKey(GENOTYPES_FIELD)) {
                        studyDocument.get(GENOTYPES_FIELD, Document.class).forEach(
                                (gt, sampleIds) -> addSampleIdsGenotypes(gts, gt, (Collection<Integer>) sampleIds));
                    }
                    fileDocuments.addAll(getListFromDocument(studyDocument, FILES_FIELD));
                } else if (addUnknownGenotypes) {
                    addSampleIdsGenotypes(gts, UNKNOWN_GENOTYPE, getSamplesInFile(fileId));
                }
            }
        }
        updateMongoDBOperations(mainVariant, variant.getIds(), fileDocuments, alternateDocuments, gts, newStudy,
                newVariant, mongoDBOps);

    }

    private void addCleanStageOperations(Document document, MongoDBOperations mongoDBOps, boolean newStudy,
            int missing, int skipped, int duplicated) {
        if (newStudy && duplicated > 0 && (missing + skipped + duplicated) == fileIds.size()) {
            //            System.out.println("duplicated: document.getString(\"_id\") = " + document.getString("_id"));
            mongoDBOps.getDocumentsToCleanStudies().add(document.getString("_id"));
        } else {
            if (missing != fileIds.size()) {
                mongoDBOps.getDocumentsToCleanFiles().add(document.getString("_id"));
            } // else {
              //     logger.debug("Nothing to clean in variant " + document.getString("_id") + " , " + fileIds);
              // }
        }
    }

    private void logDuplicatedVariant(Variant variant, int numDuplicates, Integer fileId) {
        logger.warn("Found {} duplicated variants for file {} in variant {}.", numDuplicates, fileId, variant);
    }

    /**
     * Given a collection of documents from the stage collection, returns the number of documents (variants) with valid data,
     * i.e. at least one file with information and not duplicated.
     *
     * @param mainVariant Main variant. Data only counts as valid if the document overlaps with the main variant.
     * @param documents Variants from the stage collection
     * @return  Number of variants with valid data.
     */
    private int getVariantsWithValidData(Variant mainVariant, Collection<Document> documents) {
        int variantsWithValidData = 0;
        for (Document document : documents) {
            if (!mainVariant.overlapWith(STRING_ID_CONVERTER.convertToDataModelType(document), true)) {
                continue;
            }
            Document study = document.get(studyIdStr, Document.class);
            boolean existingFiles = false;
            for (Integer fileId : fileIds) {
                List<Binary> files = getListFromDocument(study, fileId.toString());
                if (files != null && files.size() == 1) {
                    existingFiles = true;
                    break;
                }
            }
            if (existingFiles) {
                variantsWithValidData++;
            }
        }
        return variantsWithValidData;
    }

    /**
     * Given a list of overlapped documents from the stage collection, merges them, resolving the overlapping positions.
     *
     * If there is any conflict between overlapped positions, the mainVariant will be selected whenever possible.
     *
     * @see VariantMerger
     *
     * @param mainVariant           Main variant used to resolve conflicts.
     * @param overlappedVariants    Overlapping documents from the Stage collection.
     * @return  The main variant merged with the overlapping variants
     */
    protected Variant mergeOverlappedVariants(Variant mainVariant, List<Document> overlappedVariants) {
        //        System.out.println("--------------------------------");
        //        System.out.println("Overlapped region = " + overlappedVariants
        //                .stream()
        //                .map(doc -> STRING_ID_CONVERTER.convertToDataModelType(doc.getString("_id")))
        //                .collect(Collectors.toList()));

        // The overlapping region will be new if any of the variants is new for the study
        boolean newOverlappingRegion = false;
        // The overlapping region will be completely new if ALL the variants are new for the study
        boolean completelyNewOverlappingRegion = true;

        Map<Integer, List<Variant>> variantsPerFile = new HashMap<>();
        for (Integer fileId : fileIds) {
            variantsPerFile.put(fileId, new LinkedList<>());
        }

        Variant mainVariantNew = null;
        List<Variant> variants = new ArrayList<>(overlappedVariants.size());
        List<Boolean> newStudies = new ArrayList<>(overlappedVariants.size());

        // For each variant, create an empty variant that will be filled by the VariantMerger
        for (Document document : overlappedVariants) {
            Variant var = STRING_ID_CONVERTER.convertToDataModelType(document);
            if (!mainVariant.overlapWith(var, true)) {
                // Skip those variants that do not overlap with the given main variant
                continue;
            }

            Document study = document.get(studyIdStr, Document.class);

            // New variant in the study.
            boolean newStudy = isNewStudy(study);
            newStudies.add(newStudy);
            // It's a new overlapping region if at least one variant is new in this study
            newOverlappingRegion |= newStudy;
            // It's a completely new overlapping region if all the variants are new in this study
            completelyNewOverlappingRegion &= newStudy;

            variants.add(var);

            if (sameVariant(var, mainVariant)) {
                mainVariantNew = var;
                StudyEntry se = new StudyEntry(studyId.toString(), new LinkedList<>(), format);
                se.setSamplesPosition(new HashMap<>());
                var.addStudyEntry(se);
            }
            HashSet<String> ids = new HashSet<>();
            for (Integer fileId : fileIds) {
                List<Binary> files = getListFromDocument(study, fileId.toString());
                if (files != null && files.size() == 1) {
                    // If there is only one variant for this file, add it to the map variantsPerFile
                    Variant variant = VARIANT_CONVERTER_DEFAULT.convertToDataModelType(files.get(0));
                    variant.getStudies().get(0).setSamplesPosition(getSamplesPosition(fileId));
                    variantsPerFile.get(fileId).add(variant);
                    ids.addAll(variant.getIds());
                }
            }
            var.setIds(new ArrayList<>(ids));
        }

        if (mainVariantNew == null) {
            // This should never happen
            throw new IllegalStateException("Main variant was not one of the variants to merge");
        }

        List<Integer> overlappingFiles = new ArrayList<>();
        List<Variant> variantsToMerge = new LinkedList<>();
        for (Integer fileId : fileIds) {
            List<Variant> variantsInFile = variantsPerFile.get(fileId);
            switch (variantsInFile.size()) {
            case 0:
                break;
            case 1:
                variantsToMerge.add(variantsInFile.get(0));
                if (!sameVariant(variantsInFile.get(0), mainVariant)) {
                    overlappingFiles.add(fileId);
                }
                break;
            default:
                // If there are overlapping variants, select the mainVariant if possible.
                Variant var = null;
                for (Variant variant : variantsInFile) {
                    if (sameVariant(variant, mainVariant)) {
                        var = variant;
                    }
                }
                // If not found, get the first
                if (var == null) {
                    var = variantsInFile.get(0);
                    overlappingFiles.add(fileId);
                    //                        logger.info("Variant " + mainVariant + " not found in " + variantsInFile);
                }
                variantsToMerge.add(var);

                // Get the original call from the first variant
                String call = var.getStudies().get(0).getFiles().get(0).getCall();
                if (call != null) {
                    if (call.isEmpty()) {
                        call = null;
                    } else {
                        call = call.substring(0, call.lastIndexOf(':'));
                    }
                }

                // Do not warn about overlapping variants if genotypes are being excluded
                if (!excludeGenotypes) {
                    boolean prompted = false;
                    for (int i = 1; i < variantsInFile.size(); i++) {
                        Variant auxVar = variantsInFile.get(i);
                        // Check if the variants were part of the same multiallelic variant
                        String auxCall = auxVar.getStudies().get(0).getFiles().get(0).getCall();
                        if (!prompted && (auxCall == null || call == null || !auxCall.startsWith(call))) {
                            logger.warn("Overlapping variants in file {} : {}", fileId, variantsInFile);
                            prompted = true;
                        }
                        //                        // Those variants that do not overlap with the selected variant won't be inserted
                        //                        if (!auxVar.overlapWith(var, true)) {
                        //                            mongoDBOps.nonInserted++;
                        //                            logger.warn("Skipping overlapped variant " + auxVar);
                        //                        }
                    }
                }
                break;
            }
        }

        /*
         * If this is a new overlapping region and some files are already indexed,
         * fetch from the database the information about the loaded variants of this region.
         *
         *      +---+---+---+---+
         *      | A | B | C | D |
         * +----+---+---+---+---+
         * | V1 | X | X |   |   |
         * +----+---+---+---+---+
         * | V2 | X | X |   | X |
         * +----+---+---+---+---+
         * | V3 |   |   | X |   |
         * +----+---+---+---+---+
         *
         * - Files A and B are loaded
         * - Files C and D are being loaded
         * - Variants V1,V2,V3 are overlapping
         *
         * In order to merge the data properly, we need to get from the server the information about
         * the variants {V1, V2} for the files {A, B}.
         *
         * Because the variants {V1, V2} are already loaded, the information that we need is duplicated
         * in both variants, so we only need to get one of them.
         *
         */
        if (!completelyNewOverlappingRegion && newOverlappingRegion && !indexedFiles.isEmpty()) {
            int i = 0;
            for (Variant variant : variants) {
                // If the variant is not new in this study, query the database for the loaded info.
                if (!newStudies.get(i)) {
                    QueryResult<Variant> queryResult = fetchVariant(variant);
                    if (queryResult.getResult().size() == 1 && queryResult.first().getStudies().size() == 1) {
                        // Check whether each file entry belongs to an overlapping variant. If so, mark it as overlapped (negate the fileId)!
                        for (FileEntry fileEntry : queryResult.first().getStudies().get(0).getFiles()) {
                            boolean empty = StringUtils.isEmpty(fileEntry.getCall());
                            if (empty && !sameVariant(mainVariant, queryResult.first())
                                    || !empty && !sameVariant(mainVariant, fileEntry.getCall())) {
                                markAsOverlapped(fileEntry);
                            } else {
                                markAsNonOverlapped(fileEntry);
                            }
                        }
                        variantsToMerge.add(queryResult.first());
                    } else {
                        if (queryResult.getResult().isEmpty()) {
                            throw new IllegalStateException("Variant " + variant + " not found!");
                        } else {
                            throw new IllegalStateException(
                                    "Variant " + variant + " returned unexpected results: " + queryResult.getResult());
                        }
                    }
                    // Because the loaded variants form an overlapped region, all the required information is present in every variant.
                    // Fetch only one variant
                    break;
                }
                i++;
            }
        }

        // Finally, merge variants
        variantMerger.merge(mainVariantNew, variantsToMerge);

        if (!overlappingFiles.isEmpty()) {
            for (FileEntry fileEntry : mainVariantNew.getStudies().get(0).getFiles()) {
                int fileId = Integer.parseInt(fileEntry.getFileId());
                if (overlappingFiles.contains(fileId)) {
                    markAsOverlapped(fileEntry);
                }
            }
        }

        return mainVariantNew;
    }
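
    // Convention: an overlapped file entry is flagged by negating its fileId;
    // lookups elsewhere in this class check both the positive and negative forms.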

    private void markAsOverlapped(FileEntry fileEntry) {
        int fid = Integer.parseInt(fileEntry.getFileId());
        if (fid > 0) {
            fileEntry.setFileId(String.valueOf(-fid));
        }
    }

    private void markAsNonOverlapped(FileEntry fileEntry) {
        int fid = Integer.parseInt(fileEntry.getFileId());
        if (fid < 0) {
            fileEntry.setFileId(String.valueOf(-fid));
        }
    }

    /**
     * Reads the given variant from the 'variants' collection.
     *
     * The default timeout of 3s may not be enough if there are a lot of
     * concurrent writes to the "variants" collection, so a larger timeout is
     * used here. Also add a retry, just in case.
     * @param variant Variant to read
     * @return  Query result of the query
     */
    private QueryResult<Variant> fetchVariant(Variant variant) {
        QueryResult<Variant> queryResult = null;
        int maxNumFails = 2;
        int fails = 0;
        while (queryResult == null) {
            try {
                queryResult = dbAdaptor.get(
                        new Query().append(VariantDBAdaptor.VariantQueryParams.ID.key(), variant.toString())
                                .append(VariantDBAdaptor.VariantQueryParams.UNKNOWN_GENOTYPE.key(), ".")
                                .append(VariantDBAdaptor.VariantQueryParams.RETURNED_STUDIES.key(), studyId),
                        new QueryOptions(QueryOptions.TIMEOUT, 30_000));
            } catch (MongoExecutionTimeoutException e) {
                fails++;
                if (fails < maxNumFails) {
                    logger.warn("Got timeout exception reading variants. Retry!", e);
                } else {
                    throw e;
                }
            }
        }
        return queryResult;
    }

    /**
     * Transform the set of genotypes and file objects into a set of mongodb operations.
     *
     * @param emptyVar            Parsed empty variant of the document. Only chr, pos, ref, alt
     * @param ids                 Variant identifiers seen for this variant
     * @param fileDocuments       List of files to be updated
     * @param secondaryAlternates SecondaryAlternates documents.
     * @param gts                 Set of genotypes to be updated
     * @param newStudy            If the variant is new for this study
     * @param newVariant          If the variant was never seen in the database
     * @param mongoDBOps          Set of MongoDB operations to update
     */
    protected void updateMongoDBOperations(Variant emptyVar, List<String> ids, List<Document> fileDocuments,
            List<Document> secondaryAlternates, Document gts, boolean newStudy, boolean newVariant,
            MongoDBOperations mongoDBOps) {
        if (newStudy) {
            // If there were no files and the study is new, do not add a new study.
            // It may happen if all the files in the variant were duplicated for this variant.
            if (!fileDocuments.isEmpty()) {
                Document studyDocument = new Document(STUDYID_FIELD, studyId).append(FILES_FIELD, fileDocuments);

                if (!excludeGenotypes) {
                    studyDocument.append(GENOTYPES_FIELD, gts);
                }

                if (secondaryAlternates != null && !secondaryAlternates.isEmpty()) {
                    studyDocument.append(ALTERNATES_FIELD, secondaryAlternates);
                }

                final String id;
                List<Bson> updates = new ArrayList<>();
                updates.add(push(STUDIES_FIELD, studyDocument));
                if (newVariant) {
                    Document variantDocument = variantConverter.convertToStorageType(emptyVar);
                    updates.add(addEachToSet(IDS_FIELD, ids));
                    for (Map.Entry<String, Object> entry : variantDocument.entrySet()) {
                        if (!entry.getKey().equals("_id") && !entry.getKey().equals(STUDIES_FIELD)
                                && !entry.getKey().equals(IDS_FIELD)) {
                            Object value = entry.getValue();
                            if (value instanceof List) {
                                updates.add(setOnInsert(entry.getKey(), new BsonArray(((List) value))));
                            } else {
                                updates.add(setOnInsert(entry.getKey(), value));
                            }
                        }
                    }
                    mongoDBOps.getNewStudy().getVariants().add(variantDocument);
                    id = variantDocument.getString("_id");
                } else {
                    id = variantConverter.buildStorageId(emptyVar);
                }
                mongoDBOps.getNewStudy().getIds().add(id);
                mongoDBOps.getNewStudy().getQueries().add(eq("_id", id));
                mongoDBOps.getNewStudy().getUpdates().add(combine(updates));
            }
        } else {
            String id = variantConverter.buildStorageId(emptyVar);
            List<Bson> mergeUpdates = new LinkedList<>();
            if (!ids.isEmpty()) {
                mergeUpdates.add(addEachToSet(IDS_FIELD, ids));
            }

            if (!excludeGenotypes) {
                for (String gt : gts.keySet()) {
                    List sampleIds = getListFromDocument(gts, gt);
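                    // On resume, $addToSet keeps a retried update idempotent (no
                    // duplicated sample ids); on a first attempt, a plain $push suffices.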
                    if (resume) {
                        mergeUpdates
                                .add(addEachToSet(STUDIES_FIELD + ".$." + GENOTYPES_FIELD + '.' + gt, sampleIds));
                    } else {
                        mergeUpdates.add(pushEach(STUDIES_FIELD + ".$." + GENOTYPES_FIELD + '.' + gt, sampleIds));
                    }
                }
            }
            if (secondaryAlternates != null && !secondaryAlternates.isEmpty()) {
                mergeUpdates.add(addEachToSet(STUDIES_FIELD + ".$." + ALTERNATES_FIELD, secondaryAlternates));
            }

            if (!fileDocuments.isEmpty()) {
                mongoDBOps.getExistingStudy().getIds().add(id);
                mongoDBOps.getExistingStudy().getQueries()
                        .add(and(eq("_id", id), eq(STUDIES_FIELD + '.' + STUDYID_FIELD, studyId)));

                if (resume) {
                    mergeUpdates.add(addEachToSet(STUDIES_FIELD + ".$." + FILES_FIELD, fileDocuments));
                } else {
                    mergeUpdates.add(pushEach(STUDIES_FIELD + ".$." + FILES_FIELD, fileDocuments));
                }
                mongoDBOps.getExistingStudy().getUpdates().add(combine(mergeUpdates));
            } else if (!mergeUpdates.isEmpty()) {
                // These files are not present in this variant. Increase the number of missing variants.
                mongoDBOps.setMissingVariants(mongoDBOps.getMissingVariants() + 1);
                mongoDBOps.getExistingStudy().getIds().add(id);
                mongoDBOps.getExistingStudy().getQueries()
                        .add(and(eq("_id", id), eq(STUDIES_FIELD + '.' + STUDYID_FIELD, studyId)));
                mongoDBOps.getExistingStudy().getUpdates().add(combine(mergeUpdates));
            } else {
                mongoDBOps.setMissingVariantsNoFillGaps(mongoDBOps.getMissingVariantsNoFillGaps() + 1);
            }
        }
    }

    /**
     * Whether the variant is new for this study, depending on the value of the field {@link MongoDBVariantStageLoader#NEW_STUDY_FIELD}.
     * @param study Study object
     * @return      If this is the first time that the variant has been seen in this study.
     */
    public static boolean isNewStudy(Document study) {
        return study.getBoolean(MongoDBVariantStageLoader.NEW_STUDY_FIELD,
                MongoDBVariantStageLoader.NEW_STUDY_DEFAULT);
    }

    public static boolean isNewVariant(Document document, boolean newStudy) {
        // If the document has only the study, _id, end, ref and alt fields.
        if (!newStudy || document.size() != 5) {
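            // Otherwise, the variant is new only if every other study sub-document
            // in the stage entry is also flagged as new.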
            for (Map.Entry<String, Object> entry : document.entrySet()) {
                if (!entry.getKey().equals(VariantStringIdConverter.ID_FIELD)
                        && !entry.getKey().equals(VariantStringIdConverter.END_FIELD)
                        && !entry.getKey().equals(VariantStringIdConverter.REF_FIELD)
                        && !entry.getKey().equals(VariantStringIdConverter.ALT_FIELD)) {
                    if (!isNewStudy((Document) entry.getValue())) {
                        return false;
                    }
                }
            }
        }
        return true;
    }

    private boolean sameVariant(Variant variant, Variant other) {
        return variant.getChromosome().equals(other.getChromosome()) && variant.getStart().equals(other.getStart())
                && variant.getReference().equals(other.getReference())
                && variant.getAlternate().equals(other.getAlternate());
    }

    private boolean sameVariant(Variant variant, String call) {
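        // The original call is parsed as "start:reference:alt1,alt2,...";
        // normalize it and check whether any resulting key matches this variant.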
        String[] split = call.split(":", -1);
        List<VariantNormalizer.VariantKeyFields> normalized = new VariantNormalizer().normalize(
                variant.getChromosome(), Integer.parseInt(split[0]), split[1], Arrays.asList(split[2].split(",")));
        for (VariantNormalizer.VariantKeyFields variantKeyFields : normalized) {
            if (variantKeyFields.getStart() == variant.getStart()
                    && variantKeyFields.getReference().equals(variant.getReference())
                    && variantKeyFields.getAlternate().equals(variant.getAlternate())) {
                return true;
            }
        }
        return false;
    }

    protected void addSampleIdsGenotypes(Document gts, String genotype, Collection<Integer> sampleIds) {
        if (sampleIds.isEmpty()) {
            return;
        }
        if (gts.containsKey(genotype)) {
            getListFromDocument(gts, genotype).addAll(sampleIds);
        } else {
            gts.put(genotype, new LinkedList<>(sampleIds));
        }
    }

    @SuppressWarnings("unchecked")
    private <T> List<T> getListFromDocument(Document document, String key) {
        return document.get(key, List.class);
    }

    protected List<Integer> getIndexedSamples() {
        return indexedSamples;
    }

    private List<Integer> buildIndexedSamplesList(List<Integer> fileIds) {
        List<Integer> indexedSamples = new LinkedList<>(
                StudyConfiguration.getIndexedSamples(studyConfiguration).values());
        for (Integer fileId : fileIds) {
            indexedSamples.removeAll(getSamplesInFile(fileId));
        }
        indexedSamples.sort(Integer::compareTo);
        return indexedSamples;
    }

    protected LinkedHashSet<Integer> getSamplesInFile(Integer fileId) {
        return studyConfiguration.getSamplesInFiles().get(fileId);
    }

    protected LinkedHashSet<String> getSampleNamesInFile(Integer fileId) {
        LinkedHashSet<String> samples = new LinkedHashSet<>();
        getSamplesInFile(fileId).forEach(sampleId -> {
            samples.add(studyConfiguration.getSampleIds().inverse().get(sampleId));
        });
        return samples;
    }

    protected LinkedHashMap<String, Integer> getSamplesPosition(Integer fileId) {
        if (!samplesPositionMap.containsKey(fileId)) {
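            // Lazily build the samples-position map using double-checked locking,
            // since several merger tasks may run concurrently.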
            synchronized (samplesPositionMap) {
                if (!samplesPositionMap.containsKey(fileId)) {
                    LinkedHashMap<String, Integer> samplesPosition = new LinkedHashMap<>();
                    for (Integer sampleId : studyConfiguration.getSamplesInFiles().get(fileId)) {
                        samplesPosition.put(studyConfiguration.getSampleIds().inverse().get(sampleId),
                                samplesPosition.size());
                    }
                    samplesPositionMap.put(fileId, samplesPosition);
                }
            }
        }
        return samplesPositionMap.get(fileId);
    }

    public List<String> buildFormat(StudyConfiguration studyConfiguration) {
        List<String> format = new LinkedList<>();
        if (!excludeGenotypes) {
            format.add(VariantMerger.GT_KEY);
        }
        format.addAll(studyConfiguration.getAttributes()
                .getAsStringList(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key()));
        return format;
    }

    public boolean getExcludeGenotypes(StudyConfiguration studyConfiguration) {
        return studyConfiguration.getAttributes().getBoolean(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(),
                VariantStorageEngine.Options.EXCLUDE_GENOTYPES.defaultValue());
    }
}
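
Example usage

The following is a minimal sketch of how the merger may be driven, not part of the original file. It assumes a VariantDBAdaptor and a StudyConfiguration are already available; the file ids and the stage batch are hypothetical placeholders.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.bson.Document;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.mongodb.variant.load.variants.MongoDBOperations;
import org.opencb.opencga.storage.mongodb.variant.load.variants.MongoDBVariantMerger;

public class MergerUsageSketch {

    public static MongoDBOperations mergeBatch(VariantDBAdaptor dbAdaptor,
            StudyConfiguration studyConfiguration, List<Document> stageBatch) {
        MongoDBVariantMerger merger = new MongoDBVariantMerger(dbAdaptor, studyConfiguration,
                Arrays.asList(1, 2),    // ids of the files being merged (hypothetical)
                Collections.emptySet(), // no files previously indexed in this region
                false,                  // resume: first attempt, so plain $push updates
                false);                 // ignoreOverlapping: do check overlapping variants

        // Each call turns one position-sorted batch of stage documents into a set of
        // MongoDB operations; executing them against the variants collection is a
        // separate step, outside this class.
        return merger.merge(stageBatch);
    }
}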