com.epam.catgenome.manager.FeatureIndexManager.java Source code

Java tutorial

Introduction

Here is the source code for com.epam.catgenome.manager.FeatureIndexManager.java

Source

/*
 * MIT License
 *
 * Copyright (c) 2016 EPAM Systems
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package com.epam.catgenome.manager;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;

import com.epam.catgenome.manager.bed.BedManager;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.util.MathUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;

import com.epam.catgenome.component.MessageHelper;
import com.epam.catgenome.constant.MessagesConstants;
import com.epam.catgenome.dao.index.FeatureIndexDao;
import com.epam.catgenome.entity.BaseEntity;
import com.epam.catgenome.entity.BiologicalDataItemFormat;
import com.epam.catgenome.entity.FeatureFile;
import com.epam.catgenome.entity.gene.Gene;
import com.epam.catgenome.entity.gene.GeneFile;
import com.epam.catgenome.entity.gene.GeneFileType;
import com.epam.catgenome.entity.index.FeatureIndexEntry;
import com.epam.catgenome.entity.index.FeatureType;
import com.epam.catgenome.entity.index.Group;
import com.epam.catgenome.entity.index.IndexSearchResult;
import com.epam.catgenome.entity.index.VcfIndexEntry;
import com.epam.catgenome.entity.project.Project;
import com.epam.catgenome.entity.reference.Chromosome;
import com.epam.catgenome.entity.reference.Reference;
import com.epam.catgenome.entity.track.Track;
import com.epam.catgenome.entity.vcf.InfoItem;
import com.epam.catgenome.entity.vcf.OrganismType;
import com.epam.catgenome.entity.vcf.Variation;
import com.epam.catgenome.entity.vcf.VariationType;
import com.epam.catgenome.entity.vcf.VcfFile;
import com.epam.catgenome.entity.vcf.VcfFilterForm;
import com.epam.catgenome.entity.vcf.VcfFilterInfo;
import com.epam.catgenome.exception.FeatureIndexException;
import com.epam.catgenome.exception.GeneReadingException;
import com.epam.catgenome.manager.gene.GeneFileManager;
import com.epam.catgenome.manager.gene.GeneUtils;
import com.epam.catgenome.manager.gene.GffManager;
import com.epam.catgenome.manager.gene.parser.GeneFeature;
import com.epam.catgenome.manager.gene.reader.AbstractGeneReader;
import com.epam.catgenome.manager.project.ProjectManager;
import com.epam.catgenome.manager.reference.BookmarkManager;
import com.epam.catgenome.manager.reference.ReferenceGenomeManager;
import com.epam.catgenome.manager.vcf.VcfFileManager;
import com.epam.catgenome.manager.vcf.VcfManager;
import com.epam.catgenome.manager.vcf.reader.VcfFileReader;
import com.epam.catgenome.util.NggbIntervalTreeMap;
import com.epam.catgenome.util.Utils;
import com.epam.catgenome.entity.bed.BedFile;
import htsjdk.tribble.bed.BEDFeature;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.Interval;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.Feature;
import htsjdk.tribble.FeatureReader;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCompoundHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;

/**
 * Source:      VcfIndexManager
 * Created:     28.04.16, 18:01
 * Project:     CATGenome Browser
 * Make:        IntelliJ IDEA 14.1.4, JDK 1.8
 *
 * <p>
 * A service class, that contains logic, connected with feature indexes. They are used for fast feature
 * search (variations, genes) by various criteria.
 * </p>
 */
@Service
public class FeatureIndexManager {
    private static final Logger LOGGER = LoggerFactory.getLogger(FeatureIndexManager.class);
    private static final String VCF_FILE_IDS_FIELD = "vcfFileIds";

    @Autowired
    private FileManager fileManager;

    @Autowired
    private ReferenceGenomeManager referenceGenomeManager;

    @Autowired
    private GffManager gffManager;

    @Autowired
    private VcfFileManager vcfFileManager;

    @Autowired
    private VcfManager vcfManager;

    @Autowired
    private ProjectManager projectManager;

    @Autowired
    private FeatureIndexDao featureIndexDao;

    @Autowired
    private GeneFileManager geneFileManager;

    @Autowired
    private BedManager bedManager;

    @Autowired
    private BookmarkManager bookmarkManager;

    @Value("#{catgenome['search.features.max.results'] ?: 100}")
    private Integer maxFeatureSearchResultsCount;

    /**
     * Deletes features from specified feature files from project's index
     *
     * @param projectId a project to delete index entries
     * @param fileIds files, which entries to delete
     */
    public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
        featureIndexDao.deleteFromIndexByFileId(projectId, fileIds);
    }

    /**
     * Searches gene IDs, affected by variations in specified VCF files in a specified project
     *
     * @param gene a prefix of a gene ID to search
     * @param vcfFileIds a {@code List} of IDs of VCF files in project to search for gene IDs
     * @return a {@code Set} of gene IDs, that are affected by some variations in specified VCf files
     * @throws IOException
     */
    public Set<String> searchGenesInVcfFilesInProject(long projectId, String gene, List<Long> vcfFileIds)
            throws IOException {
        Project project = projectManager.loadProject(projectId);
        List<VcfFile> vcfFiles = project.getItems().stream()
                .filter(i -> i.getBioDataItem().getFormat() == BiologicalDataItemFormat.VCF)
                .map(i -> (VcfFile) i.getBioDataItem()).collect(Collectors.toList());

        List<VcfFile> selectedVcfFiles;
        if (CollectionUtils.isEmpty(vcfFileIds)) {
            selectedVcfFiles = vcfFiles;
        } else {
            selectedVcfFiles = vcfFiles.stream().filter(f -> vcfFileIds.contains(f.getId()))
                    .collect(Collectors.toList());
        }

        return featureIndexDao.searchGenesInVcfFiles(gene, selectedVcfFiles);
    }

    /**
     * Searches gene IDs, affected by variations in specified VCF files in a specified project
     *
     * @param gene a prefix of a gene ID to search
     * @param vcfFileIds a {@code List} of IDs of VCF files in project to search for gene IDs
     * @return a {@code Set} of gene IDs, that are affected by some variations in specified VCf files
     * @throws IOException if something goes wrong with file system
     */
    public Set<String> searchGenesInVcfFiles(String gene, List<Long> vcfFileIds) throws IOException {
        List<VcfFile> vcfFiles = vcfFileManager.loadVcfFiles(vcfFileIds);
        return featureIndexDao.searchGenesInVcfFiles(gene, vcfFiles);
    }

    /**
     * Filer chromosomes that contain variations, specified by filter
     *
     * @param filterForm a {@code VcfFilterForm} to filter out chromosomes
     * @param projectId a {@code Project}s ID to filter
     * @return a {@code List} of {@code Chromosome} that corresponds to specified filter
     * @throws IOException
     */
    public List<Chromosome> filterChromosomes(VcfFilterForm filterForm, long projectId) throws IOException {
        Assert.isTrue(filterForm.getVcfFileIds() != null && !filterForm.getVcfFileIds().isEmpty(),
                MessageHelper.getMessage(MessagesConstants.ERROR_NULL_PARAM, VCF_FILE_IDS_FIELD));
        Project project = projectManager.loadProject(projectId);
        List<VcfFile> vcfFiles = project.getItems().stream()
                .filter(i -> i.getBioDataItem().getFormat() == BiologicalDataItemFormat.VCF)
                .map(i -> (VcfFile) i.getBioDataItem()).collect(Collectors.toList());

        List<Chromosome> chromosomes = referenceGenomeManager.loadChromosomes(vcfFiles.get(0).getReferenceId());
        Map<Long, Chromosome> chromosomeMap = chromosomes.parallelStream()
                .collect(Collectors.toMap(BaseEntity::getId, chromosome -> chromosome));

        List<Long> chromosomeIds = featureIndexDao.getChromosomeIdsWhereVariationsPresentFacet(vcfFiles,
                filterForm.computeQuery(FeatureType.VARIATION));

        return chromosomeIds.stream().map(chromosomeMap::get).collect(Collectors.toList());
    }

    /**
     * Filer chromosomes that contain variations, specified by filter
     *
     * @param filterForm a {@code VcfFilterForm} to filter out chromosomes
     * @return a {@code List} of {@code Chromosome} that corresponds to specified filter
     * @throws IOException
     */
    public List<Chromosome> filterChromosomes(VcfFilterForm filterForm) throws IOException {
        Assert.isTrue(filterForm.getVcfFileIds() != null && !filterForm.getVcfFileIds().isEmpty(),
                MessageHelper.getMessage(MessagesConstants.ERROR_NULL_PARAM, VCF_FILE_IDS_FIELD));
        List<VcfFile> vcfFiles = vcfFileManager.loadVcfFiles(filterForm.getVcfFileIds());

        List<Chromosome> chromosomes = referenceGenomeManager.loadChromosomes(vcfFiles.get(0).getReferenceId());
        Map<Long, Chromosome> chromosomeMap = chromosomes.parallelStream()
                .collect(Collectors.toMap(BaseEntity::getId, chromosome -> chromosome));

        List<Long> chromosomeIds = featureIndexDao.getChromosomeIdsWhereVariationsPresentFacet(vcfFiles,
                filterForm.computeQuery(FeatureType.VARIATION));

        return chromosomeIds.stream().map(chromosomeMap::get).collect(Collectors.toList());
    }

    /**
     * Filter variations in a feature index for a project, specified by ID
     *
     * @param filterForm {@code VcfFilterForm}, setting filter options
     * @param projectId  an ID of a project, which index to work with
     * @return a {@code List} of {@code VcfIndexEntry}, representing variations that satisfy the filter
     * @throws IOException
     */
    public IndexSearchResult<VcfIndexEntry> filterVariations(VcfFilterForm filterForm, long projectId)
            throws IOException {
        Project project = projectManager.loadProject(projectId);
        List<VcfFile> files = project.getItems().stream()
                .filter(i -> i.getBioDataItem().getFormat() == BiologicalDataItemFormat.VCF)
                .map(i -> (VcfFile) i.getBioDataItem()).collect(Collectors.toList());
        if (filterForm.getPage() != null && filterForm.getPageSize() != null) {
            IndexSearchResult<VcfIndexEntry> res = featureIndexDao.searchFileIndexesPaging(files,
                    filterForm.computeQuery(FeatureType.VARIATION), filterForm.getInfoFields(),
                    filterForm.getPage(), filterForm.getPageSize(), filterForm.getOrderBy());
            res.setTotalPagesCount(getTotalPagesCount(filterForm, projectId));
            return res;
        } else {
            IndexSearchResult<VcfIndexEntry> res = featureIndexDao.searchFileIndexes(files,
                    filterForm.computeQuery(FeatureType.VARIATION), filterForm.getInfoFields(), null, null);
            res.setExceedsLimit(false);
            return res;
        }
    }

    public int getTotalPagesCount(VcfFilterForm filterForm) throws IOException {
        if (filterForm.getPageSize() == null) {
            throw new IllegalArgumentException("No page size is specified");
        }

        List<VcfFile> files = vcfFileManager.loadVcfFiles(filterForm.getVcfFileIds());
        int totalCount = featureIndexDao.getTotalVariationsCountFacet(files,
                filterForm.computeQuery(FeatureType.VARIATION));
        return (int) Math.ceil(totalCount / filterForm.getPageSize().doubleValue());
    }

    public int getTotalPagesCount(VcfFilterForm filterForm, long projectId) throws IOException {
        if (filterForm.getPageSize() == null) {
            throw new IllegalArgumentException("No page size is specified");
        }

        Project project = projectManager.loadProject(projectId);
        List<VcfFile> files = project.getItems().stream()
                .filter(i -> i.getBioDataItem().getFormat() == BiologicalDataItemFormat.VCF)
                .map(i -> (VcfFile) i.getBioDataItem()).collect(Collectors.toList());
        int totalCount = featureIndexDao.getTotalVariationsCountFacet(files,
                filterForm.computeQuery(FeatureType.VARIATION));
        return (int) Math.ceil(totalCount / filterForm.getPageSize().doubleValue());
    }

    /**
     * Groups variations from specified {@link List} of {@link VcfFile}s by specified field
     * @param filterForm {@code VcfFilterForm}, setting filter options
     * @param projectId a {@code Project}s ID to filter
     * @param groupByField a field to perform grouping
     * @return a {@link List} of {@link Pair}s, mapping field value to number of variations, having this value
     * @throws IOException if something goes wrong with the file system
     */
    public List<Group> groupVariations(VcfFilterForm filterForm, long projectId, String groupByField)
            throws IOException {
        Project project = projectManager.loadProject(projectId);
        List<VcfFile> files = project.getItems().stream()
                .filter(i -> i.getBioDataItem().getFormat() == BiologicalDataItemFormat.VCF)
                .map(i -> (VcfFile) i.getBioDataItem()).collect(Collectors.toList());
        return featureIndexDao.groupVariations(files, filterForm.computeQuery(FeatureType.VARIATION), groupByField);
    }

    /**
     * Groups variations from specified {@link List} of {@link VcfFile}s by specified field
     * @param filterForm {@code VcfFilterForm}, setting filter options
     * @param groupByField a field to perform grouping
     * @return a {@link List} of {@link Pair}s, mapping field value to number of variations, having this value
     * @throws IOException if something goes wrong with the file system
     */
    public List<Group> groupVariations(VcfFilterForm filterForm, String groupByField) throws IOException {
        List<VcfFile> files = vcfFileManager.loadVcfFiles(filterForm.getVcfFileIds());
        return featureIndexDao.groupVariations(files, filterForm.computeQuery(FeatureType.VARIATION), groupByField);
    }

    /**
     * Filter variations in a feature index for a project, specified by ID
     *
     * @param filterForm {@code VcfFilterForm}, setting filter options
     * @return a {@code List} of {@code VcfIndexEntry}, representing variations that satisfy the filter
     * @throws IOException
     */
    public IndexSearchResult<VcfIndexEntry> filterVariations(VcfFilterForm filterForm) throws IOException {
        List<VcfFile> files = vcfFileManager.loadVcfFiles(filterForm.getVcfFileIds());
        if (filterForm.getPage() != null && filterForm.getPageSize() != null) {
            IndexSearchResult<VcfIndexEntry> res = featureIndexDao.searchFileIndexesPaging(files,
                    filterForm.computeQuery(FeatureType.VARIATION), filterForm.getInfoFields(),
                    filterForm.getPage(), filterForm.getPageSize(), filterForm.getOrderBy());
            res.setTotalPagesCount(getTotalPagesCount(filterForm));
            return res;
        } else {
            IndexSearchResult<VcfIndexEntry> res = featureIndexDao.searchFileIndexes(files,
                    filterForm.computeQuery(FeatureType.VARIATION), filterForm.getInfoFields(), null, null);
            res.setExceedsLimit(false);
            return res;
        }
    }

    /**
     * Searches genes by it's ID in project's gene files. Minimum featureId prefix length == 2
     *
     * @param featureId a feature ID prefix to search for
     * @return a {@code List} of {@code FeatureIndexEntry}
     * @throws IOException if something goes wrong with the file system
     */
    public IndexSearchResult searchFeaturesInProject(String featureId, long projectId) throws IOException {
        if (featureId == null || featureId.length() < 2) {
            return new IndexSearchResult(Collections.emptyList(), false, 0);
        }

        IndexSearchResult<FeatureIndexEntry> bookmarkSearchRes = bookmarkManager.searchBookmarks(featureId,
                maxFeatureSearchResultsCount);

        Project project = projectManager.loadProject(projectId);
        Optional<Reference> maybeReference = project.getItems().stream()
                .filter(i -> i.getBioDataItem().getFormat() == BiologicalDataItemFormat.REFERENCE)
                .map(i -> (Reference) i.getBioDataItem()).findFirst();

        if (maybeReference.isPresent()) {
            Reference reference = maybeReference.get();
            List<FeatureFile> annotationFiles = referenceGenomeManager
                    .getReferenceAnnotationFiles(reference.getId()).stream()
                    .map(biologicalDataItem -> (FeatureFile) biologicalDataItem).collect(Collectors.toList());
            GeneFile geneFile = reference.getGeneFile();
            if (geneFile != null) {
                Long geneFileId = geneFile.getId();
                annotationFiles.add(geneFileManager.loadGeneFile(geneFileId));
            }

            IndexSearchResult<FeatureIndexEntry> res = featureIndexDao.searchFeatures(featureId, annotationFiles,
                    maxFeatureSearchResultsCount);
            bookmarkSearchRes.mergeFrom(res);
        }
        return bookmarkSearchRes;
    }

    public IndexSearchResult searchFeaturesByReference(String featureId, long referenceId) throws IOException {
        if (featureId == null || featureId.length() < 2) {
            return new IndexSearchResult<>(Collections.emptyList(), false, 0);
        }

        IndexSearchResult<FeatureIndexEntry> bookmarkSearchRes = bookmarkManager.searchBookmarks(featureId,
                maxFeatureSearchResultsCount);

        Reference reference = referenceGenomeManager.loadReferenceGenome(referenceId);

        List<FeatureFile> annotationFiles = referenceGenomeManager.getReferenceAnnotationFiles(referenceId).stream()
                .map(biologicalDataItem -> (FeatureFile) biologicalDataItem).collect(Collectors.toList());
        GeneFile geneFile = reference.getGeneFile();
        if (geneFile != null) {
            Long geneFileId = geneFile.getId();
            annotationFiles.add(geneFileManager.loadGeneFile(geneFileId));
        }

        IndexSearchResult<FeatureIndexEntry> res = featureIndexDao.searchFeatures(featureId, annotationFiles,
                maxFeatureSearchResultsCount);
        bookmarkSearchRes.mergeFrom(res);
        return bookmarkSearchRes;
    }

    /**
     * Loads {@code VcfFilterInfo} object for a specified project. {@code VcfFilterInfo} contains information about
     * available fields to perform filtering and display results
     *
     * @param projectId a {@code Project}'s ID to load filter info
     * @return a {@code VcfFilterInfo} object
     * @throws IOException
     */
    public VcfFilterInfo loadVcfFilterInfoForProject(long projectId) throws IOException {
        Project project = projectManager.loadProject(projectId);
        List<Long> vcfIds = project.getItems().stream()
                .filter(item -> item.getBioDataItem() != null
                        && item.getBioDataItem().getFormat() == BiologicalDataItemFormat.VCF)
                .map(item -> item.getBioDataItem().getId()).collect(Collectors.toList());

        return vcfManager.getFiltersInfo(vcfIds);
    }

    /**
     * Fetch gene IDs of genes, affected by variation. The variation is specified by it's start and end indexes
     *
     * @param intervalMap represents a batch loaded genes form gene file
     * @param start a start index of the variation
     * @param end an end index of the variation
     * @param chromosome a {@code Chromosome}
     * @return a {@code Set} of IDs of genes, affected by the variation
     */
    private Set<VariationGeneInfo> fetchGeneIdsFromBatch(NggbIntervalTreeMap<List<Gene>> intervalMap, int start,
            int end, Chromosome chromosome) {
        Set<VariationGeneInfo> geneIds = getGeneIds(intervalMap, chromosome, start, start);
        if (end > start) {
            geneIds.addAll(getGeneIds(intervalMap, chromosome, end, end));
        }

        return geneIds;
    }

    /**
     * Post processes fetched VCF index entries to add gene information and split them to resolve ambiguous fields
     * @param entries list of entries to process
     * @param geneFiles list of {@link GeneFile}s to fetch gene information from
     * @param chromosome a {@code Chromosome}, from which entries came
     * @param vcfHeader a header of VCF file
     * @param vcfReader a reader for VCF file
     * @return a list of post-processed index entries, ready to write into index
     * @throws GeneReadingException if an exception was thrown when reading genes information
     */
    public List<VcfIndexEntry> postProcessIndexEntries(List<VcfIndexEntry> entries, List<GeneFile> geneFiles,
            Chromosome chromosome, VCFHeader vcfHeader, VcfFileReader vcfReader) throws GeneReadingException {
        List<VcfIndexEntry> processedEntries = new ArrayList<>();
        int start = chromosome.getSize();
        int end = 0;
        for (FeatureIndexEntry entry : entries) {
            start = Math.min(start, entry.getStartIndex());
            end = Math.max(end, entry.getEndIndex());
        }

        Map<GeneFile, NggbIntervalTreeMap<List<Gene>>> intervalMapCache = new HashMap<>();

        for (VcfIndexEntry indexEntry : entries) {
            String geneIdsString = null;
            String geneNamesString = null;
            Set<VariationGeneInfo> geneIds = Collections.emptySet();

            for (GeneFile geneFile : geneFiles) {
                if (!intervalMapCache.containsKey(geneFile)) {
                    intervalMapCache.put(geneFile, loadGenesIntervalMap(geneFile, start, end, chromosome));
                }
                NggbIntervalTreeMap<List<Gene>> intervalMap = intervalMapCache.get(geneFile);

                geneIds = fetchGeneIdsFromBatch(intervalMap, indexEntry.getStartIndex(), indexEntry.getEndIndex(),
                        chromosome);
                geneIdsString = geneIds.stream().map(i -> i.geneId).collect(Collectors.joining(", "));
                geneNamesString = geneIds.stream().map(i -> i.geneName).collect(Collectors.joining(", "));
                indexEntry.setExon(geneIds.stream().anyMatch(i -> i.isExon));
            }

            Set<VariationType> types = new HashSet<>();
            for (int i = 0; i < indexEntry.getVariantContext().getAlternateAlleles().size(); i++) {
                Variation variation = vcfReader.createVariation(indexEntry.getVariantContext(), vcfHeader, i);
                types.add(variation.getType());
            }

            List<String> ambiguousInfoFields = vcfHeader.getInfoHeaderLines().stream().filter(
                    l -> l.getCount(indexEntry.getVariantContext()) > 1 && !isVariableLength(l.getCountType()))
                    .map(VCFCompoundHeaderLine::getID).collect(Collectors.toList());

            List<VcfIndexEntry> simplifiedEntries = simplifyVcfIndexEntries(indexEntry,
                    indexEntry.getVariantContext(), geneIds, types, geneIdsString, geneNamesString);

            processedEntries.addAll(splitAmbiguousInfoFields(simplifiedEntries, ambiguousInfoFields));
        }

        return processedEntries;
    }

    public void buildIndexForFile(FeatureFile featureFile) throws FeatureIndexException, IOException {
        if (!fileManager.indexForFeatureFileExists(featureFile)) {
            switch (featureFile.getFormat()) {
            case BED:
                bedManager.reindexBedFile(featureFile.getId());
                break;
            case GENE:
                // use false here, because it's default parameter for reindexing in GeneController
                gffManager.reindexGeneFile(featureFile.getId(), false);
                break;
            case VCF:
                vcfManager.reindexVcfFile(featureFile.getId());
                break;
            default:
                throw new IllegalArgumentException("Wrong FeatureType: " + featureFile.getFormat().name());
            }
        }
    }

    private NggbIntervalTreeMap<List<Gene>> loadGenesIntervalMap(GeneFile geneFile, int start, int end,
            Chromosome chromosome) {
        final NggbIntervalTreeMap<List<Gene>> genesRangeMap = new NggbIntervalTreeMap<>();
        try {
            IndexSearchResult<FeatureIndexEntry> searchResult = featureIndexDao
                    .searchFeaturesInInterval(Collections.singletonList(geneFile), start, end, chromosome);
            searchResult.getEntries().stream()
                    .filter(f -> f.getFeatureType() == FeatureType.EXON || f.getFeatureType() == FeatureType.GENE)
                    .map(f -> {
                        Gene gene = new Gene();
                        gene.setFeature(f.getFeatureType().name());
                        gene.setStartIndex(f.getStartIndex());
                        gene.setEndIndex(f.getEndIndex());
                        gene.setGroupId(f.getFeatureId());
                        gene.setFeatureName(f.getFeatureName().toUpperCase());
                        return gene;
                    }).forEach(g -> {
                        Interval interval = new Interval(chromosome.getName(), g.getStartIndex(), g.getEndIndex());
                        genesRangeMap.putIfAbsent(interval, new ArrayList<>());
                        genesRangeMap.get(interval).add(g);
                    });
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
        genesRangeMap.setMaxEndIndex(start);
        genesRangeMap.setMinStartIndex(end);
        return genesRangeMap;
    }

    private Set<VariationGeneInfo> getGeneIds(NggbIntervalTreeMap<List<Gene>> intervalMap, Chromosome chromosome,
            int start, int end) {
        Collection<Gene> genes = intervalMap.getOverlapping(new Interval(chromosome.getName(), start, end)).stream()
                .flatMap(l -> l.stream()).collect(Collectors.toList());
        Set<VariationGeneInfo> geneIds = new HashSet<>();
        if (genes != null) {
            boolean isExon = genes.stream().anyMatch(GeneUtils::isExon);
            geneIds = genes.stream().filter(GeneUtils::isGene)
                    .map(g -> new VariationGeneInfo(g.getGroupId(), g.getFeatureName(), isExon))
                    .collect(Collectors.toSet());
        }

        return geneIds;
    }

    /**
     * Fetch gene IDs of genes, affected by variation. The variation is specified by it's start and end indexes
     *
     * @param start a start index of the variation
     * @param end an end index of the variation
     * @param geneFiles a {@code List} of {@code GeneFile} to look for genes
     * @param chromosome a {@code Chromosome}
     * @return a {@code Set} of IDs of genes, affected by the variation
     * @throws GeneReadingException
     */
    public Set<String> fetchGeneIds(int start, int end, List<GeneFile> geneFiles, Chromosome chromosome)
            throws GeneReadingException {
        Set<String> geneIds = new HashSet<>();

        for (GeneFile geneFile : geneFiles) {
            List<Gene> genes = new ArrayList<>();
            Track<Gene> track = new Track<>();
            track.setStartIndex(start);
            track.setEndIndex(end);
            track.setId(geneFile.getId());
            track.setChromosome(chromosome);
            track.setScaleFactor(AbstractGeneReader.LARGE_SCALE_FACTOR_LIMIT);

            if (end > start) {
                track.setStartIndex(start);
                track.setEndIndex(start);
                track = gffManager.loadGenes(track, geneFile, chromosome, false);
                genes.addAll(track.getBlocks());

                track.setStartIndex(end);
                track.setEndIndex(end);
                track = gffManager.loadGenes(track, geneFile, chromosome, false);
                genes.addAll(track.getBlocks());

            } else {
                track.setStartIndex(start);
                track.setEndIndex(end);
                track = gffManager.loadGenes(track, geneFile, chromosome, false);
                genes = track.getBlocks();
            }

            geneIds.addAll(
                    genes.stream().filter(GeneUtils::isGene).map(Gene::getGroupId).collect(Collectors.toList()));
        }

        return geneIds;
    }

    /**
     * Writes index entries for specified {@link FeatureFile} to file system
     * @param featureFile a {@link FeatureFile}, for which index is written
     * @param entries a list of index entries
     * @throws IOException if error occurred while writing to file system
     */
    public void writeLuceneIndexForFile(final FeatureFile featureFile,
            final List<? extends FeatureIndexEntry> entries) throws IOException {
        featureIndexDao.writeLuceneIndexForFile(featureFile, entries);
    }

    public void makeIndexForBedReader(BedFile bedFile, AbstractFeatureReader<BEDFeature, LineIterator> reader,
            Map<String, Chromosome> chromosomeMap) throws IOException {
        CloseableIterator<BEDFeature> iterator = reader.iterator();
        List<FeatureIndexEntry> allEntries = new ArrayList<>();
        while (iterator.hasNext()) {
            BEDFeature next = iterator.next();
            FeatureIndexEntry entry = new FeatureIndexEntry();
            entry.setFeatureFileId(bedFile.getId());
            entry.setChromosome(Utils.getFromChromosomeMap(chromosomeMap, next.getContig()));
            entry.setStartIndex(next.getStart());
            entry.setEndIndex(next.getEnd());
            entry.setFeatureId(next.getName());
            entry.setFeatureName(next.getName());
            entry.setUuid(UUID.randomUUID());
            entry.setFeatureType(FeatureType.BED_FEATURE);
            allEntries.add(entry);
        }
        featureIndexDao.writeLuceneIndexForFile(bedFile, allEntries);
    }

    /**
     * Creates a VCF file feature index
     * @param vcfFile a VCF file to create index
     * @param reader a reader to file
     * @param geneFiles a {@code List} of {@code GeneFile} to look for genes
     * @param chromosomeMap a Map of {@link Chromosome}s to chromosome names
     * @param info VCF file's info data
     * @throws FeatureIndexException if an error occured while building an index
     */
    public void makeIndexForVcfReader(VcfFile vcfFile, FeatureReader<VariantContext> reader,
            List<GeneFile> geneFiles, Map<String, Chromosome> chromosomeMap, VcfFilterInfo info)
            throws FeatureIndexException {

        VCFHeader vcfHeader = (VCFHeader) reader.getHeader();

        try {
            CloseableIterator<VariantContext> iterator = reader.iterator();
            String currentKey = null;
            VariantContext variantContext = null;
            VcfFileReader vcfFileReader = new VcfFileReader(fileManager, referenceGenomeManager);
            List<VcfIndexEntry> allEntries = new ArrayList<>();

            while (iterator.hasNext()) {
                variantContext = iterator.next();

                if (!variantContext.getContig().equals(currentKey)) {
                    putVariationsInIndex(allEntries, currentKey, vcfFile, geneFiles, chromosomeMap, vcfFileReader,
                            vcfHeader);

                    currentKey = variantContext.getContig();
                }

                addVariationToIndex(allEntries, variantContext, chromosomeMap, info, vcfHeader, vcfFileReader);
            }

            // Put the last one
            if (variantContext != null && currentKey != null
                    && Utils.chromosomeMapContains(chromosomeMap, currentKey)) {
                List<VcfIndexEntry> processedEntries = postProcessIndexEntries(allEntries, geneFiles,
                        Utils.getFromChromosomeMap(chromosomeMap, currentKey), vcfHeader, vcfFileReader);
                featureIndexDao.writeLuceneIndexForFile(vcfFile, processedEntries);
                LOGGER.info(MessageHelper.getMessage(MessagesConstants.INFO_FEATURE_INDEX_CHROMOSOME_WROTE,
                        currentKey));
                allEntries.clear();
            }
        } catch (IOException | GeneReadingException e) {
            throw new FeatureIndexException(vcfFile, e);
        }
    }

    private void putVariationsInIndex(List<VcfIndexEntry> allEntries, String currentKey, VcfFile vcfFile,
            List<GeneFile> geneFiles, Map<String, Chromosome> chromosomeMap, VcfFileReader vcfFileReader,
            VCFHeader vcfHeader) throws GeneReadingException, IOException {
        if (currentKey != null && Utils.chromosomeMapContains(chromosomeMap, currentKey)) {
            List<VcfIndexEntry> processedEntries = postProcessIndexEntries(allEntries, geneFiles,
                    Utils.getFromChromosomeMap(chromosomeMap, currentKey), vcfHeader, vcfFileReader);
            featureIndexDao.writeLuceneIndexForFile(vcfFile, processedEntries);
            LOGGER.info(
                    MessageHelper.getMessage(MessagesConstants.INFO_FEATURE_INDEX_CHROMOSOME_WROTE, currentKey));
            allEntries.clear();
        }
    }

    /**
     * Creates Gene file's feature index
     * @param geneFile a {@link GeneFile} to create index
     * @param chromosomeMap a Map of {@link Chromosome}s to chromosome names
     * @param full
     * @throws IOException if an error occurred while writing index
     */
    public void processGeneFile(GeneFile geneFile, final Map<String, Chromosome> chromosomeMap, boolean full)
            throws IOException {
        List<FeatureIndexEntry> allEntries = new ArrayList<>();

        LOGGER.info("Writing feature index for file {}:{}", geneFile.getId(), geneFile.getName());

        if (!full && fileManager.checkGeneFileExists(geneFile, GeneFileType.TRANSCRIPT)) {
            try (AbstractFeatureReader<GeneFeature, LineIterator> largeScaleReader = fileManager
                    .makeGeneReader(geneFile, GeneFileType.LARGE_SCALE);
                    AbstractFeatureReader<GeneFeature, LineIterator> transcriptReader = fileManager
                            .makeGeneReader(geneFile, GeneFileType.TRANSCRIPT)) {
                CloseableIterator<GeneFeature> largeScaleIterator = largeScaleReader.iterator();
                CloseableIterator<GeneFeature> transcriptIterator = transcriptReader.iterator();

                addFeaturesFromIteratorToIndex(largeScaleIterator, chromosomeMap, geneFile, allEntries, false);
                addFeaturesFromIteratorToIndex(transcriptIterator, chromosomeMap, geneFile, allEntries, true);
            }
        } else {
            addFeaturesFromUsualGeneFileToIndex(geneFile, chromosomeMap, allEntries);
        }
    }

    private void addFeaturesFromUsualGeneFileToIndex(GeneFile geneFile, Map<String, Chromosome> chromosomeMap,
            List<FeatureIndexEntry> allEntries) throws IOException {
        try (AbstractFeatureReader<GeneFeature, LineIterator> usualReader = fileManager.makeGeneReader(geneFile,
                GeneFileType.ORIGINAL)) {
            CloseableIterator<GeneFeature> iterator = usualReader.iterator();
            GeneFeature feature = null;
            String currentKey = null;

            while (iterator.hasNext()) {
                feature = iterator.next();
                currentKey = checkNextChromosome(feature, currentKey, chromosomeMap, allEntries, geneFile);

                if (GeneUtils.isGene(feature) || GeneUtils.isTranscript(feature) || GeneUtils.isExon(feature)) {
                    addGeneFeatureToIndex(allEntries, feature, chromosomeMap);
                }
            }

            // Put the last one
            if (feature != null && currentKey != null && (chromosomeMap.containsKey(currentKey)
                    || chromosomeMap.containsKey(Utils.changeChromosomeName(currentKey)))) {
                featureIndexDao.writeLuceneIndexForFile(geneFile, allEntries);
                allEntries.clear();
            }
        }
    }

    private void addFeaturesFromIteratorToIndex(CloseableIterator<GeneFeature> iterator,
            Map<String, Chromosome> chromosomeMap, GeneFile geneFile, List<FeatureIndexEntry> allEntries,
            boolean transcriptIterator) throws IOException {
        GeneFeature feature = null;
        String currentKey = null;

        while (iterator.hasNext()) {
            feature = iterator.next();
            currentKey = checkNextChromosome(feature, currentKey, chromosomeMap, allEntries, geneFile);

            if (transcriptIterator || GeneUtils.isGene(feature) || GeneUtils.isExon(feature)) {
                addGeneFeatureToIndex(allEntries, feature, chromosomeMap);
            }
        }

        // Put the last one
        if (feature != null && currentKey != null && (chromosomeMap.containsKey(currentKey)
                || chromosomeMap.containsKey(Utils.changeChromosomeName(currentKey)))) {
            featureIndexDao.writeLuceneIndexForFile(geneFile, allEntries);
            allEntries.clear();
        }
    }

    private String checkNextChromosome(Feature feature, String currentChromosomeName,
            Map<String, Chromosome> chromosomeMap, List<FeatureIndexEntry> allEntries, GeneFile geneFile)
            throws IOException {
        if (!feature.getContig().equals(currentChromosomeName)) {
            if (currentChromosomeName != null && (chromosomeMap.containsKey(currentChromosomeName)
                    || chromosomeMap.containsKey(Utils.changeChromosomeName(currentChromosomeName)))) {

                featureIndexDao.writeLuceneIndexForFile(geneFile, allEntries);
                LOGGER.info(MessageHelper.getMessage(MessagesConstants.INFO_FEATURE_INDEX_CHROMOSOME_WROTE,
                        currentChromosomeName));
                allEntries.clear();
            }

            return feature.getContig();
        }

        return currentChromosomeName;
    }

    public void addVariationToIndex(List<VcfIndexEntry> allEntries, VariantContext context,
            Map<String, Chromosome> chromosomeMap, VcfFilterInfo filterInfo, VCFHeader vcfHeader,
            VcfFileReader vcfReader) {
        if (chromosomeMap.containsKey(context.getContig())
                || chromosomeMap.containsKey(Utils.changeChromosomeName(context.getContig()))) {
            VcfIndexEntry masterEntry = new VcfIndexEntry();
            masterEntry.setUuid(UUID.randomUUID());
            masterEntry.setFeatureId(context.getID());
            masterEntry.setChromosome(Utils.getFromChromosomeMap(chromosomeMap, context.getContig()));
            masterEntry.setStartIndex(context.getStart());
            masterEntry.setEndIndex(context.getEnd());
            masterEntry.setFeatureType(FeatureType.VARIATION);
            masterEntry.setInfo(filterInfoByWhiteList(context, filterInfo, vcfHeader));
            masterEntry.setVariantContext(context);

            double qual = context.getPhredScaledQual();
            masterEntry.setQuality(MathUtils.equals(qual, VcfManager.HTSJDK_WRONG_QUALITY) ? 0D : qual);

            List<OrganismType> organismTypes = new ArrayList<>();
            for (int i = 0; i < context.getAlternateAlleles().size(); i++) {
                Variation variation = vcfReader.createVariation(context, vcfHeader, i);
                organismTypes.add(variation.getGenotypeData().getOrganismType());
            }

            if (!organismTypes.isEmpty()
                    && organismTypes.stream().anyMatch(type -> type.equals(OrganismType.NO_VARIATION))) {
                return;
            }

            allEntries.add(masterEntry);
        }
    }

    private List<VcfIndexEntry> simplifyVcfIndexEntries(VcfIndexEntry masterEntry, VariantContext context,
            Set<VariationGeneInfo> geneIds, Set<VariationType> types, String geneIdsString,
            String geneNamesString) {
        List<VcfIndexEntry> simplifiedEntries = new ArrayList<>();
        for (VariationType type : types) {
            if (geneIds.isEmpty()) {
                if (context.getFilters().isEmpty()) {
                    VcfIndexEntry entry = new VcfIndexEntry(masterEntry);
                    entry.setVariationType(type);

                    simplifiedEntries.add(entry);
                } else {
                    simplifyFilters(masterEntry, context, simplifiedEntries, type);
                }
            } else {
                simplifyGeneIds(masterEntry, context, geneIds, geneIdsString, geneNamesString, simplifiedEntries,
                        type);
            }
        }

        return simplifiedEntries;
    }

    private void simplifyFilters(VcfIndexEntry masterEntry, VariantContext context,
            List<VcfIndexEntry> simplifiedEntries, VariationType type) {
        for (String filter : context.getFilters()) {
            VcfIndexEntry entry = new VcfIndexEntry(masterEntry);
            entry.setVariationType(type);
            entry.setFailedFilter(filter);

            simplifiedEntries.add(entry);
        }
    }

    private void simplifyGeneIds(VcfIndexEntry masterEntry, VariantContext context, Set<VariationGeneInfo> geneIds,
            String geneIdsString, String geneNamesString, List<VcfIndexEntry> simplifiedEntries,
            VariationType type) {
        for (VariationGeneInfo geneInfo : geneIds) {
            if (context.getFilters().isEmpty()) {
                VcfIndexEntry entry = new VcfIndexEntry(masterEntry);
                entry.setVariationType(type);
                entry.setGene(geneInfo.geneId);
                entry.setGeneName(geneInfo.geneName);
                entry.setGeneIds(geneIdsString);
                entry.setGeneNames(geneNamesString);

                simplifiedEntries.add(entry);
            } else {
                for (String filter : context.getFilters()) {
                    VcfIndexEntry entry = new VcfIndexEntry(masterEntry);
                    entry.setVariationType(type);
                    entry.setGene(geneInfo.geneId);
                    entry.setGeneName(geneInfo.geneName);
                    entry.setGeneIds(geneIdsString);
                    entry.setGeneNames(geneNamesString);
                    entry.setFailedFilter(filter);

                    simplifiedEntries.add(entry);
                }
            }
        }
    }

    public void addGeneFeatureToIndex(List<FeatureIndexEntry> allEntries, GeneFeature feature,
            Map<String, Chromosome> chromosomeMap) {
        if (chromosomeMap.containsKey(feature.getContig())
                || chromosomeMap.containsKey(Utils.changeChromosomeName(feature.getContig()))) {
            FeatureIndexEntry masterEntry = new FeatureIndexEntry();
            masterEntry.setFeatureId(feature.getFeatureId());
            masterEntry.setUuid(UUID.randomUUID());
            masterEntry.setChromosome(
                    chromosomeMap.containsKey(feature.getContig()) ? chromosomeMap.get(feature.getContig())
                            : chromosomeMap.get(Utils.changeChromosomeName(feature.getContig())));
            masterEntry.setStartIndex(feature.getStart());
            masterEntry.setEndIndex(feature.getEnd());
            if (GeneUtils.isGene(feature)) {
                masterEntry.setFeatureType(FeatureType.GENE);
            }
            if (GeneUtils.isTranscript(feature)) {
                masterEntry.setFeatureType(FeatureType.MRNA);
            }
            if (GeneUtils.isExon(feature)) {
                masterEntry.setFeatureType(FeatureType.EXON);
            }

            allEntries.add(masterEntry);

            String featureName = feature.getFeatureName();
            masterEntry.setFeatureName(featureName);
        }
    }

    private Map<String, Object> filterInfoByWhiteList(VariantContext context, VcfFilterInfo vcfFilterInfo,
            VCFHeader vcfHeader) {
        Map<String, Object> permittedInfo = new HashMap<>();
        Map<String, Object> info = context.getAttributes();

        vcfFilterInfo.getInfoItems().forEach(key -> {
            if (info.containsKey(key.getName()) && vcfHeader.getInfoHeaderLine(key.getName()) != null) {
                int count = vcfHeader.getInfoHeaderLine(key.getName()).getCount(context);
                VCFHeaderLineCount countType = vcfHeader.getInfoHeaderLine(key.getName()).getCountType();
                switch (key.getType()) {
                case Integer:
                    addNumberInfo(permittedInfo, info, count, countType, VCFHeaderLineType.Integer, key);
                    break;
                case Float:
                    addNumberInfo(permittedInfo, info, count, countType, VCFHeaderLineType.Float, key);
                    break;
                case Flag:
                    permittedInfo.put(key.getName(), parseFlagInfo(permittedInfo, info, count, key));
                    break;
                default:
                    permittedInfo.put(key.getName(), info.get(key.getName()));
                }
            }
        });

        return permittedInfo;
    }

    private void addNumberInfo(Map<String, Object> permittedInfo, Map<String, Object> info, int count,
            VCFHeaderLineCount countType, VCFHeaderLineType type, InfoItem key) {
        Object value;
        if (isVariableLength(countType)) {
            value = info.get(key.getName()).toString();
        } else if (count > 1) {
            value = parseNumberArray(type, info.get(key.getName()));

            if (value == null) {
                LOGGER.error(
                        MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_WRITING_WRONG_PARAMETER_TYPE,
                                key.getName(), key.getType(), info.get(key.getName()).toString()));
                return;
            }

            permittedInfo.put("_" + key.getName() + "_v", info.get(key.getName()).toString());
        } else {
            String numberString = info.get(key.getName()).toString();

            if (NumberUtils.isNumber(numberString)) {
                value = parseNumber(type, info.get(key.getName()));
            } else {
                LOGGER.error(
                        MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_WRITING_WRONG_PARAMETER_TYPE,
                                key.getName(), key.getType(), numberString));
                return;
            }
        }

        permittedInfo.put(key.getName(), value);
    }

    private Object parseNumberArray(VCFHeaderLineType type, Object infoObject) {
        switch (type) {
        case Integer:
            return Utils.parseIntArray(infoObject.toString());
        case Float:
            return Utils.parseFloatArray(infoObject.toString());
        default:
            throw new IllegalArgumentException(
                    MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_INVALID_NUMBER_FORMAT, type));
        }
    }

    private Object parseNumber(VCFHeaderLineType type, Object infoObject) {
        switch (type) {
        case Integer:
            return Integer.parseInt(infoObject.toString());
        case Float:
            return Float.parseFloat(infoObject.toString());
        default:
            throw new IllegalArgumentException(
                    MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_INVALID_NUMBER_FORMAT, type));
        }
    }

    private Object parseFlagInfo(Map<String, Object> permittedInfo, Map<String, Object> info, int count,
            InfoItem key) {
        if (count > 1) {
            permittedInfo.put("_" + key.getName() + "_v", info.get(key.getName()).toString());
            return Utils.parseBooleanArray(info.get(key.getName()).toString());
        } else {
            return Boolean.parseBoolean(info.get(key.getName()).toString());
        }
    }

    private boolean isVariableLength(VCFHeaderLineCount countType) {
        return countType != VCFHeaderLineCount.INTEGER;
    }

    private List<VcfIndexEntry> splitAmbiguousInfoFields(List<VcfIndexEntry> entries,
            List<String> ambigousInfoFields) {
        ArrayList<VcfIndexEntry> queue = new ArrayList<>(entries);
        List<VcfIndexEntry> simplifiedEntries = new ArrayList<>();
        for (String key : ambigousInfoFields) {
            ArrayList<VcfIndexEntry> nextIteration = new ArrayList<>();
            for (FeatureIndexEntry e : queue) {
                VcfIndexEntry vcfIndexEntry = (VcfIndexEntry) e;
                boolean found = false;

                if (vcfIndexEntry.getInfo().containsKey(key)
                        && vcfIndexEntry.getInfo().get(key) instanceof Object[]) {
                    found = true;
                    Object[] arr = (Object[]) vcfIndexEntry.getInfo().get(key);
                    makeCopies(vcfIndexEntry, arr, nextIteration, key);
                }

                if (!found) {
                    simplifiedEntries.add(vcfIndexEntry);
                }
            }

            queue = nextIteration;
        }

        queue.addAll(simplifiedEntries);
        return queue;
    }

    private void makeCopies(VcfIndexEntry vcfIndexEntry, Object[] infoArray, ArrayList<VcfIndexEntry> nextIteration,
            String key) {
        for (Object element : infoArray) {
            VcfIndexEntry copy = new VcfIndexEntry(vcfIndexEntry);
            copy.getInfo().put(key, element);
            nextIteration.add(copy);
        }
    }

    private static class VariationGeneInfo {
        private String geneId;
        private String geneName;
        private boolean isExon;

        VariationGeneInfo(String geneId, String geneName, boolean isExon) {
            this.geneId = geneId;
            this.geneName = geneName;
            this.isExon = isExon;
        }

        @Override
        public int hashCode() {
            return geneId.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            return obj != null && obj.getClass() == this.getClass()
                    && Objects.equals(((VariationGeneInfo) obj).geneId, geneId);
        }
    }
}