Java tutorial
/* * MIT License * * Copyright (c) 2016 EPAM Systems * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package com.epam.catgenome.manager; import java.io.IOException; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; import com.epam.catgenome.entity.bed.BedFile; import com.epam.catgenome.manager.bed.BedManager; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.lucene.queryparser.classic.ParseException; import org.junit.Assert; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.core.io.Resource; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import org.springframework.transaction.annotation.Propagation; import org.springframework.transaction.annotation.Transactional; import com.epam.catgenome.common.AbstractManagerTest; import com.epam.catgenome.controller.vo.registration.FeatureIndexedFileRegistrationRequest; import com.epam.catgenome.dao.index.FeatureIndexDao; import com.epam.catgenome.dao.index.field.IndexSortField; import com.epam.catgenome.entity.BiologicalDataItem; import com.epam.catgenome.entity.gene.GeneFile; import com.epam.catgenome.entity.index.BookmarkIndexEntry; import com.epam.catgenome.entity.index.FeatureIndexEntry; import com.epam.catgenome.entity.index.FeatureType; import com.epam.catgenome.entity.index.Group; import com.epam.catgenome.entity.index.IndexSearchResult; import com.epam.catgenome.entity.index.VcfIndexEntry; import com.epam.catgenome.entity.project.Project; import com.epam.catgenome.entity.project.ProjectItem; import com.epam.catgenome.entity.reference.Bookmark; import com.epam.catgenome.entity.reference.Chromosome; import com.epam.catgenome.entity.reference.Reference; import com.epam.catgenome.entity.vcf.InfoItem; import com.epam.catgenome.entity.vcf.Variation; import com.epam.catgenome.entity.vcf.VariationQuery; import com.epam.catgenome.entity.vcf.VariationType; import com.epam.catgenome.entity.vcf.VcfFile; import com.epam.catgenome.entity.vcf.VcfFilterForm; import com.epam.catgenome.entity.vcf.VcfFilterInfo; import com.epam.catgenome.exception.FeatureIndexException; import com.epam.catgenome.exception.VcfReadingException; import com.epam.catgenome.helper.EntityHelper; import com.epam.catgenome.manager.gene.GffManager; import com.epam.catgenome.manager.project.ProjectManager; import com.epam.catgenome.manager.reference.BookmarkManager; import com.epam.catgenome.manager.reference.ReferenceGenomeManager; import com.epam.catgenome.manager.vcf.VcfManager; import com.epam.catgenome.util.TestUtils; import com.epam.catgenome.util.Utils; /** * Source: FeatureIndexManagerTest * Created: 29.04.16, 17:11 * Project: CATGenome Browser * Make: IntelliJ IDEA 14.1.4, JDK 1.8 * * @author Mikhail Miroliubov */ @RunWith(SpringJUnit4ClassRunner.class) @ContextConfiguration({ "classpath:applicationContext-test.xml" }) public class FeatureIndexManagerTest extends AbstractManagerTest { private static final String CLASSPATH_TEMPLATES_FELIS_CATUS_VCF = "classpath:templates/Felis_catus.vcf"; private static final String CLASSPATH_TEMPLATES_GENES_SORTED = "classpath:templates/genes_sorted.gtf"; private static final String CLASSPATH_TEMPLATES_BED = "classpath:templates/example.bed"; private static final int SVLEN_VALUE = -150; //public static final float QUAL_VALUE = -10.0F; private static final int CONST_42 = 42; private static final int TEST_WICKED_VCF_LENGTH = 248617560; private static final int PERFORMANCE_TEST_WARMING_COUNT = 20; private static final int PERFORMANCE_TEST_ATTEMPTS_COUNT = 20; private static final int PERFORMANCE_TEST_PAGE_SIZE = 20; private static final int INTERVAL1_START = 400_000; private static final int INTERVAL1_END = 500_000; private static final int INTERVAL2_START = 550_000; private static final int INTERVAL2_END = 650_000; private static final int INTERVAL3_START = 35470; private static final int INTERVAL3_END = 35490; private static final int BED_FEATURE_START = 127471197; private static final int BED_FEATURE_END = 127472363; private Logger logger = LoggerFactory.getLogger(FeatureIndexManagerTest.class); @Autowired private VcfManager vcfManager; @Autowired private ReferenceGenomeManager referenceGenomeManager; @Autowired private GffManager gffManager; @Autowired private BedManager bedManager; @Autowired private ProjectManager projectManager; @Autowired private FeatureIndexManager featureIndexManager; @Autowired private FeatureIndexDao featureIndexDao; @Autowired private BookmarkManager bookmarkManager; @Autowired private FileManager fileManager; @Autowired private ApplicationContext context; private static final int TEST_CHROMOSOME_SIZE = 239107476; private static final String TEST_PROJECT_NAME = "testProject1"; private static final String TEST_GENE_PREFIX = "ENS"; private static final String TEST_GENE_NAME = "pglyrp4"; private static final String TEST_GENE_AND_FILE_ID_QUERY = "geneId:ENS* AND fileId:%d"; private static final String SVTYPE_FIELD = "SVTYPE"; private static final String SVLEN_FIELD = "SVLEN"; private long referenceId; private Reference testReference; private Chromosome testChromosome; private VcfFile testVcf; private GeneFile testGeneFile; private BedFile testBedFile; private Project testProject; @Before public void setup() throws Exception { testChromosome = EntityHelper.createNewChromosome(); testChromosome.setSize(TEST_CHROMOSOME_SIZE); testReference = EntityHelper.createNewReference(testChromosome, referenceGenomeManager.createReferenceId()); referenceGenomeManager.register(testReference); referenceId = testReference.getId(); Resource resource = context.getResource(CLASSPATH_TEMPLATES_GENES_SORTED); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setPath(resource.getFile().getAbsolutePath()); testGeneFile = gffManager.registerGeneFile(request); referenceGenomeManager.updateReferenceGeneFileId(testReference.getId(), testGeneFile.getId()); Resource bedResource = context.getResource(CLASSPATH_TEMPLATES_BED); FeatureIndexedFileRegistrationRequest bedFileRequest = new FeatureIndexedFileRegistrationRequest(); bedFileRequest.setReferenceId(referenceId); bedFileRequest.setPath(bedResource.getFile().getAbsolutePath()); testBedFile = bedManager.registerBed(bedFileRequest); // TODO Ask about indexing bed with registration bedManager.reindexBedFile(testBedFile.getId()); resource = context.getResource(CLASSPATH_TEMPLATES_FELIS_CATUS_VCF); request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setPath(resource.getFile().getAbsolutePath()); testVcf = vcfManager.registerVcfFile(request); testProject = new Project(); testProject.setName(TEST_PROJECT_NAME); testProject.setItems(Arrays.asList(new ProjectItem(new BiologicalDataItem(testVcf.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(testReference.getBioDataItemId())))); projectManager.saveProject(testProject); // Index is created when vcf file is added } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testCreateFeatureIndex() throws Exception { List<FeatureIndexEntry> entryList = (List<FeatureIndexEntry>) featureIndexDao .searchFileIndexes(Collections.singletonList(testVcf), String.format(TEST_GENE_AND_FILE_ID_QUERY, testVcf.getId()), null) .getEntries(); Assert.assertFalse(entryList.isEmpty()); VcfFilterForm vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(testVcf.getId())); vcfFilterForm .setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList(TEST_GENE_PREFIX), false)); vcfFilterForm.setVariationTypes( new VcfFilterForm.FilterSection<>(Arrays.asList(VariationType.MNP, VariationType.SNV), false)); //vcfFilterForm.setQuality(Collections.singletonList(QUAL_VALUE)); IndexSearchResult<VcfIndexEntry> entryList2 = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertTrue(entryList2.getEntries().stream().anyMatch(e -> e.getInfo() != null && (Boolean) e.getInfo().get(FeatureIndexDao.FeatureIndexFields.IS_EXON.getFieldName()))); vcfFilterForm.setChromosomeIds(Collections.singletonList(testChromosome.getId())); entryList2 = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); double time1 = Utils.getSystemTimeMilliseconds(); List<Long> chromosomeIds = featureIndexDao.getChromosomeIdsWhereVariationsPresentFacet( Collections.singletonList(testVcf), "geneId:ENS* AND fileId:" + testVcf.getId() + " AND variationType:snv"); double time2 = Utils.getSystemTimeMilliseconds(); logger.info("Get chromosomes by facets time: {} ms", time2 - time1); Assert.assertFalse(chromosomeIds.isEmpty()); List<Chromosome> chromosomes = featureIndexManager.filterChromosomes(vcfFilterForm, testProject.getId()); Assert.assertFalse(chromosomes.isEmpty()); // filter by additional fields Map<String, Object> additionalFilters = new HashMap<>(); additionalFilters.put(SVTYPE_FIELD, "DEL"); //additionalFilters.put("SVLEN", SVLEN_VALUE); additionalFilters.put(SVLEN_FIELD, String.valueOf(SVLEN_VALUE)); vcfFilterForm.setAdditionalFilters(additionalFilters); vcfFilterForm.setGenes(null); vcfFilterForm.setVariationTypes(null); vcfFilterForm.setInfoFields(Arrays.asList(SVTYPE_FIELD, SVLEN_FIELD)); entryList2 = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertFalse(entryList2.getEntries().stream().anyMatch(e -> e.getInfo().isEmpty())); Set<String> genes = featureIndexManager.searchGenesInVcfFilesInProject(testProject.getId(), TEST_GENE_PREFIX, Collections.singletonList(testVcf.getId())); Assert.assertFalse(genes.isEmpty()); // search by gene name pglyrp4 vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList(TEST_GENE_NAME))); IndexSearchResult<VcfIndexEntry> entries = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entries.getEntries().isEmpty()); genes = featureIndexManager.searchGenesInVcfFilesInProject(testProject.getId(), TEST_GENE_NAME, Collections.singletonList(testVcf.getId())); Assert.assertFalse(genes.isEmpty()); vcfFilterForm.setPageSize(1); int totalCount = featureIndexManager.getTotalPagesCount(vcfFilterForm, testProject.getId()); Assert.assertEquals(entries.getEntries().size(), totalCount); // search exons vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setExon(true); entryList2 = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertTrue(entryList2.getEntries().stream().allMatch(e -> e.getInfo() != null && (Boolean) e.getInfo().get(FeatureIndexDao.FeatureIndexFields.IS_EXON.getFieldName()))); // check duplicates entryList2 = featureIndexManager.filterVariations(new VcfFilterForm(), testProject.getId()); checkDuplicates(entryList2.getEntries()); // test filter by position VcfIndexEntry e = entryList2.getEntries().get(0); VcfFilterForm filterForm = new VcfFilterForm(); filterForm.setStartIndex(e.getStartIndex()); filterForm.setEndIndex(e.getEndIndex()); entryList2 = featureIndexManager.filterVariations(filterForm, testProject.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertTrue( entryList2.getEntries().stream().allMatch(v -> v.getStartIndex() >= filterForm.getStartIndex() && v.getEndIndex() <= filterForm.getEndIndex())); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testLoadAllFields() throws IOException { VcfFilterForm filterForm = new VcfFilterForm(); VcfFilterInfo info = vcfManager.getFiltersInfo(Collections.singletonList(testVcf.getId())); filterForm.setInfoFields(info.getInfoItems().stream().map(InfoItem::getName).collect(Collectors.toList())); IndexSearchResult<VcfIndexEntry> entries = featureIndexManager.filterVariations(filterForm, testProject.getId()); Assert.assertFalse(entries.getEntries().isEmpty()); } /** * Testes indexing a vcf file with populated gene information. Therefore this information should be read from * VCF, not from gff files * @throws Exception */ @Ignore @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testCreateFeatureIndex2() throws Exception { Chromosome chr14 = EntityHelper.createNewChromosome("chr14"); chr14.setSize(TEST_CHROMOSOME_SIZE); Reference testHumanReference = EntityHelper.createNewReference(chr14, referenceGenomeManager.createReferenceId()); referenceGenomeManager.register(testHumanReference); Long humanReferenceId = testHumanReference.getId(); Resource resource = context.getResource("classpath:templates/sample_2-lumpy.vcf"); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(humanReferenceId); request.setPath(resource.getFile().getAbsolutePath()); VcfFile vcfFile = vcfManager.registerVcfFile(request); Project project = new Project(); project.setName(TEST_PROJECT_NAME); project.setItems( Collections.singletonList(new ProjectItem(new BiologicalDataItem(vcfFile.getBioDataItemId())))); projectManager.saveProject(project); // Index is created when vcf file is added VcfFilterInfo info = featureIndexManager.loadVcfFilterInfoForProject(project.getId()); VcfFilterForm vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(vcfFile.getId())); vcfFilterForm .setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList("ENSG00000185070"), false)); vcfFilterForm.setVariationTypes( new VcfFilterForm.FilterSection<>(Arrays.asList(VariationType.DEL, VariationType.SNV), false)); String cipos95 = "CIPOS95"; vcfFilterForm .setInfoFields(info.getInfoItems().stream().map(InfoItem::getName).collect(Collectors.toList())); vcfFilterForm.setAdditionalFilters(Collections.singletonMap(cipos95, Arrays.asList(CONST_42, CONST_42))); IndexSearchResult<VcfIndexEntry> entryList2 = featureIndexManager.filterVariations(vcfFilterForm, project.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertTrue(entryList2.getEntries().stream().anyMatch(e -> e.getInfo().containsKey(cipos95))); Assert.assertTrue( entryList2.getEntries().stream().filter(e -> e.getInfo().containsKey(cipos95)).allMatch(e -> { String cipos = (String) e.getInfo().get(cipos95); return cipos.startsWith("[") && cipos.endsWith("]"); })); // check info properly loaded for (VcfIndexEntry e : entryList2.getEntries()) { VariationQuery query = new VariationQuery(); query.setId(e.getFeatureFileId()); query.setProjectId(project.getId()); query.setChromosomeId(e.getChromosome().getId()); query.setPosition(e.getStartIndex()); Variation variation = vcfManager.loadVariation(query); Assert.assertNotNull(variation); for (Map.Entry<String, Variation.InfoField> i : variation.getInfo().entrySet()) { if (i.getValue().getValue() != null) { Assert.assertTrue( String.format("%s expected, %s found", i.getValue().getValue(), e.getInfo().get(i.getKey())), i.getValue().getValue().toString() .equalsIgnoreCase(e.getInfo().get(i.getKey()).toString())); } else { Assert.assertEquals(i.getValue().getValue(), e.getInfo().get(i.getKey())); } } } // flrt2 vcfFilterForm.setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList("FLRT2"), false)); IndexSearchResult<VcfIndexEntry> entryList21 = featureIndexManager.filterVariations(vcfFilterForm, project.getId()); Assert.assertFalse(entryList21.getEntries().isEmpty()); Assert.assertEquals(entryList21.getEntries().size(), entryList2.getEntries().size()); Assert.assertEquals(entryList21.getEntries().get(0).getGene(), entryList2.getEntries().get(0).getGene()); // empty filter test entryList2 = featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); checkDuplicates(entryList2.getEntries()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testIndexUpdateOnProjectOperations() throws Exception { Resource gffResource = context.getResource(CLASSPATH_TEMPLATES_GENES_SORTED); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setPath(gffResource.getFile().getAbsolutePath()); request.setName("testGeneFile"); GeneFile geneFile = gffManager.registerGeneFile(request); Assert.assertNotNull(geneFile); Assert.assertNotNull(geneFile.getId()); referenceGenomeManager.updateReferenceGeneFileId(testReference.getId(), geneFile.getId()); Resource vcfResource = context.getResource(CLASSPATH_TEMPLATES_FELIS_CATUS_VCF); request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setPath(vcfResource.getFile().getAbsolutePath()); request.setName("testVcf"); VcfFile vcfFile = vcfManager.registerVcfFile(request); Project project = new Project(); project.setName(TEST_PROJECT_NAME + 1); project.setItems(Arrays.asList(new ProjectItem(new BiologicalDataItem(vcfFile.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(geneFile.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(testReference.getBioDataItemId())))); projectManager.saveProject(project); // Index is created when vcf file is added VcfFilterForm vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(vcfFile.getId())); vcfFilterForm.setChromosomeIds(Collections.singletonList(testChromosome.getId())); vcfFilterForm .setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList(TEST_GENE_PREFIX), false)); vcfFilterForm.setVariationTypes( new VcfFilterForm.FilterSection<>(Arrays.asList(VariationType.MNP, VariationType.SNV), false)); IndexSearchResult<VcfIndexEntry> entryList = featureIndexManager.filterVariations(vcfFilterForm, project.getId()); Assert.assertFalse(entryList.getEntries().isEmpty()); // try to add an vcf item request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setPath(vcfResource.getFile().getAbsolutePath()); request.setName(vcfResource.getFilename() + "2"); VcfFile vcfFile2 = vcfManager.registerVcfFile(request); project = projectManager.addProjectItem(project.getId(), vcfFile2.getBioDataItemId()); entryList = featureIndexManager.filterVariations(vcfFilterForm, project.getId()); Assert.assertFalse(entryList.getEntries().isEmpty()); Assert.assertTrue( entryList.getEntries().stream().allMatch(e -> e.getFeatureFileId().equals(vcfFile.getId()))); VcfFilterForm vcfFilterForm2 = new VcfFilterForm(); vcfFilterForm2.setVcfFileIds(Collections.singletonList(vcfFile2.getId())); vcfFilterForm.setChromosomeIds(Collections.singletonList(testChromosome.getId())); vcfFilterForm2 .setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList(TEST_GENE_PREFIX), false)); vcfFilterForm2.setVariationTypes( new VcfFilterForm.FilterSection<>(Arrays.asList(VariationType.MNP, VariationType.SNV), false)); IndexSearchResult<VcfIndexEntry> entryList2 = featureIndexManager.filterVariations(vcfFilterForm2, project.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertEquals(entryList.getEntries().size(), entryList2.getEntries().size()); Assert.assertTrue( entryList2.getEntries().stream().allMatch(e -> e.getFeatureFileId().equals(vcfFile2.getId()))); // test no vcfFileIds vcfFilterForm2.setVcfFileIds(null); entryList2 = featureIndexManager.filterVariations(vcfFilterForm2, project.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertEquals(entryList2.getEntries().size(), entryList.getEntries().size() * 2); // test with multiple vcfFileIds vcfFilterForm2.setVcfFileIds(Arrays.asList(vcfFile.getId(), vcfFile2.getId())); entryList2 = featureIndexManager.filterVariations(vcfFilterForm2, project.getId()); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertEquals(entryList2.getEntries().size(), entryList.getEntries().size() * 2); // try to remove a vcf item by save - should be not indexed project.setItems(project.getItems().stream() .filter(i -> !(i.getBioDataItem() instanceof VcfFile) || !((VcfFile) i.getBioDataItem()).getId().equals(vcfFile2.getId())) .collect(Collectors.toList())); project = projectManager.saveProject(project); vcfFilterForm2.setVcfFileIds(Collections.singletonList(vcfFile2.getId())); entryList2 = featureIndexManager.filterVariations(vcfFilterForm2, project.getId()); Assert.assertTrue(entryList2.getEntries().isEmpty()); // try to remove gene file project.setItems(project.getItems().stream().filter(i -> !(i.getBioDataItem() instanceof GeneFile)) .collect(Collectors.toList())); project = projectManager.saveProject(project); vcfFilterForm.setGenes(null); entryList = featureIndexManager.filterVariations(vcfFilterForm, project.getId()); Assert.assertFalse(entryList.getEntries().isEmpty()); // add multiple files project.getItems().clear(); projectManager.saveProject(project); Project loadedProject = projectManager.loadProject(project.getId()); Assert.assertTrue(loadedProject.getItems().isEmpty()); entryList2 = featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()); Assert.assertTrue(entryList2.getEntries().isEmpty()); project.setItems(Arrays.asList(new ProjectItem(new BiologicalDataItem(vcfFile.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(vcfFile2.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(testReference.getBioDataItemId())))); projectManager.saveProject(project); entryList2 = featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()); Assert.assertTrue( entryList2.getEntries().stream().anyMatch(e -> e.getFeatureFileId().equals(vcfFile.getId()))); Assert.assertTrue( entryList2.getEntries().stream().anyMatch(e -> e.getFeatureFileId().equals(vcfFile2.getId()))); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testLoadVcfFilterInfoForProject() throws IOException, InterruptedException, NoSuchAlgorithmException, VcfReadingException, FeatureIndexException { VcfFilterInfo filterInfo = featureIndexManager.loadVcfFilterInfoForProject(testProject.getId()); Assert.assertFalse(filterInfo.getAvailableFilters().isEmpty()); Assert.assertFalse(filterInfo.getInfoItems().isEmpty()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testCreateGeneIndex() throws IOException, InterruptedException, FeatureIndexException, NoSuchAlgorithmException, VcfReadingException { IndexSearchResult searchResult = featureIndexManager.searchFeaturesInProject("", testProject.getId()); Assert.assertTrue(searchResult.getEntries().isEmpty()); searchResult = featureIndexManager.searchFeaturesInProject("ens", testProject.getId()); Assert.assertFalse(searchResult.getEntries().isEmpty()); Assert.assertTrue(searchResult.getEntries().size() <= 10); Assert.assertTrue(searchResult.isExceedsLimit()); // ensfcag00000031547 and ccdc115 searchResult = featureIndexManager.searchFeaturesInProject("ensfcag00000031547", testProject.getId()); Assert.assertEquals(searchResult.getEntries().size(), 1); searchResult = featureIndexManager.searchFeaturesInProject("ccdc115", testProject.getId()); Assert.assertEquals(searchResult.getEntries().size(), 2); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testIntervalQuery() throws IOException, InterruptedException, FeatureIndexException, NoSuchAlgorithmException, VcfReadingException { Resource resource = context.getResource(CLASSPATH_TEMPLATES_GENES_SORTED); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setName("GENES_SORTED_INT"); request.setPath(resource.getFile().getAbsolutePath()); GeneFile geneFile = gffManager.registerGeneFile(request); Assert.assertNotNull(geneFile); Assert.assertNotNull(geneFile.getId()); referenceGenomeManager.updateReferenceGeneFileId(testReference.getId(), geneFile.getId()); Project project = new Project(); project.setName(TEST_PROJECT_NAME + "_INT"); project.setItems(Arrays.asList(new ProjectItem(new BiologicalDataItem(testReference.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(geneFile.getBioDataItemId())))); projectManager.saveProject(project); IndexSearchResult result1 = featureIndexDao.searchFeaturesInInterval(Collections.singletonList(geneFile), INTERVAL1_START, INTERVAL1_END, testChromosome); Assert.assertEquals(3, result1.getEntries().size()); IndexSearchResult result2 = featureIndexDao.searchFeaturesInInterval(Collections.singletonList(geneFile), INTERVAL2_START, INTERVAL2_END, testChromosome); Assert.assertEquals(0, result2.getEntries().size()); IndexSearchResult result3 = featureIndexDao.searchFeaturesInInterval(Collections.singletonList(geneFile), INTERVAL3_START, INTERVAL3_END, testChromosome); Assert.assertEquals(3, result3.getEntries().size()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testCreateUnmappedGeneIndex() throws IOException, InterruptedException, FeatureIndexException, NoSuchAlgorithmException { Chromosome chr1 = EntityHelper.createNewChromosome("chr1"); chr1.setSize(TEST_CHROMOSOME_SIZE); Reference testHumanReference = EntityHelper.createNewReference(chr1, referenceGenomeManager.createReferenceId()); referenceGenomeManager.register(testHumanReference); Long humanReferenceId = testHumanReference.getId(); Resource resource = context.getResource("classpath:templates/mrna.sorted.chunk.gtf"); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(humanReferenceId); request.setPath(resource.getFile().getAbsolutePath()); GeneFile geneFile = gffManager.registerGeneFile(request); Assert.assertNotNull(geneFile); Assert.assertNotNull(geneFile.getId()); Project project = new Project(); project.setName(TEST_PROJECT_NAME + 1); project.setItems(Arrays.asList(new ProjectItem(new BiologicalDataItem(geneFile.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(testHumanReference.getBioDataItemId())))); projectManager.saveProject(project); List<FeatureIndexEntry> entryList = (List<FeatureIndexEntry>) featureIndexManager .searchFeaturesInProject("", project.getId()).getEntries(); Assert.assertTrue(entryList.isEmpty()); entryList = (List<FeatureIndexEntry>) featureIndexManager .searchFeaturesInProject("AM992871", project.getId()).getEntries(); Assert.assertTrue(entryList.isEmpty()); // we don't search for exons } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testBookmarkSearch() throws IOException, InterruptedException, FeatureIndexException { Bookmark bookmark = new Bookmark(); bookmark.setChromosome(testChromosome); bookmark.setStartIndex(1); bookmark.setEndIndex(testChromosome.getSize()); bookmark.setName("testBookmark"); bookmarkManager.saveBookmark(bookmark); Bookmark loadedBookmark = bookmarkManager.loadBookmark(bookmark.getId()); Assert.assertNotNull(loadedBookmark); IndexSearchResult<FeatureIndexEntry> result = featureIndexManager .searchFeaturesInProject(bookmark.getName(), testProject.getId()); Assert.assertFalse(result.getEntries().isEmpty()); Assert.assertEquals(result.getEntries().get(0).getFeatureType(), FeatureType.BOOKMARK); Assert.assertNotNull(((BookmarkIndexEntry) result.getEntries().get(0)).getBookmark()); Assert.assertEquals(result.getEntries().size(), result.getTotalResultsCount()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testWickedVcfIndex() throws IOException, InterruptedException, FeatureIndexException, NoSuchAlgorithmException, ParseException, VcfReadingException { Chromosome chr1 = EntityHelper.createNewChromosome("chr21"); chr1.setSize(TEST_WICKED_VCF_LENGTH); Chromosome chr2 = EntityHelper.createNewChromosome("chr22"); chr2.setSize(TEST_WICKED_VCF_LENGTH); Reference testHumanReference = EntityHelper.createNewReference(Arrays.asList(chr1, chr2), referenceGenomeManager.createReferenceId()); referenceGenomeManager.register(testHumanReference); Long humanReferenceId = testHumanReference.getId(); Project project = new Project(); project.setName(TEST_PROJECT_NAME + 1); project.setItems(Collections .singletonList(new ProjectItem(new BiologicalDataItem(testHumanReference.getBioDataItemId())))); projectManager.saveProject(project); Resource resource = context.getResource("classpath:templates/Homo_sapiens.GRCh38.83.sorted.chr21-22.gtf"); FeatureIndexedFileRegistrationRequest geneRequest = new FeatureIndexedFileRegistrationRequest(); geneRequest.setPath(resource.getFile().getAbsolutePath()); geneRequest.setReferenceId(humanReferenceId); GeneFile geneFile = gffManager.registerGeneFile(geneRequest); referenceGenomeManager.updateReferenceGeneFileId(humanReferenceId, geneFile.getId()); resource = context.getResource("classpath:templates/Dream.set3.VarDict.SV.vcf"); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(humanReferenceId); request.setPath(resource.getFile().getAbsolutePath()); VcfFile vcfFile = vcfManager.registerVcfFile(request); Assert.assertNotNull(vcfFile); Assert.assertNotNull(vcfFile.getId()); project.setItems(Arrays.asList(new ProjectItem(geneFile), new ProjectItem(vcfFile))); projectManager.saveProject(project); IndexSearchResult<VcfIndexEntry> entries = featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()); Assert.assertFalse(entries.getEntries().isEmpty()); long varGenesCount = entries.getEntries().stream().filter(e -> StringUtils.isNotBlank(e.getGene())).count(); Assert.assertTrue(varGenesCount > 0); /*entries.stream().filter(e -> StringUtils.isNotBlank(e.getGene())).forEach(e -> logger.info("{} - {}, {}", e .getStartIndex(), e.getEndIndex(), e.getGeneIds()));*/ // check chromosome filter VcfFilterForm chr21Form = new VcfFilterForm(); chr21Form.setChromosomeIds(Collections.singletonList(chr1.getId())); IndexSearchResult<VcfIndexEntry> chr21Entries = featureIndexManager.filterVariations(chr21Form, project.getId()); Assert.assertFalse(chr21Entries.getEntries().isEmpty()); Assert.assertTrue( chr21Entries.getEntries().stream().allMatch(e -> e.getChromosome().getId().equals(chr1.getId()))); VcfFilterForm chr22Form = new VcfFilterForm(); chr22Form.setChromosomeIds(Collections.singletonList(chr2.getId())); IndexSearchResult<VcfIndexEntry> chr22Entries = featureIndexManager.filterVariations(chr22Form, project.getId()); Assert.assertFalse(chr22Entries.getEntries().isEmpty()); Assert.assertTrue( chr22Entries.getEntries().stream().allMatch(e -> e.getChromosome().getId().equals(chr2.getId()))); VcfFilterForm chr2122Form = new VcfFilterForm(); chr2122Form.setChromosomeIds(Arrays.asList(chr1.getId(), chr2.getId())); IndexSearchResult<VcfIndexEntry> chr2122Entries = featureIndexManager.filterVariations(chr2122Form, project.getId()); Assert.assertFalse(chr2122Entries.getEntries().isEmpty()); Assert.assertTrue( chr2122Entries.getEntries().stream().anyMatch(e -> e.getChromosome().getId().equals(chr1.getId()))); Assert.assertTrue( chr2122Entries.getEntries().stream().anyMatch(e -> e.getChromosome().getId().equals(chr2.getId()))); Assert.assertEquals(chr21Entries.getEntries().size() + chr22Entries.getEntries().size(), chr2122Entries.getEntries().size()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testSearchIndexForFile() throws IOException, FeatureIndexException { List<FeatureIndexEntry> entryList = (List<FeatureIndexEntry>) featureIndexDao .searchFileIndexes(Collections.singletonList(testVcf), String.format(TEST_GENE_AND_FILE_ID_QUERY, testVcf.getId()), null) .getEntries(); Assert.assertFalse(entryList.isEmpty()); VcfFilterForm vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(testVcf.getId())); vcfFilterForm .setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList(TEST_GENE_PREFIX), false)); vcfFilterForm.setVariationTypes( new VcfFilterForm.FilterSection<>(Arrays.asList(VariationType.MNP, VariationType.SNV), false)); //vcfFilterForm.setQuality(Collections.singletonList(QUAL_VALUE)); IndexSearchResult<VcfIndexEntry> entryList2 = featureIndexManager.filterVariations(vcfFilterForm); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertTrue(entryList2.getEntries().stream().anyMatch(e -> e.getInfo() != null && (Boolean) e.getInfo().get(FeatureIndexDao.FeatureIndexFields.IS_EXON.getFieldName()))); vcfFilterForm.setChromosomeIds(Collections.singletonList(testChromosome.getId())); entryList2 = featureIndexManager.filterVariations(vcfFilterForm); Assert.assertFalse(entryList2.getEntries().isEmpty()); double time1 = Utils.getSystemTimeMilliseconds(); List<Long> chromosomeIds = featureIndexDao.getChromosomeIdsWhereVariationsPresentFacet( Collections.singletonList(testVcf), "geneId:ENS* AND fileId:" + testVcf.getId() + " AND variationType:snv"); double time2 = Utils.getSystemTimeMilliseconds(); logger.info("Get chromosomes by facets time: {} ms", time2 - time1); Assert.assertFalse(chromosomeIds.isEmpty()); List<Chromosome> chromosomes = featureIndexManager.filterChromosomes(vcfFilterForm); Assert.assertFalse(chromosomes.isEmpty()); // filter by additional fields Map<String, Object> additionalFilters = new HashMap<>(); additionalFilters.put(SVTYPE_FIELD, "DEL"); //additionalFilters.put("SVLEN", SVLEN_VALUE); additionalFilters.put(SVLEN_FIELD, String.valueOf(SVLEN_VALUE)); vcfFilterForm.setAdditionalFilters(additionalFilters); vcfFilterForm.setGenes(null); vcfFilterForm.setVariationTypes(null); vcfFilterForm.setInfoFields(Arrays.asList(SVTYPE_FIELD, SVLEN_FIELD)); entryList2 = featureIndexManager.filterVariations(vcfFilterForm); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertFalse(entryList2.getEntries().stream().anyMatch(e -> e.getInfo().isEmpty())); Set<String> genes = featureIndexManager.searchGenesInVcfFiles(TEST_GENE_PREFIX, Collections.singletonList(testVcf.getId())); Assert.assertFalse(genes.isEmpty()); // search by gene name pglyrp4 vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(testVcf.getId())); vcfFilterForm.setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList(TEST_GENE_NAME))); IndexSearchResult<VcfIndexEntry> entries = featureIndexManager.filterVariations(vcfFilterForm); Assert.assertFalse(entries.getEntries().isEmpty()); genes = featureIndexManager.searchGenesInVcfFiles(TEST_GENE_NAME, Collections.singletonList(testVcf.getId())); Assert.assertFalse(genes.isEmpty()); vcfFilterForm.setPageSize(1); int totalCount = featureIndexManager.getTotalPagesCount(vcfFilterForm); Assert.assertEquals(entries.getEntries().size(), totalCount); // search exons vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(testVcf.getId())); vcfFilterForm.setExon(true); entryList2 = featureIndexManager.filterVariations(vcfFilterForm); Assert.assertFalse(entryList2.getEntries().isEmpty()); Assert.assertTrue(entryList2.getEntries().stream().allMatch(e -> e.getInfo() != null && (Boolean) e.getInfo().get(FeatureIndexDao.FeatureIndexFields.IS_EXON.getFieldName()))); // check duplicates vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(testVcf.getId())); entryList2 = featureIndexManager.filterVariations(vcfFilterForm); checkDuplicates(entryList2.getEntries()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testGeneIndexForFile() throws IOException { IndexSearchResult searchResult = featureIndexDao.searchFeatures("", Collections.singletonList(testGeneFile), null); Assert.assertTrue(searchResult.getEntries().isEmpty()); searchResult = featureIndexDao.searchFeatures(TEST_GENE_PREFIX.toLowerCase(), Collections.singletonList(testGeneFile), 10); Assert.assertFalse(searchResult.getEntries().isEmpty()); Assert.assertTrue(searchResult.getEntries().size() <= 10); Assert.assertTrue(searchResult.isExceedsLimit()); // ensfcag00000031547 and ccdc115 searchResult = featureIndexDao.searchFeatures("ensfcag00000031547", Collections.singletonList(testGeneFile), null); Assert.assertEquals(searchResult.getEntries().size(), 1); searchResult = featureIndexDao.searchFeatures("ccdc115", Collections.singletonList(testGeneFile), null); Assert.assertEquals(searchResult.getEntries().size(), 2); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testBedIndexForFile() throws IOException { IndexSearchResult<FeatureIndexEntry> searchResult = featureIndexDao.searchFeatures("", Collections.singletonList(testBedFile), null); Assert.assertTrue(searchResult.getEntries().isEmpty()); searchResult = featureIndexDao.searchFeatures("Pos1", Collections.singletonList(testBedFile), null); List<FeatureIndexEntry> entries = searchResult.getEntries(); Assert.assertEquals(1, entries.size()); Assert.assertEquals("pos1", entries.get(0).getFeatureName()); Assert.assertEquals("A1", entries.get(0).getChromosome().getName()); // The BED format uses a first-base-is-zero convention, Tribble features use 1 => add 1. Assert.assertEquals(BED_FEATURE_START, (int) entries.get(0).getStartIndex()); Assert.assertEquals(BED_FEATURE_END, (int) entries.get(0).getEndIndex()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testReindexVcf() throws FeatureIndexException, IOException { Resource resource = context.getResource(CLASSPATH_TEMPLATES_FELIS_CATUS_VCF); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setPath(resource.getFile().getAbsolutePath()); request.setName(UUID.randomUUID().toString()); VcfFile vcfFile = vcfManager.registerVcfFile(request); VcfFilterForm vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setVcfFileIds(Collections.singletonList(vcfFile.getId())); vcfFilterForm .setGenes(new VcfFilterForm.FilterSection<>(Collections.singletonList(TEST_GENE_PREFIX), false)); vcfFilterForm.setVariationTypes( new VcfFilterForm.FilterSection<>(Arrays.asList(VariationType.MNP, VariationType.SNV), false)); IndexSearchResult<VcfIndexEntry> entryList = featureIndexManager.filterVariations(vcfFilterForm); Assert.assertFalse(entryList.getEntries().isEmpty()); fileManager.deleteFileFeatureIndex(vcfFile); TestUtils.assertFail(() -> featureIndexManager.filterVariations(vcfFilterForm), Collections.singletonList(IllegalArgumentException.class)); vcfManager.reindexVcfFile(vcfFile.getId()); entryList = featureIndexManager.filterVariations(vcfFilterForm); Assert.assertFalse(entryList.getEntries().isEmpty()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testReindexGene() throws IOException { FeatureIndexedFileRegistrationRequest geneRequest = new FeatureIndexedFileRegistrationRequest(); Resource resource = context.getResource(CLASSPATH_TEMPLATES_GENES_SORTED); geneRequest.setReferenceId(referenceId); geneRequest.setPath(resource.getFile().getAbsolutePath()); geneRequest.setName(UUID.randomUUID().toString()); GeneFile geneFile = gffManager.registerGeneFile(geneRequest); IndexSearchResult searchResult = featureIndexDao.searchFeatures(TEST_GENE_PREFIX.toLowerCase(), Collections.singletonList(geneFile), 10); Assert.assertFalse(searchResult.getEntries().isEmpty()); Assert.assertTrue(searchResult.getEntries().size() <= 10); Assert.assertTrue(searchResult.isExceedsLimit()); fileManager.deleteFileFeatureIndex(geneFile); TestUtils.assertFail( () -> featureIndexDao.searchFeatures(TEST_GENE_PREFIX.toLowerCase(), Collections.singletonList(geneFile), 10), Collections.singletonList(IllegalArgumentException.class)); gffManager.reindexGeneFile(geneFile.getId(), false); searchResult = featureIndexDao.searchFeatures("ens", Collections.singletonList(geneFile), 10); Assert.assertFalse(searchResult.getEntries().isEmpty()); Assert.assertTrue(searchResult.getEntries().size() <= 10); Assert.assertTrue(searchResult.isExceedsLimit()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testNoIndexVcf() throws IOException, FeatureIndexException { Resource resource = context.getResource(CLASSPATH_TEMPLATES_FELIS_CATUS_VCF); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(referenceId); request.setPath(resource.getFile().getAbsolutePath()); request.setDoIndex(false); request.setName(UUID.randomUUID().toString()); VcfFile vcfFile = vcfManager.registerVcfFile(request); Assert.assertNotNull(vcfFile); Project project = new Project(); project.setName(TEST_PROJECT_NAME + UUID.randomUUID().toString()); project.setItems(Arrays.asList(new ProjectItem(new BiologicalDataItem(vcfFile.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(testReference.getBioDataItemId())))); projectManager.saveProject(project); // Index is created when vcf file is added TestUtils.assertFail(() -> featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()), Collections.singletonList(IllegalArgumentException.class)); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testNoIndexGene() throws IOException { FeatureIndexedFileRegistrationRequest geneRequest = new FeatureIndexedFileRegistrationRequest(); Resource resource = context.getResource(CLASSPATH_TEMPLATES_GENES_SORTED); geneRequest.setReferenceId(referenceId); geneRequest.setPath(resource.getFile().getAbsolutePath()); geneRequest.setDoIndex(false); geneRequest.setName(UUID.randomUUID().toString()); GeneFile geneFile = gffManager.registerGeneFile(geneRequest); Assert.assertNotNull(geneFile); TestUtils.assertFail( () -> featureIndexDao.searchFeatures(TEST_GENE_PREFIX.toLowerCase(), Collections.singletonList(geneFile), 10), Collections.singletonList(IllegalArgumentException.class)); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void pagingTest() throws IOException, FeatureIndexException { VcfFilterForm vcfFilterForm = new VcfFilterForm(); IndexSearchResult<VcfIndexEntry> entryList = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList.getEntries().isEmpty()); vcfFilterForm.setPageSize(10); int total = featureIndexManager.getTotalPagesCount(vcfFilterForm, testProject.getId()); Set<VcfIndexEntry> pagedEntries = new HashSet<>(); for (int i = 1; i < total + 1; i++) { vcfFilterForm.setPage(i); IndexSearchResult<VcfIndexEntry> page = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(page.getEntries().isEmpty()); Assert.assertEquals(total, page.getTotalPagesCount().intValue()); if (i < (entryList.getEntries().size() / 10) + 1) { // check if only it is not the last page // (there should be 4 variations) Assert.assertEquals(page.getEntries().size(), 10); } else { Assert.assertEquals(page.getEntries().size(), 4); } List<VcfIndexEntry> duplicates = page.getEntries().stream().filter(e -> pagedEntries.contains(e)) .collect(Collectors.toList()); Assert.assertTrue(duplicates.isEmpty()); pagedEntries.addAll(page.getEntries()); } Assert.assertEquals(entryList.getEntries().size(), pagedEntries.size()); } @Test @Transactional(propagation = Propagation.REQUIRED) public void sortingTest() throws IOException { VcfFilterForm vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setPage(1); vcfFilterForm.setPageSize(10); for (IndexSortField sortField : IndexSortField.values()) { vcfFilterForm.setOrderBy(Collections.singletonList(new VcfFilterForm.OrderBy(sortField.name(), false))); IndexSearchResult<VcfIndexEntry> entryList = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList.getEntries().isEmpty()); Assert.assertEquals(vcfFilterForm.getPageSize().intValue(), entryList.getEntries().size()); } // check sorting by various fields checkSorted(IndexSortField.START_INDEX.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream().anyMatch(e -> e.getStartIndex() > p.getStartIndex())), testProject.getId()); checkSorted(IndexSortField.END_INDEX.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream().anyMatch(e -> e.getEndIndex() > p.getEndIndex())), testProject.getId()); checkSorted(IndexSortField.CHROMOSOME_NAME.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream().anyMatch( e -> e.getChromosome().getName().compareTo(p.getChromosome().getName()) > 0)), testProject.getId()); checkSorted(IndexSortField.GENE_NAME.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream() .anyMatch(e -> StringUtils.isNotBlank(e.getGeneName()) && StringUtils.isNotBlank(p.getGeneName()) && e.getGeneName().compareTo(p.getGeneName()) > 0)), testProject.getId()); checkSorted(IndexSortField.GENE_NAME.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream() .anyMatch(e -> StringUtils.isNotBlank(e.getGeneNames()) && StringUtils.isNotBlank(p.getGeneNames()) && e.getGeneNames().compareTo(p.getGeneNames()) > 0)), testProject.getId()); checkSorted(IndexSortField.GENE_ID.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream().anyMatch(e -> StringUtils.isNotBlank(e.getGene()) && StringUtils.isNotBlank(p.getGene()) && e.getGene().compareTo(p.getGene()) > 0)), testProject.getId()); checkSorted(IndexSortField.GENE_ID.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream().anyMatch(e -> StringUtils.isNotBlank(e.getGeneIds()) && StringUtils.isNotBlank(p.getGeneIds()) && e.getGeneIds().compareTo(p.getGeneIds()) > 0)), testProject.getId()); checkSorted(IndexSortField.VARIATION_TYPE.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream().anyMatch( e -> e.getVariationType().name().compareTo(p.getVariationType().name()) > 0)), testProject.getId()); checkSorted(IndexSortField.FILTER.name(), false, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream() .anyMatch(e -> (e.getFailedFilter() != null ? e.getFailedFilter() : "") .compareTo(p.getFailedFilter() != null ? p.getFailedFilter() : "") > 0)), testProject.getId()); // check order by additional fields VcfFilterInfo info = vcfManager.getFiltersInfo(Collections.singletonList(testVcf.getId())); for (InfoItem item : info.getInfoItems()) { switch (item.getType()) { case Integer: checkSorted(item.getName(), false, (page, seenEntries) -> page.stream().anyMatch(p -> seenEntries.stream() .anyMatch(e -> e.getInfo().containsKey(item.getName()) && e.getInfo().get(item.getName()) != null && e.getInfo().containsKey(item.getName()) && p.getInfo().get(item.getName()) != null && (e.getInfo().get(item.getName()).toString()) .compareTo(p.getInfo().get(item.getName()).toString()) > 0)), testProject.getId(), Collections.singletonList(item.getName())); break; case Float: checkSorted(item.getName(), false, (page, seenEntries) -> page.stream().anyMatch(p -> seenEntries.stream() .anyMatch(e -> e.getInfo().containsKey(item.getName()) && e.getInfo().get(item.getName()) != null && e.getInfo().containsKey(item.getName()) && p.getInfo().get(item.getName()) != null && (e.getInfo().get(item.getName()).toString()) .compareTo(p.getInfo().get(item.getName()).toString()) > 0)), testProject.getId(), Collections.singletonList(item.getName())); break; default: checkSorted(item.getName(), false, (page, seenEntries) -> page.stream().anyMatch(p -> seenEntries.stream() .anyMatch(e -> e.getInfo().get(item.getName()) != null && p.getInfo().get(item.getName()) != null && e.getInfo().get(item.getName()).toString() .compareTo(p.getInfo().get(item.getName()).toString()) > 0)), testProject.getId(), Collections.singletonList(item.getName())); } } // Test sort desc checkSorted(IndexSortField.START_INDEX.name(), true, (page, seenEntries) -> page.stream() .anyMatch(p -> seenEntries.stream().anyMatch(e -> e.getStartIndex() < p.getStartIndex())), testProject.getId()); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testSortByMultipleFields() throws IOException { // check sorted by multiple fields IndexSearchResult<VcfIndexEntry> referentList = featureIndexManager.filterVariations(new VcfFilterForm(), testProject.getId()); List<VcfIndexEntry> pagedEntries = new ArrayList<>(); VcfFilterForm filterForm = new VcfFilterForm(); filterForm.setPageSize(10); filterForm.setOrderBy(Arrays.asList(new VcfFilterForm.OrderBy(IndexSortField.START_INDEX.name(), false), new VcfFilterForm.OrderBy(IndexSortField.VARIATION_TYPE.name(), false))); for (int i = 1; i < (referentList.getEntries().size() / 10) + 2; i++) { filterForm.setPage(i); IndexSearchResult<VcfIndexEntry> pageRes = featureIndexManager.filterVariations(filterForm, testProject.getId()); List<VcfIndexEntry> page = pageRes.getEntries(); Assert.assertFalse(page.isEmpty()); if (i < (referentList.getEntries().size() / 10) + 1) { // check if only it is not the last page // (there should be 4 variations) Assert.assertEquals(page.size(), 10); } else { Assert.assertEquals(page.size(), 4); } List<VcfIndexEntry> duplicates = page.stream().filter(pagedEntries::contains) .collect(Collectors.toList()); Assert.assertTrue(duplicates.isEmpty()); Assert.assertFalse(page.stream() .anyMatch(p -> pagedEntries.stream().anyMatch(e -> e.getStartIndex() > p.getStartIndex()))); Assert.assertFalse(page.stream().anyMatch(p -> pagedEntries.stream() .anyMatch(e -> e.getVariationType().name().compareTo(p.getVariationType().name()) > 0))); pagedEntries.addAll(page); } } private void checkSorted(String orderBy, boolean desc, SortTestingPredicate testingPredicate, Long projectId) throws IOException { checkSorted(orderBy, desc, testingPredicate, projectId, null); } private void checkSorted(String orderBy, boolean desc, SortTestingPredicate testingPredicate, Long projectId, List<String> additionalFields) throws IOException { IndexSearchResult<VcfIndexEntry> referentList = featureIndexManager.filterVariations(new VcfFilterForm(), projectId); List<VcfIndexEntry> pagedEntries = new ArrayList<>(); VcfFilterForm vcfFilterForm = new VcfFilterForm(); vcfFilterForm.setPageSize(10); vcfFilterForm.setOrderBy(Collections.singletonList(new VcfFilterForm.OrderBy(orderBy, desc))); vcfFilterForm.setInfoFields(additionalFields); for (int i = 1; i < (referentList.getEntries().size() / 10) + 2; i++) { vcfFilterForm.setPage(i); IndexSearchResult<VcfIndexEntry> pageRes = featureIndexManager.filterVariations(vcfFilterForm, projectId); List<VcfIndexEntry> page = pageRes.getEntries(); Assert.assertFalse(page.isEmpty()); if (i < (referentList.getEntries().size() / 10) + 1) { // check if only it is not the last page // (there should be 4 variations) Assert.assertEquals(page.size(), 10); } else { Assert.assertEquals(page.size(), 4); } List<VcfIndexEntry> duplicates = page.stream().filter(pagedEntries::contains) .collect(Collectors.toList()); Assert.assertTrue(duplicates.isEmpty()); Assert.assertFalse(testingPredicate.doTest(page, pagedEntries)); pagedEntries.addAll(page); } } @FunctionalInterface private interface SortTestingPredicate { boolean doTest(List<VcfIndexEntry> page, List<VcfIndexEntry> seenEntries); } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void groupingTest() throws IOException { VcfFilterForm vcfFilterForm = new VcfFilterForm(); IndexSearchResult<VcfIndexEntry> entryList = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList.getEntries().isEmpty()); List<Group> counts = featureIndexManager.groupVariations(new VcfFilterForm(), testProject.getId(), IndexSortField.CHROMOSOME_NAME.name()); Assert.assertFalse(counts.isEmpty()); // test load additional info and group by it VcfFilterInfo info = vcfManager.getFiltersInfo(Collections.singletonList(testVcf.getId())); vcfFilterForm = new VcfFilterForm(); vcfFilterForm .setInfoFields(info.getInfoItems().stream().map(i -> i.getName()).collect(Collectors.toList())); entryList = featureIndexManager.filterVariations(vcfFilterForm, testProject.getId()); Assert.assertFalse(entryList.getEntries().isEmpty()); for (InfoItem infoItem : info.getInfoItems()) { String groupByField = infoItem.getName(); List<Group> c = featureIndexManager.groupVariations(new VcfFilterForm(), testProject.getId(), groupByField); List<VcfIndexEntry> entriesWithField = entryList.getEntries().stream() .filter(e -> e.getInfo().get(groupByField) != null).collect(Collectors.toList()); if (!entriesWithField.isEmpty()) { Assert.assertFalse("Empty grouping for field: " + groupByField, c.isEmpty()); } } } @Test @Transactional(propagation = Propagation.REQUIRES_NEW) public void testGroupingForPlots() throws IOException { // chromosomes histogram List<Group> counts = featureIndexManager.groupVariations(new VcfFilterForm(), testProject.getId(), IndexSortField.CHROMOSOME_NAME.name()); Assert.assertFalse(counts.isEmpty()); // variation types histogram counts = featureIndexManager.groupVariations(new VcfFilterForm(), testProject.getId(), IndexSortField.VARIATION_TYPE.name()); Assert.assertFalse(counts.isEmpty()); counts = featureIndexManager.groupVariations(new VcfFilterForm(), testProject.getId(), IndexSortField.QUALITY.name()); Assert.assertFalse(counts.isEmpty()); } @Test @Ignore // TODO: remove this test before merging to master @Transactional(propagation = Propagation.REQUIRES_NEW) public void performanceTest() throws Exception { Reference hg38 = EntityHelper.createG38Reference(referenceGenomeManager.createReferenceId()); referenceGenomeManager.register(hg38); FeatureIndexedFileRegistrationRequest request = new FeatureIndexedFileRegistrationRequest(); request.setReferenceId(hg38.getId()); request.setPath("/home/kite/Documents/sampleData/Dream.set3.VarDict.SV.vcf"); VcfFile vcfFile1 = vcfManager.registerVcfFile(request); request.setPath("/home/kite/Documents/sampleData/synthetic.challenge.set3.tumor.20pctmasked.truth.vcf"); VcfFile vcfFile2 = vcfManager.registerVcfFile(request); Project project = new Project(); project.setName(TEST_PROJECT_NAME + 1); project.setItems(Arrays.asList(new ProjectItem(new BiologicalDataItem(vcfFile1.getBioDataItemId())), new ProjectItem(new BiologicalDataItem(vcfFile2.getBioDataItemId())))); projectManager.saveProject(project); IndexSearchResult<VcfIndexEntry> entriesRes = featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()); List<VcfIndexEntry> entries = entriesRes.getEntries(); Assert.assertFalse(entries.isEmpty()); logger.info("!! Variations count: {}", entries.size()); TestUtils.warmUp(() -> featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()), PERFORMANCE_TEST_WARMING_COUNT); double averageTime = TestUtils.measurePerformance( () -> featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()), PERFORMANCE_TEST_ATTEMPTS_COUNT); logger.info("!! Performing index search took: {} ms", averageTime); TestUtils.warmUp(() -> featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()), PERFORMANCE_TEST_WARMING_COUNT); averageTime = TestUtils.measurePerformance( () -> featureIndexManager.filterVariations(new VcfFilterForm(), project.getId()), PERFORMANCE_TEST_ATTEMPTS_COUNT); logger.info("!! Performing index search paging took: {} ms", averageTime); final VcfFilterForm filterForm = new VcfFilterForm(); filterForm.setPage(1); filterForm.setPageSize(PERFORMANCE_TEST_PAGE_SIZE); TestUtils.warmUp(() -> featureIndexManager.filterVariations(filterForm, project.getId()), PERFORMANCE_TEST_WARMING_COUNT); averageTime = TestUtils.measurePerformance( () -> featureIndexManager.filterVariations(filterForm, project.getId()), PERFORMANCE_TEST_ATTEMPTS_COUNT); logger.info("!! Performing index search single page took: {} ms", averageTime); TestUtils.warmUp( () -> ThreadLocalRandom.current().nextInt(1, entries.size() / PERFORMANCE_TEST_PAGE_SIZE + 1), (page) -> { filterForm.setPage(page); featureIndexManager.filterVariations(filterForm, project.getId()); }, PERFORMANCE_TEST_WARMING_COUNT); List<Double> timings = TestUtils.measurePerformanceTimings( () -> ThreadLocalRandom.current().nextInt(1, entries.size() / PERFORMANCE_TEST_PAGE_SIZE + 1), (page) -> { filterForm.setPage(page); featureIndexManager.filterVariations(filterForm, project.getId()); }, PERFORMANCE_TEST_ATTEMPTS_COUNT); timings.forEach(t -> logger.info("!! Performed index search random page took: {}", t)); averageTime = TestUtils.calculateAverage(timings); logger.info("!! Average Performing index search random page took: {} ms", averageTime); /*TestUtils.warmUp(() -> featureIndexManager.getTotalPagesCount(new VcfFilterForm(), project.getId()), PERFORMANCE_TEST_WARMING_COUNT); averageTime = TestUtils.measurePerformance( () -> featureIndexManager.getTotalPagesCount(new VcfFilterForm(), project.getId()), PERFORMANCE_TEST_ATTEMPTS_COUNT); logger.info("!! Performing total facet page count lookup took: {} ms", averageTime);*/ } private void checkDuplicates(List<VcfIndexEntry> entryList) { Map<Pair<Integer, Integer>, FeatureIndexEntry> duplicateMap = new HashMap<>(); entryList.forEach(e -> { Pair<Integer, Integer> indexPair = new ImmutablePair<>(e.getStartIndex(), e.getEndIndex()); Assert.assertFalse(String.format("Found duplicate: %d, %d", e.getStartIndex(), e.getEndIndex()), duplicateMap.containsKey(indexPair)); duplicateMap.put(indexPair, e); }); } }