Java tutorial
/* * MIT License * * Copyright (c) 2016 EPAM Systems * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package com.epam.catgenome.manager.vcf.reader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.tuple.Pair; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.util.Assert; import com.epam.catgenome.component.MessageCode; import com.epam.catgenome.component.MessageHelper; import com.epam.catgenome.constant.Constants; import com.epam.catgenome.constant.MessagesConstants; import com.epam.catgenome.entity.reference.Chromosome; import com.epam.catgenome.entity.track.Track; import com.epam.catgenome.entity.vcf.Filter; import com.epam.catgenome.entity.vcf.GenotypeData; import com.epam.catgenome.entity.vcf.OrganismType; import com.epam.catgenome.entity.vcf.Variation; import com.epam.catgenome.entity.vcf.VariationType; import com.epam.catgenome.entity.vcf.VcfFile; import com.epam.catgenome.exception.VcfReadingException; import com.epam.catgenome.manager.FileManager; import com.epam.catgenome.manager.reference.ReferenceGenomeManager; import com.epam.catgenome.util.Utils; import htsjdk.samtools.util.CloseableIterator; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.FeatureReader; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine; /** * Source: VcfFileReader * Created: 14.10.16, 16:59 * Project: CATGenome Browser * Make: IntelliJ IDEA 14.1.4, JDK 1.8 * {@code VcfFileReader} is an implementation of {@code VcfReader} for reading variations data from * VCF files */ public class VcfFileReader extends AbstractVcfReader { private FileManager fileManager; public static final double HTSJDK_WRONG_QUALITY = -10.0; protected static final String BIND_CIPOS_ATTRIBUTE = "CIPOS"; private static final Logger LOGGER = LoggerFactory.getLogger(VcfFileReader.class); /** * Creates a {@code VcfFileReader} instance * @param fileManager for file access * @param referenceGenomeManager for getting reference data */ public VcfFileReader(final FileManager fileManager, final ReferenceGenomeManager referenceGenomeManager) { this.fileManager = fileManager; this.referenceGenomeManager = referenceGenomeManager; } /** * Reads the variations data from the VCF file and loads it into a track * @param vcfFile data source * @param track for loading data * @param chromosome reference sequence * @param sampleIndex determines fro which sample from the file variations are loaded * @param loadInfo if true data from the INFO fields from VCF file will be loaded into the track, * otherwise it will be ignored * @return {@code Track} filled with variations from a specified {@code VcfFile} * @throws VcfReadingException */ @Override public Track<Variation> readVariations(VcfFile vcfFile, final Track<Variation> track, Chromosome chromosome, final Integer sampleIndex, final boolean loadInfo, final boolean collapse) throws VcfReadingException { try (FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(vcfFile.getPath(), vcfFile.getIndex().getPath(), new VCFCodec(), true)) { if (checkBounds(vcfFile, track, chromosome, loadInfo)) { return track; } try (CloseableIterator<VariantContext> iterator = Utils.query(reader, chromosome.getName(), track.getStartIndex(), track.getEndIndex())) { VCFHeader header = (VCFHeader) reader.getHeader(); track.setBlocks( doReadVariations(iterator, track, header, vcfFile, sampleIndex, loadInfo, collapse)); } } catch (IOException e) { throw new VcfReadingException(vcfFile, e); } return track; } @Override public Variation getNextOrPreviousVariation(int fromPosition, VcfFile vcfFile, Integer sampleIndex, Chromosome chromosome, boolean forward) throws VcfReadingException { int end = forward ? chromosome.getSize() : 0; if (isOutOfBounds(fromPosition, forward, end)) { // no next features return null; } try (FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(vcfFile.getPath(), vcfFile.getIndex().getPath(), new VCFCodec(), true)) { return readNextOrPreviousVariation(fromPosition, vcfFile, sampleIndex, chromosome, forward, end, reader); } catch (IOException e) { throw new VcfReadingException(vcfFile, e); } } private boolean isOutOfBounds(int fromPosition, boolean forward, int end) { return (forward && fromPosition + 1 >= end) || (!forward && fromPosition - 1 <= end); } private Variation readNextOrPreviousVariation(int fromPosition, VcfFile vcfFile, Integer sampleIndex, Chromosome chromosome, boolean forward, int end, FeatureReader<VariantContext> reader) throws IOException { int bound = end; if (vcfFile.getCompressed()) { bound = getEndWithBounds(vcfFile, chromosome, forward); } VCFHeader vcfHeader = (VCFHeader) reader.getHeader(); return forward ? getNextVariation(fromPosition, sampleIndex, chromosome, bound, reader, vcfHeader) : getPreviousVariation(fromPosition, sampleIndex, chromosome, bound, reader, vcfHeader); } private int getEndWithBounds(VcfFile vcfFile, Chromosome chromosome, boolean forward) throws IOException { Map<String, Pair<Integer, Integer>> metaMap = fileManager.loadIndexMetadata(vcfFile); Pair<Integer, Integer> bounds = metaMap.get(chromosome.getName()); if (bounds == null) { bounds = metaMap.get(Utils.changeChromosomeName(chromosome.getName())); } Assert.notNull(bounds, MessageHelper.getMessage(MessageCode.NO_SUCH_CHROMOSOME)); return forward ? bounds.getRight() : bounds.getLeft(); } @Nullable private Variation getPreviousVariation(int fromPosition, Integer sampleIndex, Chromosome chromosome, int end, FeatureReader<VariantContext> reader, VCFHeader vcfHeader) throws IOException { Variation lastFeature = null; int i = 0; boolean lastChunk = false; while (lastFeature == null) { if (lastChunk) { break; } int firstIndex = fromPosition - Constants.PREV_FEATURE_OFFSET * (i + 1); int lastIndex = fromPosition - 1 - Constants.PREV_FEATURE_OFFSET * i; if (firstIndex < end) { firstIndex = end; lastChunk = true; // this is the last chunk to be traversed } try (CloseableIterator<VariantContext> iterator = Utils.query(reader, chromosome.getName(), firstIndex, lastIndex)) { // instead traversing the whole file, read it by small chunks, 10000 bps // long. Hopefully, the desired feature will be in first/second chunk lastFeature = createVariations(sampleIndex, vcfHeader, iterator, fromPosition); i++; } } return lastFeature; } private Variation createVariations(Integer sampleIndex, VCFHeader vcfHeader, CloseableIterator<VariantContext> iterator, int fromPosition) { Variation lastFeature = null; while (iterator.hasNext()) { Variation variation = createVariation(iterator.next(), vcfHeader, sampleIndex); if (variation.getGenotypeData() == null || variation.getGenotypeData().getOrganismType() != OrganismType.NO_VARIATION && variation.getEndIndex() < fromPosition) { lastFeature = variation; } } return lastFeature; } @Nullable private Variation getNextVariation(int fromPosition, Integer sampleIndex, Chromosome chromosome, int end, FeatureReader<VariantContext> reader, VCFHeader vcfHeader) throws IOException { try (CloseableIterator<VariantContext> iterator = Utils.query(reader, chromosome.getName(), fromPosition + 1, end)) { while (iterator.hasNext()) { VariantContext feature = iterator.next(); Variation variation = createVariation(feature, vcfHeader, sampleIndex); if (variation.getGenotypeData() == null || variation.getGenotypeData().getOrganismType() != OrganismType.NO_VARIATION && variation.getStartIndex() > fromPosition) { return variation; } } } return null; } /** * Translates HTSJDK's {@code VariantContext} object into our {@code Variant} entity * * @param context a {@code VariantContext} object, that presents a variation from parsed VCF file. * @param header a {@code VCFHeader} object, that represents a header of parsed VCF file. * @param sampleIndex {@code Integer} a name of a sample. * @return a {@code Variation} object, representing desired variation. */ public Variation createVariation(VariantContext context, VCFHeader header, Integer sampleIndex) { String ref = context.getReference().getDisplayString(); List<String> alt = context.getAlternateAlleles().stream().map(Allele::getDisplayString) .collect(Collectors.toList()); Genotype genotype = sampleIndex != null ? context.getGenotype(sampleIndex) : null; // First, determine OrganismType GenotypeData genotypeData = getGenotypeData(context, genotype); Variation variation = new Variation(context.getStart(), context.getEnd(), ref, alt); variation.setGenotypeData(genotypeData); variation.setFailedFilters(context.getFilters().stream().map(f -> { VCFHeaderLine vcfHeaderLine = header.getFilterHeaderLine(f); return new Filter(f, vcfHeaderLine != null ? vcfHeaderLine.getValue() : null); }).collect(Collectors.toList())); variation.setIdentifier(context.getID()); Double qual = context.getPhredScaledQual(); variation.setQuality(Double.compare(qual, HTSJDK_WRONG_QUALITY) != 0 ? qual : 0); determineVariationType(context, sampleIndex, variation); return variation; } @NotNull private GenotypeData getGenotypeData(VariantContext context, Genotype genotype) { GenotypeData genotypeData; if (genotype == null) { genotypeData = new GenotypeData(); genotypeData.setOrganismType(OrganismType.NOT_SPECIFIED); } else { int[] genotypeArray = null; OrganismType organismType; switch (genotype.getType()) { case HOM_VAR: organismType = OrganismType.HOMOZYGOUS; int altIndex = context.getAlternateAlleles().indexOf(genotype.getAllele(0)) + 1; genotypeArray = new int[] { altIndex, altIndex }; break; case MIXED: case HET: genotypeArray = new int[2]; organismType = determineHeterozygousGenotype(context, genotype, genotypeArray); break; case UNAVAILABLE: case NO_CALL: organismType = OrganismType.NOT_SPECIFIED; break; case HOM_REF: organismType = OrganismType.NO_VARIATION; genotypeArray = new int[] { 0, 0 }; break; default: organismType = OrganismType.NOT_SPECIFIED; } genotypeData = new GenotypeData(organismType, genotypeArray, genotype.getGenotypeString()); } return genotypeData; } private boolean checkBounds(VcfFile vcfFile, Track<Variation> track, Chromosome chromosome, boolean loadInfo) throws IOException { // Bounds metadata should be load only for track loading to improve performance // Load bounds metadata for this file return !loadInfo && vcfFile.getCompressed() && checkBoundsForCompressedFile(vcfFile, track, chromosome); } private boolean checkBoundsForCompressedFile(VcfFile vcfFile, Track<Variation> track, Chromosome chromosome) throws IOException { Map<String, Pair<Integer, Integer>> metaMap = fileManager.loadIndexMetadata(vcfFile); Pair<Integer, Integer> bounds = metaMap.get(chromosome.getName()); if (bounds == null) { bounds = metaMap.get(Utils.changeChromosomeName(chromosome.getName())); } Assert.notNull(bounds, MessageHelper.getMessage(MessageCode.NO_SUCH_CHROMOSOME)); if (track.getStartIndex() < bounds.getLeft()) { track.setStartIndex(bounds.getLeft()); } if (track.getEndIndex() > bounds.getRight()) { track.setEndIndex(bounds.getRight()); } // If we are out of variation bounds, return empty list of variations if (track.getStartIndex() > bounds.getRight() || track.getEndIndex() < bounds.getLeft()) { track.setBlocks(Collections.emptyList()); return true; } return false; } private List<Variation> doReadVariations(CloseableIterator<VariantContext> iterator, Track<Variation> track, VCFHeader header, VcfFile vcfFile, Integer sampleIndex, boolean loadInfo, boolean collapse) throws IOException { if (track.getScaleFactor() >= 1 || !collapse) { ArrayList<Variation> variations = new ArrayList<>(); while (iterator.hasNext()) { VariantContext context = iterator.next(); Variation variation = createVariation(context, header, sampleIndex); if (loadInfo) { parseInfo(variation, context, header, sampleIndex, vcfFile); } if (variation.getGenotypeData() == null || variation.getGenotypeData().getOrganismType() != OrganismType.NO_VARIATION) { variations.add(variation); } } return variations; } else { return loadStatisticVariations(iterator, track, header, vcfFile, sampleIndex, loadInfo); } } private List<Variation> loadStatisticVariations(CloseableIterator<VariantContext> iterator, Track<Variation> track, VCFHeader header, VcfFile vcfFile, Integer sampleIndex, boolean loadInfo) { ArrayList<Variation> variations = new ArrayList<>(); final int step = (int) Math.ceil((double) 1 / track.getScaleFactor()); final int from = track.getStartIndex(); int to = from + step; boolean found = false; int variationCount = 0; // On small scale we need to count overlapping variations List<Variation> extendingVariations = new ArrayList<>(); // variations, that extend one pixel region VariantContext lastContext = null; while (iterator.hasNext()) { VariantContext context = iterator.next(); Variation variation = createVariation(context, header, sampleIndex); if (loadInfo) { parseInfo(variation, context, header, sampleIndex, vcfFile); } if (variation.getType() == VariationType.BND) { variations.add(variation); continue; } if (context.getStart() > to) { found = false; to = to + step < context.getStart() ? (context.getStart() + step) : (to + step); if (!variations.isEmpty()) { tryToGroupVariations(variations, variationCount, lastContext); } variationCount = 0; variations.addAll(extendingVariations); extendingVariations.clear(); } if (context.getEnd() > to && context.getStart() >= from && context.getType() == VariantContext.Type.SYMBOLIC) { if (variation.getGenotypeData() == null || variation.getGenotypeData().getOrganismType() != OrganismType.NO_VARIATION) { extendingVariations.add(variation); } continue; } if (!found && (variation.getGenotypeData() == null || variation.getGenotypeData().getOrganismType() != OrganismType.NO_VARIATION)) { variations.add(variation); found = true; } if (variation.getGenotypeData() == null || variation.getGenotypeData().getOrganismType() != OrganismType.NO_VARIATION) { variationCount++; } lastContext = context; } if (!variations.isEmpty()) { tryToGroupVariations(variations, variationCount, lastContext); } variations.addAll(extendingVariations); return variations; } private void tryToGroupVariations(ArrayList<Variation> variations, int variationCount, VariantContext lastContext) { Variation lastVariation = findLastNotBndVariation(variations); if (lastVariation != null && lastContext != null) { lastVariation.setVariationsCount(variationCount); if (variationCount > 1) { // set overlapping variations type to VariationType.STATISTIC lastVariation.setType(VariationType.STATISTIC); lastVariation.setEndIndex(lastContext.getEnd()); } } } /** * Translates HTSJDK's ambiguous Heterozygous type into our HETEROZYGOUS or HETERO_VAR organism types * * @param context {@code VariantContext} a context, from which genotype is being achieved * @param genotype {@code Genotype} a source genotype * @param genotypeArray {@code} an array, in which correct genotype will be stored in format {0, 0}, {0, 1}, etc. * @return correct {@code OrganismType} */ private static OrganismType determineHeterozygousGenotype(VariantContext context, Genotype genotype, int[] genotypeArray) { OrganismType organismType = null; for (int i = 0; i < genotype.getAlleles().size(); i++) { if (genotype.getAllele(i).isReference()) { organismType = OrganismType.HETEROZYGOUS; genotypeArray[i] = 0; } else { if (organismType == null) { organismType = OrganismType.HETERO_VAR; } genotypeArray[i] = context.getAlternateAlleles().indexOf(genotype.getAllele(i)) + 1; } } return organismType; } private static void determineVariationType(VariantContext context, Integer sampleIndex, Variation variation) { VariantContext.Type type = context.getType(); // Determine VariationType switch (type) { case SNP: variation.setType(VariationType.SNV); break; case INDEL: case MIXED: variation.setType(determineInDel(context, sampleIndex)); break; case MNP: variation.setType(VariationType.MNP); break; case SYMBOLIC: parseSymbolicVariation(variation, context); break; default: variation.setType(null); setNoVariationOrganism(variation); break; } if (variation.getType() == null) { variation.setType(VariationType.UNK); } } private static void setNoVariationOrganism(Variation variation) { if (variation.getGenotypeData() == null) { variation.setGenotypeData(new GenotypeData()); } variation.getGenotypeData().setOrganismType(OrganismType.NO_VARIATION); } /** * Translates variation's extended information from {@code VariantContext} to our {@code Variation} object * * @param variation a {@code Variation} object, for which extended information should be set * @param context a {@code VariantContext} object, that is a source for this information * @param header a {@code VCFHeader} header of VCF file * @param sampleIndex {@code Integer} a sample index from a VCF file. If is null, no genotype information * @param vcfFile a {@code VcfFile}, where variation is located */ public void parseInfo(Variation variation, VariantContext context, VCFHeader header, Integer sampleIndex, VcfFile vcfFile) { final Map<String, Variation.InfoField> verboseAttributes = new HashMap<>(); final Map<String, Object> attributes = context.getAttributes(); for (final Map.Entry<String, Object> e : attributes.entrySet()) { final String attributeKey = e.getKey(); final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(attributeKey); if (headerLine != null) { verboseAttributes.put(attributeKey, new Variation.InfoField(headerLine.getDescription(), e.getValue())); } else { LOGGER.warn(MessageHelper.getMessage(MessagesConstants.ERROR_VCF_HEADER, attributeKey)); verboseAttributes.put(attributeKey, new Variation.InfoField(attributeKey, e.getValue())); } } variation.setInfo(verboseAttributes); if (sampleIndex != null) { Genotype genotype = context.getGenotype(sampleIndex); if (genotype != null) { setGenotypeData(variation, header, genotype); } } if (variation.getType() == VariationType.BND) { parseBNFDInfo(variation, context, sampleIndex, vcfFile); } } private void parseBNFDInfo(Variation variation, VariantContext context, Integer sampleIndex, VcfFile vcfFile) { if (variation.getBindInfo() == null) { variation.setBindInfo(new HashMap<>()); } Allele alt = context.getAlternateAllele(sampleIndex != null ? sampleIndex : 0); for (Pattern pattern : BIND_PATTERNS) { Matcher matcher = pattern.matcher(alt.getDisplayString()); if (matcher.matches()) { String chrName = matcher.group(1); Optional<Chromosome> chromosome = referenceGenomeManager.loadChromosomes(vcfFile.getReferenceId()) .stream().filter(c -> c.getName().equals(chrName) || c.getName().equals(Utils.changeChromosomeName(chrName))) .findAny(); variation.getBindInfo().put(BIND_CHR_ATTRIBUTE, chromosome.isPresent() ? chromosome.get().getId() : chrName); variation.getBindInfo().put(BIND_POS_ATTRIBUTE, matcher.group(2)); break; } } } private void setGenotypeData(Variation variation, VCFHeader header, Genotype genotype) { Map<String, Object> genotypeInfo = new HashMap<>(); if (genotype.getAD() != null) { genotypeInfo.put(header.getFormatHeaderLine("AD").getDescription(), genotype.getAD()); } genotypeInfo.put( header.getFormatHeaderLine("GQ") != null ? header.getFormatHeaderLine("GQ").getDescription() : "GQ", genotype.getGQ()); genotypeInfo.put( header.getFormatHeaderLine("DP") != null ? header.getFormatHeaderLine("DP").getDescription() : "DP", genotype.getDP()); if (genotype.getPL() != null) { genotypeInfo.put( header.getFormatHeaderLine("PL") != null ? header.getFormatHeaderLine("PL").getDescription() : "PL", genotype.getPL()); } genotypeInfo.put( header.getFormatHeaderLine("GT") != null ? header.getFormatHeaderLine("GT").getDescription() : "GT", genotype.getGenotypeString()); variation.getGenotypeData().setInfo(genotypeInfo); } /** * Translates HTSJDK's ambiguous INDEL type into our INS, DEL or MIXED variation types * * @param context {@code VariantContext}, from which variation type is being achieved. * @param sampleIndex {@code Integer} a sample index from VCF file. If is null, will try to guess VariationType by * first allele. * @return correct {@code VariationType} */ private static VariationType determineInDel(VariantContext context, Integer sampleIndex) { Genotype genotype = sampleIndex != null ? context.getGenotype(sampleIndex) : null; if (genotype == null || CollectionUtils.isEmpty(genotype.getAlleles())) { // No genotype information, trying to guess by first alt allele return context.getAlternateAlleles().get(0).length() > context.getReference().length() ? VariationType.INS : VariationType.DEL; } else { return getVariationTypeFromAlleles(context, genotype); } } @NotNull private static VariationType getVariationTypeFromAlleles(VariantContext context, Genotype genotype) { if (genotype.getAlleles().size() > 1) { // Complex deletion/insertion if (genotype.getAllele(0).isReference() ^ genotype.getAllele(1).isReference()) { // if one is reference return getVariationTypeForComplexInDels(genotype); } else { return getVariationTypeForSimpleInDels(context, genotype); } } else { return genotype.getAlleles().get(0).isReference() ? VariationType.DEL : VariationType.INS; } } @NotNull private static VariationType getVariationTypeForSimpleInDels(VariantContext context, Genotype genotype) { Allele firstAllele = genotype.getAllele(0); Allele secondAllele = genotype.getAllele(1); VariationType type; if (firstAllele.isNonReference() && secondAllele.isNonReference()) { // if both are alt if (firstAllele.length() > context.getReference().length() && secondAllele.length() > context.getReference().length()) { type = VariationType.INS; } else if (firstAllele.length() < context.getReference().length() && secondAllele.length() < context.getReference().length()) { type = VariationType.DEL; } else { type = VariationType.MIXED; } } else { // if both are ref type = VariationType.MIXED; } return type; } @NotNull private static VariationType getVariationTypeForComplexInDels(Genotype genotype) { Allele firstAllele = genotype.getAllele(0); Allele secondAllele = genotype.getAllele(1); if (firstAllele.length() == secondAllele.length()) { // if it is a change return firstAllele.length() == 1 ? VariationType.SNV : VariationType.MNP; } VariationType type; if (firstAllele.isReference()) { // if it is an insertion/deletion type = genotype.getAlleles().get(0).length() > genotype.getAlleles().get(1).length() ? VariationType.DEL : VariationType.INS; } else if (secondAllele.isReference()) { type = genotype.getAlleles().get(1).length() > genotype.getAlleles().get(0).length() ? VariationType.DEL : VariationType.INS; } else { type = VariationType.MIXED; } return type; } private static void parseSymbolicVariation(Variation variation, VariantContext context) { variation.setStructural(true); VariationType type = null; for (Allele alt : context.getAlternateAlleles()) { if (STRUCT_DEL_TAG.equals(alt.getDisplayString())) { type = VariationType.DEL; break; } if (STRUCT_INS_TAG.equals(alt.getDisplayString())) { type = VariationType.INS; break; } if (INV_TAG.equals(alt.getDisplayString())) { type = VariationType.INV; break; } if (alt.getDisplayString() != null && DUP_TAG.matcher(alt.getDisplayString()).matches()) { type = VariationType.DUP; break; } type = getBDNVariationType(variation, context, alt); if (type != null) { break; } } variation.setType(type); } private static VariationType getBDNVariationType(Variation variation, VariantContext context, Allele alt) { VariationType type = null; for (Pattern pattern : BIND_PATTERNS) { Matcher matcher = pattern.matcher(alt.getDisplayString()); if (matcher.matches()) { type = VariationType.BND; if (context.getAttribute(BIND_CIPOS_ATTRIBUTE) != null) { variation.setBindInfo(new HashMap<>()); variation.getBindInfo().put(BIND_CIPOS_ATTRIBUTE, context.getAttribute(BIND_CIPOS_ATTRIBUTE)); } return type; } } return type; } }