org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.java — source code listing

Introduction

This page shows the complete source code for the class org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.

Source

/*
* Copyright 2012-2016 Broad Institute, Inc.
* 
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* 
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.utils.variant;

import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;

import java.io.Serializable;
import java.util.*;

/**
 * Static utility routines for working with htsjdk {@link VariantContext} objects:
 * region overlap tests, allele subsetting, tandem-repeat detection and merge support.
 * All members visible in this file are static.
 */
public class GATKVariantContextUtils {

    private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class);

    // Default ploidy (diploid for human) pulled from the shared species constants.
    public static final int DEFAULT_PLOIDY = HomoSapiensConstants.DEFAULT_PLOIDY;

    public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.

    /**
     * Diploid NO_CALL allele list...
     *
     * @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad.
     */
    @Deprecated
    public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);

    // Filter/INFO field strings written when merging multiple call sets.
    public final static String MERGE_FILTER_PREFIX = "filterIn";
    public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
    public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
    public final static String MERGE_INTERSECTION = "Intersection";

    /**
     * Checks whether a variant-context overlaps with a region.
     *
     * <p>
     *     No event overlaps an unmapped region.
     * </p>
     *
     * @param variantContext variant-context to test the overlap with.
     * @param region region to test the overlap with.
     *
     * @throws IllegalArgumentException if either region or event is {@code null}.
     *
     * @return {@code true} if there is an overlap between the event described and the active region provided.
     */
    public static boolean overlapsRegion(final VariantContext variantContext, final GenomeLoc region) {
        if (region == null)
            throw new IllegalArgumentException("the active region provided cannot be null");
        if (variantContext == null)
            throw new IllegalArgumentException("the variant context provided cannot be null");
        if (region.isUnmapped())
            return false;
        // overlap requires the same contig and a non-empty coordinate intersection
        return variantContext.getChr().equals(region.getContig())
                && variantContext.getEnd() >= region.getStart()
                && variantContext.getStart() <= region.getStop();
    }

    /**
     * Returns a homozygous call allele list given the only allele and the ploidy.
     *
     * @param allele the only allele in the allele list.
     * @param ploidy the ploidy of the resulting allele list.
     *
     * @throws IllegalArgumentException if {@code allele} is {@code null} or ploidy is negative.
     *
     * @return never {@code null}; an immutable list with {@code ploidy} copies of {@code allele}.
     */
    public static List<Allele> homozygousAlleleList(final Allele allele, final int ploidy) {
        // report each precondition failure separately so callers get a useful message
        if (allele == null)
            throw new IllegalArgumentException("the allele cannot be null");
        if (ploidy < 0)
            throw new IllegalArgumentException("the ploidy cannot be negative: " + ploidy);

        // Collections.nCopies returns a tailored immutable list backed by a single element
        return Collections.nCopies(ploidy, allele);
    }

    /**
     * Reports whether two allele collections cannot share PL values: they are incompatible
     * when they differ at any position (in iteration order) or have different sizes.
     */
    private static boolean hasPLIncompatibleAlleles(final Collection<Allele> alleleSet1,
            final Collection<Allele> alleleSet2) {
        final Iterator<Allele> first = alleleSet1.iterator();
        final Iterator<Allele> second = alleleSet2.iterator();

        // walk both collections in lock-step; any positional mismatch means incompatible
        while (first.hasNext() && second.hasNext()) {
            if (!first.next().equals(second.next()))
                return true;
        }

        // the common prefix matched; the collections are compatible only when
        // both iterators are exhausted, i.e. the sizes are equal as well
        return first.hasNext() || second.hasNext();
    }

    /**
     * Determines the common reference allele across several variant contexts.
     *
     * Among the records that match {@code loc}, the longest reference allele wins;
     * two distinct reference alleles of equal length at the same site are treated as
     * an input inconsistency and raise a {@link TribbleException}.
     *
     * @param VCs    the list of VariantContexts
     * @param loc    if not null, ignore records that do not begin at this start location
     * @return possibly null Allele (null when no record matches {@code loc})
     */
    public static Allele determineReferenceAllele(final List<VariantContext> VCs, final GenomeLoc loc) {
        Allele ref = null;

        for (final VariantContext vc : VCs) {
            // NOTE(review): contextMatchesLoc is defined elsewhere in this file; assumed to
            // accept every record when loc is null — confirm against its implementation
            if (contextMatchesLoc(vc, loc)) {
                final Allele myRef = vc.getReference();
                if (ref == null || ref.length() < myRef.length())
                    ref = myRef; // keep the longest reference seen so far
                else if (ref.length() == myRef.length() && !ref.equals(myRef))
                    throw new TribbleException(String.format(
                            "The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s",
                            vc.getChr(), vc.getStart(), ref, myRef));
            }
        }

        return ref;
    }

    /**
     * Calculates the total ploidy of a variant context as the sum of all ploidies across genotypes.
     *
     * @param vc the target variant context.
     * @param defaultPloidy the default ploidy to be assumed for any genotype with no declared
     *                      ploidy information (declared ploidy &lt;= 0).
     * @throws IllegalArgumentException if {@code vc} is {@code null} or {@code defaultPloidy} is negative.
     * @return the total ploidy across all genotypes; 0 or greater.
     */
    public static int totalPloidy(final VariantContext vc, final int defaultPloidy) {
        if (vc == null)
            throw new IllegalArgumentException("the vc provided cannot be null");
        if (defaultPloidy < 0)
            throw new IllegalArgumentException("the default ploidy must be 0 or greater");
        int result = 0;
        for (final Genotype genotype : vc.getGenotypes()) {
            final int declaredPloidy = genotype.getPloidy();
            // genotypes with no usable declared ploidy fall back to the supplied default
            result += declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
        }

        return result;
    }

    /** Strategies for merging genotypes when combining multiple input call sets. */
    public enum GenotypeMergeType {
        /**
         * Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
         */
        UNIQUIFY,
        /**
         * Take genotypes in priority order (see the priority argument).
         */
        PRIORITIZE,
        /**
         * Take the genotypes in any order.
         */
        UNSORTED,
        /**
         * Require that all samples/genotypes be unique between all inputs.
         */
        REQUIRE_UNIQUE
    }

    /** Strategies for deciding whether a merged record keeps or drops filtered inputs. */
    public enum FilteredRecordMergeType {
        /**
         * Union - leaves the record if any record is unfiltered.
         */
        KEEP_IF_ANY_UNFILTERED,
        /**
         * Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this.
         */
        KEEP_IF_ALL_UNFILTERED,
        /**
         * If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset.
         */
        KEEP_UNCONDITIONAL
    }

    /** Strategies for combining alleles of different variant types at the same position. */
    public enum MultipleAllelesMergeType {
        /**
         * Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record.
         */
        BY_TYPE,
        /**
         * Merge all allele types at the same start position into the same VCF record.
         */
        MIX_TYPES
    }

    /**
     * Refactored out of the AverageAltAlleleLength annotation class.
     *
     * Computes the average alt-allele event length, weighting each alt allele by the
     * number of called chromosomes that carry it. For alleles the same length as the
     * reference the "length" is the number of mismatching bases; symbolic alleles count
     * as 1; otherwise it is the absolute length difference versus the reference.
     *
     * @param vc the variant context
     * @return the average length of the alt allele (a double); 1.0 for SNPs, symbolic-only
     *         records, or when no alt allele is carried by any called chromosome
     */
    public static double getMeanAltAlleleLength(VariantContext vc) {
        double averageLength = 1.0;
        if (!vc.isSNP() && !vc.isSymbolic()) {
            // adjust for the event length
            int averageLengthNum = 0;
            int averageLengthDenom = 0;
            final int refLength = vc.getReference().length();
            for (final Allele a : vc.getAlternateAlleles()) {
                final int numAllele = vc.getCalledChrCount(a);
                final int alleleSize;
                if (a.length() == refLength) {
                    // same length as the reference (SNP/MNP-like): count mismatching bases
                    final byte[] a_bases = a.getBases();
                    final byte[] ref_bases = vc.getReference().getBases();
                    int n_mismatch = 0;
                    for (int idx = 0; idx < a_bases.length; idx++) {
                        if (a_bases[idx] != ref_bases[idx])
                            n_mismatch++;
                    }
                    alleleSize = n_mismatch;
                } else if (a.isSymbolic()) {
                    alleleSize = 1;
                } else {
                    alleleSize = Math.abs(refLength - a.length());
                }
                averageLengthNum += alleleSize * numAllele;
                averageLengthDenom += numAllele;
            }
            // BUGFIX: when no alt allele is called in any genotype the denominator is 0
            // and the old code divided by zero, returning NaN; fall back to the default.
            if (averageLengthDenom > 0)
                averageLength = ((double) averageLengthNum) / averageLengthDenom;
        }

        return averageLength;
    }

    /**
     * Creates a genome location spanning the full extent of a variant context.
     *
     * @param genomeLocParser parser used to build the location
     * @param vc the variant context
     * @return the genomeLoc covering [vc.start, vc.end] on vc's contig
     */
    public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser, VariantContext vc) {
        final String contig = vc.getChr();
        // final boolean flag mirrors the original call — semantics defined by GenomeLocParser; confirm there
        return genomeLocParser.createGenomeLoc(contig, vc.getStart(), vc.getEnd(), true);
    }

    /**
     * Returns the base substitution type (transition/transversion) of a biallelic SNP.
     *
     * @param context the variant context; must be a biallelic SNP
     * @throws IllegalStateException if {@code context} is not a biallelic SNP
     * @return the substitution type of ref[0] -> alt[0]
     */
    public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) {
        if (!context.isSNP() || !context.isBiallelic())
            // fixed typo ("bialleic") and wording: the record is rejected because it is
            // either not a SNP or not biallelic
            throw new IllegalStateException("Requested SNP substitution type for non-biallelic or non-SNP " + context);
        return BaseUtils.SNPSubstitutionType(context.getReference().getBases()[0],
                context.getAlternateAllele(0).getBases()[0]);
    }

    /**
     * If this is a BiAllelic SNP, is it a transition?
     *
     * @throws IllegalStateException (via {@link #getSNPSubstitutionType}) if the context is not a biallelic SNP
     */
    public static boolean isTransition(VariantContext context) {
        return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION;
    }

    /**
     * If this is a BiAllelic SNP, is it a transversion?
     *
     * @throws IllegalStateException (via {@link #getSNPSubstitutionType}) if the context is not a biallelic SNP
     */
    public static boolean isTransversion(VariantContext context) {
        return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
    }

    /** Returns true when the ref/alt first-base substitution is a transition. */
    public static boolean isTransition(Allele ref, Allele alt) {
        final byte refBase = ref.getBases()[0];
        final byte altBase = alt.getBases()[0];
        return BaseUtils.SNPSubstitutionType(refBase, altBase) == BaseUtils.BaseSubstitutionType.TRANSITION;
    }

    /** Returns true when the ref/alt first-base substitution is a transversion. */
    public static boolean isTransversion(Allele ref, Allele alt) {
        final byte refBase = ref.getBases()[0];
        final byte altBase = alt.getBases()[0];
        return BaseUtils.SNPSubstitutionType(refBase, altBase) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
    }

    /**
     * Returns a context identical to this with the REF and ALT alleles reverse complemented.
     *
     * No-call alleles pass through unchanged; genotypes are rebuilt on the flipped alleles.
     *
     * @param vc        variant context
     * @return new vc
     */
    public static VariantContext reverseComplement(VariantContext vc) {
        // map each original allele to its reverse-complemented counterpart
        final Map<Allele, Allele> revComp = new HashMap<>(vc.getAlleles().size());
        for (final Allele original : vc.getAlleles()) {
            final Allele flipped = original.isNoCall()
                    ? original
                    : Allele.create(BaseUtils.simpleReverseComplement(original.getBases()),
                            original.isReference());
            revComp.put(original, flipped);
        }

        // rebuild every genotype in terms of the flipped alleles
        final GenotypesContext flippedGenotypes = GenotypesContext.create(vc.getNSamples());
        for (final Genotype genotype : vc.getGenotypes()) {
            final List<Allele> gtAlleles = new ArrayList<>();
            for (final Allele allele : genotype.getAlleles()) {
                final Allele flipped = revComp.get(allele);
                // unknown alleles (not present on the vc) become no-calls
                gtAlleles.add(flipped == null ? Allele.NO_CALL : flipped);
            }
            flippedGenotypes.add(new GenotypeBuilder(genotype).alleles(gtAlleles).make());
        }

        return new VariantContextBuilder(vc).alleles(revComp.values()).genotypes(flippedGenotypes).make();
    }

    /**
     * Returns true iff VC is an non-complex indel where every allele represents an expansion or
     * contraction of a series of identical bases in the reference.
     *
     * For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT
     *
     * If VC = -/CT, then this function returns true because the CT insertion matches exactly the
     * upcoming reference.
     * If VC = -/CTA then this function returns false because the CTA isn't a perfect match
     *
     * Now consider deletions:
     *
     * If VC = CT/- then again the same logic applies and this returns true
     * The case of CTA/- makes no sense because it doesn't actually match the reference bases.
     *
     * The logic of this function is pretty simple.  Take all of the non-null alleles in VC.  For
     * each insertion allele of n bases, check if that allele matches the next n reference bases.
     * For each deletion allele of n bases, check if this matches the reference bases at n - 2 n,
     * as it must necessarily match the first n bases.  If this test returns true for all
     * alleles you are a tandem repeat, otherwise you are not.
     *
     * @param vc the variant context to test
     * @param refBasesStartingAtVCWithPad note this is assumed to include the PADDED reference
     * @return true iff every alt allele is a repeat expansion/contraction of the reference
     */
    @Requires({ "vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0" })
    public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
        if (!vc.isIndel()) // only indels are tandem repeats; bail out before building any strings
            return false;

        // drop the padding base so the remaining context starts right after the event
        final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
        final Allele ref = vc.getReference();

        // every alt allele must look like a repeat of the upcoming reference context
        for (final Allele allele : vc.getAlternateAlleles()) {
            if (!isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad))
                return false;
        }

        // we've passed all of the tests, so we are a repeat
        return true;
    }

    /**
     * Computes, for an indel variant context, how many copies of a common repeat unit the
     * reference and each alt allele contain (relative to the trailing reference context).
     *
     * @param vc the variant context; only indels are considered
     * @param refBasesStartingAtVCWithPad reference bases starting at the VC, INCLUDING the padding base
     * @return null if vc is not an indel or any allele is not a tandem expansion/contraction of
     *         the reference context; otherwise a pair of (repeat counts: ref count first, then
     *         one count per alt allele) and the repeat unit bases
     */
    @Requires({ "vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0" })
    public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final VariantContext vc,
            final byte[] refBasesStartingAtVCWithPad) {
        final boolean VERBOSE = false; // debugging flag; prints per-allele repeat info when true
        final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
        if (!vc.isIndel()) // only indels are tandem repeats
            return null;

        final Allele refAllele = vc.getReference();
        // strip the shared padding base from the allele as well
        final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());

        byte[] repeatUnit = null;
        final ArrayList<Integer> lengths = new ArrayList<>();

        for (final Allele allele : vc.getAlternateAlleles()) {
            Pair<int[], byte[]> result = getNumTandemRepeatUnits(refAlleleBases,
                    Arrays.copyOfRange(allele.getBases(), 1, allele.length()),
                    refBasesStartingAtVCWithoutPad.getBytes());

            final int[] repetitionCount = result.first;
            // repetition count = 0 means allele is not a tandem expansion of context
            if (repetitionCount[0] == 0 || repetitionCount[1] == 0)
                return null;

            if (lengths.isEmpty()) {
                lengths.add(repetitionCount[0]); // add ref allele length only once
            }
            lengths.add(repetitionCount[1]); // add this alt allele's length

            // NOTE: the repeat unit from the last alt allele wins; earlier units are overwritten
            repeatUnit = result.second;
            if (VERBOSE) {
                System.out.println("RefContext:" + refBasesStartingAtVCWithoutPad);
                System.out.println("Ref:" + refAllele.toString() + " Count:" + String.valueOf(repetitionCount[0]));
                System.out.println("Allele:" + allele.toString() + " Count:" + String.valueOf(repetitionCount[1]));
                System.out.println("RU:" + new String(repeatUnit));
            }
        }

        return new Pair<List<Integer>, byte[]>(lengths, repeatUnit);
    }

    /**
     * Determines a repeat unit shared by the ref and alt allele bases and counts how many extra
     * copies of that unit each allele contributes relative to the trailing reference context.
     *
     * @param refBases ref allele bases with padding removed
     * @param altBases alt allele bases with padding removed
     * @param remainingRefContext reference bases immediately following the event
     * @return a pair of ({refCount, altCount}, repeatUnit); a count of 0 means that allele is
     *         not a tandem expansion/contraction of the context
     * @deprecated there is still no alternative for this method but eventually there needs to be one implemented in TandemRepeatFinder (protected for now).
     */
    @Deprecated
    public static Pair<int[], byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases,
            final byte[] remainingRefContext) {
        /* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units.
          Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)2.
        */

        byte[] longB;
        // find first repeat unit based on either ref or alt, whichever is longer
        if (altBases.length > refBases.length)
            longB = altBases;
        else
            longB = refBases;

        // see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units
        // for example, -*,CACA needs to first be decomposed into (CA)2
        final int repeatUnitLength = findRepeatedSubstring(longB);
        final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength);

        final int[] repetitionCount = new int[2];
        // look for repetitions forward on the ref bases (i.e. starting at beginning of ref bases)
        // counts are reported net of the repetitions already present in the bare ref allele
        int repetitionsInRef = findNumberOfRepetitions(repeatUnit, refBases, true);
        repetitionCount[0] = findNumberOfRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext),
                true) - repetitionsInRef;
        repetitionCount[1] = findNumberOfRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext),
                true) - repetitionsInRef;

        return new Pair<>(repetitionCount, repeatUnit);

    }

    /**
     * Find out if a string can be represented as a tandem number of substrings.
     * For example ACTACT is a 2-tandem of ACT,
     * but ACTACA is not.
     *
     * @param bases                 String to be tested
     * @return                      Length of repeat unit, if string can be represented as tandem of substring (if it can't
     *                              be represented as one, it will be just the length of the input string)
     */
    public static int findRepeatedSubstring(byte[] bases) {
        // try every candidate unit length, shortest first
        for (int unitLen = 1; unitLen < bases.length; unitLen++) {
            final byte[] unit = Arrays.copyOf(bases, unitLen);
            boolean isTandem = true;
            for (int offset = unitLen; isTandem && offset < bases.length; offset += unitLen) {
                // compare the next chunk against the candidate unit; copyOfRange zero-pads
                // a short trailing chunk, which correctly fails the equality test
                isTandem = Arrays.equals(unit, Arrays.copyOfRange(bases, offset, offset + unitLen));
            }
            if (isTandem)
                return unitLen;
        }

        // no shorter unit tiles the input: the whole string is its own trivial unit
        // (empty input yields 1, matching the historical loop-exit value)
        return Math.max(1, bases.length);
    }

    /**
     * Helper routine that finds number of repetitions a string consists of.
     * For example, for string ATAT and repeat unit AT, number of repetitions = 2
     * @param repeatUnit             Substring; must be non-empty
     * @param testString             String to test
     * @param lookForward            Look for repetitions forward (at beginning of string) or backward (at end of string)
     * @return                       Number of repetitions (0 if testString is not a concatenation of n repeatUnit's)
     * @throws IllegalArgumentException if either array is null or the repeat unit is empty
     * @deprecated Move to use TandemRepeatFinder in protected (move to public if needed).
     */
    @Deprecated
    public static int findNumberOfRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) {

        if (repeatUnit == null)
            throw new IllegalArgumentException("the repeat unit cannot be null");
        if (testString == null)
            throw new IllegalArgumentException("the test string cannot be null");
        // BUGFIX: an empty repeat unit made both loops below advance by 0 and never
        // terminate; reject it explicitly instead of hanging
        if (repeatUnit.length == 0)
            throw new IllegalArgumentException("the repeat unit cannot be empty");

        int numRepeats = 0;
        if (lookForward) {
            // look forward on the test string
            for (int start = 0; start < testString.length; start += repeatUnit.length) {
                final int end = start + repeatUnit.length;
                final byte[] unit = Arrays.copyOfRange(testString, start, end);
                if (!Arrays.equals(unit, repeatUnit))
                    break;
                numRepeats++;
            }
            return numRepeats;
        }

        // look backward. For example, if repeatUnit = AT and testString = GATAT, number of repeat units is still 2
        for (int start = testString.length - repeatUnit.length; start >= 0; start -= repeatUnit.length) {
            final int end = start + repeatUnit.length;
            final byte[] unit = Arrays.copyOfRange(testString, start, end);
            if (Arrays.equals(unit, repeatUnit))
                numRepeats++;
            else
                break;
        }
        return numRepeats;
    }

    /**
     * Helper function for isTandemRepeat that checks that allele matches somewhere on the reference.
     *
     * One allele must be a prefix of the other (standard padded indel representation); the
     * extra bases are then checked against the reference context via basesAreRepeated.
     *
     * @param ref the reference allele
     * @param alt the alternate allele
     * @param refBasesStartingAtVCWithoutPad reference bases following the event, padding removed
     * @return true iff the inserted/deleted bases repeat in the reference context
     */
    protected static boolean isRepeatAllele(final Allele ref, final Allele alt,
            final String refBasesStartingAtVCWithoutPad) {
        if (!Allele.oneIsPrefixOfOther(ref, alt))
            return false; // we require one allele be a prefix of another

        if (ref.length() > alt.length()) { // we are a deletion
            // deletions must match the reference twice (the deleted copy plus one more)
            return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2);
        } else { // we are an insertion
            // insertions only need to match the upcoming reference once
            return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1);
        }
    }

    /**
     * Tests whether the bases that distinguish the longer allele {@code l} from the shorter
     * allele {@code s} occur at least {@code minNumberOfMatches} consecutive times at the
     * start of the reference context {@code ref}.
     */
    protected static boolean basesAreRepeated(final String l, final String s, final String ref,
            final int minNumberOfMatches) {
        final String repeat = l.substring(s.length()); // the bases unique to the longer allele
        final int unitLen = repeat.length();

        for (int match = 0; match < minNumberOfMatches; match++) {
            // regionMatches returns false when the window runs past the end of ref,
            // which covers the "ran out of bases to test" case
            if (!ref.regionMatches(match * unitLen, repeat, 0, unitLen))
                return false;
        }

        return true; // every required copy matched
    }

    /** Strategies for (re)assigning GT values when subsetting a variant context's alleles. */
    public enum GenotypeAssignmentMethod {
        /**
         * set all of the genotype GT values to NO_CALL
         */
        SET_TO_NO_CALL,

        /**
         * set all of the genotype GT values to NO_CALL and remove annotations
         */
        SET_TO_NO_CALL_NO_ANNOTATIONS,

        /**
         * Use the subsetted PLs to greedily assigned genotypes
         */
        USE_PLS_TO_ASSIGN,

        /**
         * Try to match the original GT calls, if at all possible
         *
         * Suppose I have 3 alleles: A/B/C and the following samples:
         *
         *       original_GT   best_match to A/B   best_match to A/C
         * S1 => A/A           A/A                 A/A
         * S2 => A/B           A/B                 A/A
         * S3 => B/B           B/B                 A/A
         * S4 => B/C           A/B                 A/C
         * S5 => C/C           A/A                 C/C
         *
         * Basically, all alleles not in the subset map to ref.  It means that het-alt genotypes
         * when split into 2 bi-allelic variants will be het in each, which is good in some cases,
         * rather than the undetermined behavior when using the PLs to assign, which could result
         * in hom-var or hom-ref for each, depending on the exact PL values.
         */
        BEST_MATCH_TO_ORIGINAL,

        /**
         * do not even bother changing the GTs
         */
        DO_NOT_ASSIGN_GENOTYPES
    }

    /**
     * Subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
     *
     * @param vc                 variant context with genotype likelihoods
     * @param allelesToUse       which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
     * @param assignGenotypes    assignment strategy for the (subsetted) PLs
     * @throws IllegalArgumentException if the inputs are null, empty, do not start with the
     *         reference allele, or contain only a single allele
     * @return a new non-null GenotypesContext
     */
    public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, final List<Allele> allelesToUse,
            final GenotypeAssignmentMethod assignGenotypes) {
        // validate inputs up front
        if (vc == null)
            throw new IllegalArgumentException("the VariantContext cannot be null");
        if (allelesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");
        if (allelesToUse.isEmpty())
            throw new IllegalArgumentException("must have alleles to use");
        if (allelesToUse.get(0).isNonReference())
            throw new IllegalArgumentException("First allele must be the reference allele");
        if (allelesToUse.size() == 1)
            throw new IllegalArgumentException("Cannot subset to only 1 alt allele");

        final GenotypesContext originalGenotypes = vc.getGenotypes();
        // optimization: with no input genotypes there is nothing to subset
        if (originalGenotypes.isEmpty())
            return GenotypesContext.create();

        // which PL entries survive the allele subset
        final List<Integer> likelihoodIndexesToUse = determineDiploidLikelihoodIndexesToUse(vc, allelesToUse);

        // which strand-allele-count entries survive the allele subset
        final List<Integer> sacIndexesToUse = determineSACIndexesToUse(vc, allelesToUse);

        // build and return the subsetted genotypes
        return createGenotypesWithSubsettedLikelihoods(originalGenotypes, vc, allelesToUse, likelihoodIndexesToUse,
                sacIndexesToUse, assignGenotypes);
    }

    /**
     * Find the likelihood indexes to use for a selected set of diploid alleles
     *
     * @param originalVC        the original VariantContext
     * @param allelesToUse      the subset of alleles to use
     * @return a list of PL indexes to use, or null when every allele is kept (meaning the
     *         original PLs can be reused unchanged)
     */
    private static List<Integer> determineDiploidLikelihoodIndexesToUse(final VariantContext originalVC,
            final List<Allele> allelesToUse) {

        if (originalVC == null)
            throw new IllegalArgumentException("the original VariantContext cannot be null");
        if (allelesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");

        // the bitset representing the allele indexes we want to keep
        final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);

        // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
        // then we can keep the PLs as is; otherwise, we determine which ones to keep
        if (MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length)
            return null;

        return getDiploidLikelihoodIndexes(originalVC, alleleIndexesToUse);
    }

    /**
     * Find the strand allele count indexes to use for a selected set of alleles
     *
     * @param originalVC   the original VariantContext
     * @param allelesToUse the subset of alleles to use
     * @return a list of SAC indexes to use, or null when every allele is kept (the original
     *         SACs can then be reused unchanged)
     */
    public static List<Integer> determineSACIndexesToUse(final VariantContext originalVC,
            final List<Allele> allelesToUse) {

        if (originalVC == null)
            throw new IllegalArgumentException("the original VC cannot be null");
        if (allelesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");

        // bitset of allele indexes we want to keep
        final boolean[] keepAllele = getAlleleIndexBitset(originalVC, allelesToUse);

        // optimization: when every allele survives the subset, the SACs can be kept as-is
        final boolean allKept = MathUtils.countOccurrences(true, keepAllele) == keepAllele.length;
        return allKept ? null : getSACIndexes(keepAllele);
    }

    /**
     * Get the actual likelihoods indexes to use given the corresponding diploid allele indexes
     *
     * A PL index is kept only when BOTH alleles of its genotype pair are in the kept set.
     *
     * @param originalVC           the original VariantContext
     * @param alleleIndexesToUse   the bitset representing the alleles to use (@see #getAlleleIndexBitset)
     * @throws ReviewedGATKException if any genotype is not diploid
     * @return a non-null List of PL indexes to keep
     */
    private static List<Integer> getDiploidLikelihoodIndexes(final VariantContext originalVC,
            final boolean[] alleleIndexesToUse) {

        if (originalVC == null)
            throw new IllegalArgumentException("the original VC cannot be null");
        if (alleleIndexesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");

        // All samples must be diploid
        for (final Genotype g : originalVC.getGenotypes()) {
            if (g.getPloidy() != DEFAULT_PLOIDY)
                throw new ReviewedGATKException("All samples must be diploid");
        }

        final List<Integer> result = new ArrayList<>(30);

        // numLikelihoods takes total # of alleles.
        final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY);

        for (int PLindex = 0; PLindex < numLikelihoods; PLindex++) {
            // map the flat PL index back to its (alleleIndex1, alleleIndex2) genotype pair
            final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods
                    .getAllelePair(PLindex);
            // consider this entry only if both of the alleles are good
            if (alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2])
                result.add(PLindex);
        }

        return result;
    }

    /**
     * Get the strand allele count (SAC) indexes to use given the corresponding allele indexes.
     *
     * Each allele owns two consecutive SAC slots, so allele index i maps to SAC indexes 2*i and 2*i+1.
     *
     * @param alleleIndexesToUse    the bitset representing the alleles to use (@see #getAlleleIndexBitset)
     * @return a non-null List of SAC indexes to keep
     */
    private static List<Integer> getSACIndexes(final boolean[] alleleIndexesToUse) {

        if (alleleIndexesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");
        if (alleleIndexesToUse.length == 0)
            throw new IllegalArgumentException("cannot have no alleles to use");

        final List<Integer> sacIndexes = new ArrayList<>(2 * alleleIndexesToUse.length);

        for (int alleleIndex = 0; alleleIndex < alleleIndexesToUse.length; alleleIndex++) {
            if (!alleleIndexesToUse[alleleIndex])
                continue;
            // both SAC slots belonging to this allele are retained together
            sacIndexes.add(2 * alleleIndex);
            sacIndexes.add(2 * alleleIndex + 1);
        }

        return sacIndexes;
    }

    /**
     * Given an original VariantContext and a list of alleles from that VC to keep,
     * returns a bitset representing which allele indexes should be kept.
     *
     * The reference allele (index 0) is always kept.
     *
     * @param originalVC   the original VC
     * @param allelesToUse the list of alleles to keep
     * @return non-null bitset with one entry per allele in the original VC
     */
    private static boolean[] getAlleleIndexBitset(final VariantContext originalVC,
            final List<Allele> allelesToUse) {

        if (originalVC == null)
            throw new IllegalArgumentException("the original VC cannot be null");
        if (allelesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");

        // one slot per allele: [0] is the reference, [1..n] are the alternates
        final boolean[] keep = new boolean[originalVC.getNAlleles()];

        // the reference Allele is definitely still used
        keep[0] = true;

        // alternate allele i of the VC lives at overall allele index i + 1
        for (int overallIndex = 1; overallIndex < keep.length; overallIndex++) {
            if (allelesToUse.contains(originalVC.getAlternateAllele(overallIndex - 1)))
                keep[overallIndex] = true;
        }

        return keep;
    }

    /**
     * Make a new SAC array from a subset of the genotype's original SAC.
     *
     * @param g               the genotype
     * @param sacIndexesToUse the indexes in the SAC to use given the allelesToUse (@see #determineSACIndexesToUse());
     *                        null means keep the original SACs unchanged
     * @return subset of SACs from the original genotype, or the original SACs if sacIndexesToUse is null
     */
    public static int[] makeNewSACs(final Genotype g, final List<Integer> sacIndexesToUse) {

        if (g == null)
            throw new IllegalArgumentException("the genotype cannot be null");

        final int[] oldSACs = getSACs(g);

        // a null index list means "keep everything as-is"
        if (sacIndexesToUse == null)
            return oldSACs;

        final int[] newSACs = new int[sacIndexesToUse.size()];
        int dest = 0;
        for (final int src : sacIndexesToUse)
            newSACs[dest++] = oldSACs[src];

        return newSACs;
    }

    /**
     * Get the genotype SACs (strand allele counts).
     *
     * The SAC attribute may be stored either as a comma-separated String or as an int[];
     * both representations are handled here.
     *
     * @param g the genotype
     * @return an array of SACs
     * @throws IllegalArgumentException if g is null or has no SAC attribute
     * @throws ReviewedGATKException if the type of the SACs is unexpected
     */
    private static int[] getSACs(final Genotype g) {

        if (g == null)
            throw new IllegalArgumentException("the Genotype cannot be null");
        if (!g.hasExtendedAttribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY))
            throw new IllegalArgumentException("Genotype must have SAC");

        // look the attribute up once instead of re-querying the attribute map for every check
        final Object rawSACs = g.getExtendedAttributes().get(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY);

        // String and int[] are final/exact classes, so instanceof matches exactly the same
        // values the original getClass().equals(...) comparisons did
        if (rawSACs instanceof String) {
            // parse the comma-separated representation into an int array
            final ArrayList<String> stringSACs = Utils.split((String) rawSACs, ",");
            final int[] intSACs = new int[stringSACs.size()];
            int i = 0;
            for (final String sac : stringSACs)
                intSACs[i++] = Integer.parseInt(sac);

            return intSACs;
        } else if (rawSACs instanceof int[]) {
            return (int[]) rawSACs;
        } else {
            throw new ReviewedGATKException("Unexpected SAC type");
        }
    }

    /**
     * Create the new GenotypesContext with the subsetted PLs, SACs and ADs
     *
     * @param originalGs               the original GenotypesContext
     * @param originalVC               the original VariantContext
     * @param allelesToUse             the actual alleles to use with the new Genotypes
     * @param likelihoodIndexesToUse   the indexes in the PL to use given the allelesToUse (@see #determineDiploidLikelihoodIndexesToUse()); null means keep the PLs unchanged
     * @param sacIndexesToUse          the indexes in the SAC to use given the allelesToUse (@see #determineSACIndexesToUse()); null means keep the SACs unchanged
     * @param assignGenotypes          assignment strategy for the (subsetted) PLs
     * @return a new non-null GenotypesContext
     */
    private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
            final VariantContext originalVC, final List<Allele> allelesToUse,
            final List<Integer> likelihoodIndexesToUse, final List<Integer> sacIndexesToUse,
            final GenotypeAssignmentMethod assignGenotypes) {

        if (originalGs == null)
            throw new IllegalArgumentException("the original GenotypesContext cannot be null");
        if (originalVC == null)
            throw new IllegalArgumentException("the original VariantContext cannot be null");
        if (allelesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");

        // the new genotypes to create
        final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());

        // make sure we are seeing the expected number of likelihoods per sample
        final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), 2);

        // the samples
        final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();

        // create the new genotypes
        for (int k = 0; k < originalGs.size(); k++) {
            final Genotype g = originalGs.get(sampleIndices.get(k));
            final GenotypeBuilder gb = new GenotypeBuilder(g);

            // create the new likelihoods array from the used alleles
            double[] newLikelihoods;
            if (!g.hasLikelihoods()) {
                // we don't have any likelihoods, so we null out PLs and make G ./.
                newLikelihoods = null;
                gb.noPL();
            } else {
                final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
                if (likelihoodIndexesToUse == null) {
                    // no subsetting requested: pass the original likelihoods through untouched
                    newLikelihoods = originalLikelihoods;
                } else if (originalLikelihoods.length != expectedNumLikelihoods) {
                    // malformed PL count for this sample; drop the PLs rather than subset bad data
                    logger.debug("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + originalVC
                            + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods);
                    newLikelihoods = null;
                } else {
                    // pick out only the PL entries whose allele pairs survive the subsetting
                    newLikelihoods = new double[likelihoodIndexesToUse.size()];
                    int newIndex = 0;
                    for (final int oldIndex : likelihoodIndexesToUse)
                        newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];

                    // might need to re-normalize
                    newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
                }

                // drop the PLs when absent, or when there is no site depth and they carry no information
                if (newLikelihoods == null || (originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0) == 0
                        && likelihoodsAreUninformative(newLikelihoods)))
                    gb.noPL();
                else
                    gb.PL(newLikelihoods);
            }

            // create the new strand allele counts array from the used alleles
            if (g.hasExtendedAttribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY)) {
                int[] newSACs = makeNewSACs(g, sacIndexesToUse);
                gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs);
            }

            // assign the GT field according to the requested strategy, then finalize this genotype
            updateGenotypeAfterSubsetting(g.getAlleles(), gb, assignGenotypes, newLikelihoods, allelesToUse);
            newGTs.add(gb.make());
        }

        // finally, re-derive the AD field to match the subsetted alleles
        return fixADFromSubsettedAlleles(newGTs, originalVC, allelesToUse);
    }

    /**
     * Determine whether a likelihoods vector carries no usable information.
     *
     * @param likelihoods the likelihoods vector to inspect
     * @return true when the summed likelihoods exceed the no-call threshold
     */
    private static boolean likelihoodsAreUninformative(final double[] likelihoods) {
        final double summedLikelihoods = MathUtils.sum(likelihoods);
        return summedLikelihoods > SUM_GL_THRESH_NOCALL;
    }

    /**
     * Add the genotype call (GT) field to GenotypeBuilder using the requested algorithm assignmentMethod
     *
     * @param originalGT the original genotype calls, cannot be null
     * @param gb the builder where we should put our newly called alleles, cannot be null
     * @param assignmentMethod the method to use to do the assignment, cannot be null
     * @param newLikelihoods a vector of log10 likelihoods to use if the method requires PLs; may be null,
     *                       in which case PL-based assignment no-calls the sample
     * @param allelesToUse the alleles we are using for our subsetting, cannot be null or empty
     */
    public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT, final GenotypeBuilder gb,
            final GenotypeAssignmentMethod assignmentMethod, final double[] newLikelihoods,
            final List<Allele> allelesToUse) {
        if (originalGT == null)
            throw new IllegalArgumentException("originalGT cannot be null");
        if (gb == null)
            throw new IllegalArgumentException("gb cannot be null");
        // BUGFIX: the null check must come before isEmpty(); the original order
        // ("allelesToUse.isEmpty() || allelesToUse == null") dereferenced the list first,
        // so a null argument threw NullPointerException instead of this exception
        if (allelesToUse == null || allelesToUse.isEmpty())
            throw new IllegalArgumentException("allelesToUse cannot be empty or null");

        switch (assignmentMethod) {
        case DO_NOT_ASSIGN_GENOTYPES:
            // leave the builder's alleles untouched
            break;
        case SET_TO_NO_CALL:
            gb.alleles(NO_CALL_ALLELES);
            gb.noGQ();
            break;
        case SET_TO_NO_CALL_NO_ANNOTATIONS:
            // no-call the sample and strip every genotype-level annotation
            gb.alleles(NO_CALL_ALLELES);
            gb.noGQ();
            gb.noAD();
            gb.noPL();
            gb.noAttributes();
            break;
        case USE_PLS_TO_ASSIGN:
            if (newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods)) {
                // if there is no mass on the (new) likelihoods, then just no-call the sample
                gb.alleles(NO_CALL_ALLELES);
                gb.noGQ();
            } else {
                // assign the diploid genotype with the maximum likelihood
                final int PLindex = MathUtils.maxElementIndex(newLikelihoods);
                final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods
                        .getAllelePair(PLindex);
                gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1),
                        allelesToUse.get(alleles.alleleIndex2)));
                gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
            }
            break;
        case BEST_MATCH_TO_ORIGINAL:
            // keep each original allele if it survived the subsetting, otherwise substitute ref
            final List<Allele> best = new LinkedList<>();
            final Allele ref = allelesToUse.get(0);
            for (final Allele originalAllele : originalGT) {
                best.add(allelesToUse.contains(originalAllele) ? originalAllele : ref);
            }
            gb.alleles(best);
            break;
        }
    }

    /**
     * Subset the samples in VC to reference-only information (hom-ref calls).
     *
     * Preserves DP and GQ if present.
     *
     * @param vc the variant context to subset down to
     * @param ploidy ploidy to use if a genotype doesn't have any alleles
     * @return a GenotypesContext with one hom-ref genotype per input sample
     */
    public static GenotypesContext subsetToRefOnly(final VariantContext vc, final int ploidy) {
        if (vc == null)
            throw new IllegalArgumentException("vc cannot be null");
        if (ploidy < 1)
            throw new IllegalArgumentException("ploidy must be >= 1 but got " + ploidy);

        final GenotypesContext oldGTs = vc.getGenotypes();

        // optimization: nothing to do when there are no input genotypes
        if (oldGTs.isEmpty())
            return oldGTs;

        final GenotypesContext refOnlyGTs = GenotypesContext.create(oldGTs.size());

        final Allele ref = vc.getReference();
        // cache the common diploid case so the allele list is not rebuilt per sample
        final List<Allele> diploidRefAlleles = Arrays.asList(ref, ref);

        for (final Genotype g : vc.getGenotypes()) {
            // fall back to the supplied ploidy when the genotype carries no alleles of its own
            final int effectivePloidy = g.getPloidy() == 0 ? ploidy : g.getPloidy();
            final List<Allele> homRefAlleles = effectivePloidy == 2
                    ? diploidRefAlleles
                    : Collections.nCopies(effectivePloidy, ref);
            final GenotypeBuilder builder = new GenotypeBuilder(g.getSampleName(), homRefAlleles);
            if (g.hasDP())
                builder.DP(g.getDP());
            if (g.hasGQ())
                builder.GQ(g.getGQ());
            refOnlyGTs.add(builder.make());
        }

        return refOnlyGTs;
    }

    /**
     * Assign genotypes (GTs) to the samples in the VariantContext greedily based on the PLs.
     *
     * @param vc variant context with genotype likelihoods
     * @return genotypes context
     */
    public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
        // delegate to the diploid subsetter, keeping every allele and assigning GTs from the PLs
        final List<Allele> allAlleles = vc.getAlleles();
        return subsetDiploidAlleles(vc, allAlleles, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN);
    }

    /**
     * Split variant context into its biallelic components if there are more than 2 alleles
     *
     * For VC has A/B/C alleles, returns A/B and A/C contexts.
     * Genotypes are all no-calls now (it's not possible to fix them easily)
     * Alleles are right trimmed to satisfy VCF conventions
     *
     * If vc is biallelic or non-variant it is just returned
     *
     * Chromosome counts are updated (but they are by definition 0)
     *
     * @param vc a potentially multi-allelic variant context
     * @return a list of bi-allelic (or monomorphic) variant context
     */
    public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
        // delegate with the defaults: no left trimming, genotypes set to no-call
        final boolean trimLeft = false;
        return splitVariantContextToBiallelics(vc, trimLeft, GenotypeAssignmentMethod.SET_TO_NO_CALL);
    }

    /**
     * Split variant context into its biallelic components if there are more than 2 alleles
     *
     * For VC has A/B/C alleles, returns A/B and A/C contexts.
     * Alleles are right trimmed to satisfy VCF conventions
     *
     * If vc is biallelic or non-variant it is just returned
     *
     * Chromosome counts are updated (but they are by definition 0)
     *
     * @param vc a potentially multi-allelic variant context
     * @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome
     * @param genotypeAssignmentMethod assignment strategy for the (subsetted) PLs
     * @return a list of bi-allelic (or monomorphic) variant context
     */
    public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc,
            final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod) {
        // delegate with the default of discarding the original chromosome counts
        final boolean keepOriginalChrCounts = false;
        return splitVariantContextToBiallelics(vc, trimLeft, genotypeAssignmentMethod, keepOriginalChrCounts);
    }

    /**
     * Split variant context into its biallelic components if there are more than 2 alleles
     *
     * For VC has A/B/C alleles, returns A/B and A/C contexts.
     * Alleles are right trimmed to satisfy VCF conventions
     *
     * If vc is biallelic or non-variant it is just returned
     *
     * Chromosome counts are updated (but they are by definition 0)
     *
     * @param vc a potentially multi-allelic variant context
     * @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome
     * @param genotypeAssignmentMethod assignment strategy for the (subsetted) PLs
     * @param keepOriginalChrCounts keep the original chromosome counts before subsetting
     * @return a list of bi-allelic (or monomorphic) variant context
     */
    public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc,
            final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod,
            final boolean keepOriginalChrCounts) {
        if (vc == null)
            throw new IllegalArgumentException("vc cannot be null");

        if (!vc.isVariant() || vc.isBiallelic())
            // non variant or biallelics already satisfy the contract
            return Collections.singletonList(vc);
        else {
            final List<VariantContext> biallelics = new LinkedList<>();

            // if any of the genotypes are het-non-ref (i.e. 1/2), set all of them to no-call
            final GenotypeAssignmentMethod genotypeAssignmentMethodUsed = hasHetNonRef(vc.getGenotypes())
                    ? GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS
                    : genotypeAssignmentMethod;

            // emit one ref/alt context per alternate allele
            for (final Allele alt : vc.getAlternateAlleles()) {
                final VariantContextBuilder builder = new VariantContextBuilder(vc);

                // make biallelic alleles
                final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
                builder.alleles(alleles);

                // since the VC has been subset, remove the invalid attributes
                // (everything except AC/AF/AN; and even those when genotypes are being fully no-called)
                for (final String key : vc.getAttributes().keySet()) {
                    if (!(key.equals(VCFConstants.ALLELE_COUNT_KEY) || key.equals(VCFConstants.ALLELE_FREQUENCY_KEY)
                            || key.equals(VCFConstants.ALLELE_NUMBER_KEY))
                            || genotypeAssignmentMethodUsed == GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS) {
                        builder.rmAttribute(key);
                    }
                }

                // subset INFO field annotations if available and the genotype is being called
                if (genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS
                        && genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL)
                    addInfoFiledAnnotations(vc, builder, alt, keepOriginalChrCounts);

                // re-derive the genotypes for the reduced allele set, then trim the alleles
                builder.genotypes(subsetDiploidAlleles(vc, alleles, genotypeAssignmentMethodUsed));
                final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
                biallelics.add(trimmed);
            }

            return biallelics;
        }
    }

    /**
     * Check if any of the genotypes is heterozygous, non-reference (i.e. 1/2).
     *
     * @param genotypesContext  genotype information
     * @return true if any genotype is het-non-ref, false otherwise
     */
    private static boolean hasHetNonRef(final GenotypesContext genotypesContext) {
        boolean found = false;
        for (final Genotype gt : genotypesContext) {
            if (gt.isHetNonRef()) {
                found = true;
                break;
            }
        }
        return found;
    }

    /**
     * Strip the PL and AD fields from a genotype.
     *
     * @param g the genotype to strip
     * @return a copy of g without PLs and AD, or g itself when it has neither
     */
    public static Genotype removePLsAndAD(final Genotype g) {
        // avoid rebuilding the genotype when there is nothing to remove
        if (!g.hasLikelihoods() && !g.hasAD())
            return g;
        return new GenotypeBuilder(g).noPL().noAD().make();
    }

    //TODO consider refactor variant-context merging code so that we share as much as possible between
    //TODO simpleMerge and referenceConfidenceMerge
    //TODO likely using a separate helper class or hierarchy.
    /**
     * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
     * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
     * the sample name
     *
     * @param unsortedVCs               collection of unsorted VCs
     * @param priorityListOfVCs         priority list detailing the order in which we should grab the VCs
     * @param filteredRecordMergeType   merge type for filtered records
     * @param genotypeMergeOptions      merge option for genotypes
     * @param annotateOrigin            should we annotate the set it came from?
     * @param printMessages             should we print messages?
     * @param setKey                    the key name of the set
     * @param filteredAreUncalled       are filtered records uncalled?
     * @param mergeInfoWithMaxAC        should we merge in info from the VC with maximum allele count?
     * @return new VariantContext       representing the merge of unsortedVCs
     */
    public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
            final List<String> priorityListOfVCs, final FilteredRecordMergeType filteredRecordMergeType,
            final GenotypeMergeType genotypeMergeOptions, final boolean annotateOrigin, final boolean printMessages,
            final String setKey, final boolean filteredAreUncalled, final boolean mergeInfoWithMaxAC) {
        // the original VC count defaults to the priority list size (0 when no priority list was given)
        final int originalNumOfVCs;
        if (priorityListOfVCs == null)
            originalNumOfVCs = 0;
        else
            originalNumOfVCs = priorityListOfVCs.size();
        return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType,
                genotypeMergeOptions, annotateOrigin, printMessages, setKey, filteredAreUncalled,
                mergeInfoWithMaxAC);
    }

    /**
     * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
     * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
     * the sample name.
     * simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use
     * SampleUtils.verifyUniqueSamplesNames to check that before using simpleMerge.
     *
     * @param unsortedVCs               collection of unsorted VCs
     * @param priorityListOfVCs         priority list detailing the order in which we should grab the VCs
     * @param originalNumOfVCs          number of original VariantContexts; must equal the priority list size when a priority list is given
     * @param filteredRecordMergeType   merge type for filtered records
     * @param genotypeMergeOptions      merge option for genotypes
     * @param annotateOrigin            should we annotate the set it came from?
     * @param printMessages             should we print messages?
     * @param setKey                    the key name of the set
     * @param filteredAreUncalled       are filtered records uncalled?
     * @param mergeInfoWithMaxAC        should we merge in info from the VC with maximum allele count?
     * @return new VariantContext       representing the merge of unsortedVCs
     */
    public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
            final List<String> priorityListOfVCs, final int originalNumOfVCs,
            final FilteredRecordMergeType filteredRecordMergeType, final GenotypeMergeType genotypeMergeOptions,
            final boolean annotateOrigin, final boolean printMessages, final String setKey,
            final boolean filteredAreUncalled, final boolean mergeInfoWithMaxAC) {
        if (unsortedVCs == null || unsortedVCs.isEmpty())
            return null;

        if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
            throw new IllegalArgumentException(
                    "the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list");

        if (annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0)
            throw new IllegalArgumentException(
                    "Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts");

        final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs,
                genotypeMergeOptions);
        // Make sure all variant contexts are padded with reference base in case of indels if necessary
        List<VariantContext> VCs = new ArrayList<>();

        // optionally drop filtered records before merging
        for (final VariantContext vc : preFilteredVCs) {
            if (!filteredAreUncalled || vc.isNotFiltered())
                VCs.add(vc);
        }

        if (VCs.isEmpty()) // everything is filtered out and we're filteredAreUncalled
            return null;

        // establish the baseline info from the first VC
        final VariantContext first = VCs.get(0);
        final String name = first.getSource();
        final Allele refAllele = determineReferenceAllele(VCs);

        final LinkedHashSet<Allele> alleles = new LinkedHashSet<>();
        final Set<String> filters = new HashSet<>();
        final Map<String, Object> attributes = new LinkedHashMap<>();
        final Set<String> inconsistentAttributes = new HashSet<>();
        final Set<String> variantSources = new HashSet<>(); // contains the set of sources we found in our set of VCs that are variant
        final Set<String> rsIDs = new LinkedHashSet<>(1); // most of the time there's one id

        VariantContext longestVC = first;
        int depth = 0;
        int maxAC = -1;
        final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<>();
        double log10PError = CommonInfo.NO_LOG10_PERROR;
        boolean anyVCHadFiltersApplied = false;
        VariantContext vcWithMaxAC = null;
        GenotypesContext genotypes = GenotypesContext.create();

        // counting the number of filtered and variant VCs
        int nFiltered = 0;

        // tracks whether any VC's alleles had to be remapped onto the common reference
        boolean remapped = false;

        // cycle through and add info from the other VCs, making sure the loc/reference matches
        for (final VariantContext vc : VCs) {
            if (longestVC.getStart() != vc.getStart())
                throw new IllegalStateException(
                        "BUG: attempting to merge VariantContexts with different start sites: first="
                                + first.toString() + " second=" + vc.toString());

            if (VariantContextUtils.getSize(vc) > VariantContextUtils.getSize(longestVC))
                longestVC = vc; // get the longest location

            nFiltered += vc.isFiltered() ? 1 : 0;
            if (vc.isVariant())
                variantSources.add(vc.getSource());

            AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
            remapped = remapped || alleleMapping.needsRemapping();

            alleles.addAll(alleleMapping.values());

            mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);

            // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
            if (log10PError == CommonInfo.NO_LOG10_PERROR)
                log10PError = vc.getLog10PError();

            filters.addAll(vc.getFilters());
            anyVCHadFiltersApplied |= vc.filtersWereApplied();

            //
            // add attributes
            //
            // special case DP (add it up) and ID (just preserve it)
            //
            if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
                depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
            if (vc.hasID())
                rsIDs.add(vc.getID());
            if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
                String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
                // lets see if the string contains a "," separator
                if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
                    // multi-allelic AC: strip the surrounding brackets and track the largest entry
                    final List<String> alleleCountArray = Arrays
                            .asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1)
                                    .split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
                    for (final String alleleCount : alleleCountArray) {
                        final int ac = Integer.valueOf(alleleCount.trim());
                        if (ac > maxAC) {
                            maxAC = ac;
                            vcWithMaxAC = vc;
                        }
                    }
                } else {
                    final int ac = Integer.valueOf(rawAlleleCounts);
                    if (ac > maxAC) {
                        maxAC = ac;
                        vcWithMaxAC = vc;
                    }
                }
            }

            for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
                final String key = p.getKey();
                final Object value = p.getValue();
                // only output annotations that have the same value in every input VC
                // if we don't like the key already, don't go anywhere
                if (!inconsistentAttributes.contains(key)) {
                    final boolean alreadyFound = attributes.containsKey(key);
                    final Object boundValue = attributes.get(key);
                    final boolean boundIsMissingValue = alreadyFound
                            && boundValue.equals(VCFConstants.MISSING_VALUE_v4);

                    if (alreadyFound && !boundValue.equals(value) && !boundIsMissingValue) {
                        // we found the value but we're inconsistent, put it in the exclude list
                        inconsistentAttributes.add(key);
                        attributes.remove(key);
                    } else if (!alreadyFound || boundIsMissingValue) { // no value
                        attributes.put(key, value);
                    }
                }
            }
        }

        // if we have more alternate alleles in the merged VC than in one or more of the
        // original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF, and AD
        for (final VariantContext vc : VCs) {
            if (vc.getAlleles().size() == 1)
                continue;
            if (hasPLIncompatibleAlleles(alleles, vc.getAlleles())) {
                if (!genotypes.isEmpty()) {
                    logger.debug(String.format(
                            "Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
                            vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.getAlleles()));
                }
                genotypes = stripPLsAndAD(genotypes);
                // this will remove stale AC,AF attributed from vc
                VariantContextUtils.calculateChromosomeCounts(vc, attributes, true);
                break;
            }
        }

        // take the VC with the maxAC and pull the attributes into a modifiable map
        if (mergeInfoWithMaxAC && vcWithMaxAC != null) {
            attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
        }

        // if at least one record was unfiltered and we want a union, clear all of the filters
        if ((filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size())
                || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL)
            filters.clear();

        if (annotateOrigin) { // we care about where the call came from
            String setValue;
            if (nFiltered == 0 && variantSources.size() == originalNumOfVCs) // nothing was unfiltered
                setValue = MERGE_INTERSECTION;
            else if (nFiltered == VCs.size()) // everything was filtered out
                setValue = MERGE_FILTER_IN_ALL;
            else if (variantSources.isEmpty()) // everyone was reference
                setValue = MERGE_REF_IN_ALL;
            else {
                // mixed case: list each variant source, flagging the filtered ones
                final LinkedHashSet<String> s = new LinkedHashSet<>();
                for (final VariantContext vc : VCs)
                    if (vc.isVariant())
                        s.add(vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource());
                setValue = Utils.join("-", s);
            }

            if (setKey != null) {
                attributes.put(setKey, setValue);
                if (mergeInfoWithMaxAC && vcWithMaxAC != null) {
                    attributesWithMaxAC.put(setKey, setValue);
                }
            }
        }

        if (depth > 0)
            attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

        final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);

        // assemble the merged VC from everything accumulated above
        final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID);
        builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd());
        builder.alleles(alleles);
        builder.genotypes(genotypes);
        builder.log10PError(log10PError);
        if (anyVCHadFiltersApplied) {
            builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
        }
        builder.attributes(new TreeMap<>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));

        // Trim the padded bases of all alleles if necessary
        final VariantContext merged = builder.make();
        if (printMessages && remapped)
            System.out.printf("Remapped => %s%n", merged);
        return merged;
    }

    //TODO as part of a larger refactoring effort remapAlleles can be merged with createAlleleMapping.

    /**
     * Returns a copy of the given genotypes with the PL and AD fields removed from each genotype.
     *
     * @param genotypes the genotypes to strip (not modified)
     * @return a new non-null GenotypesContext whose genotypes carry no PLs or ADs
     */
    public static GenotypesContext stripPLsAndAD(final GenotypesContext genotypes) {
        final GenotypesContext stripped = GenotypesContext.create(genotypes.size());
        for (final Genotype genotype : genotypes)
            stripped.add(removePLsAndAD(genotype));
        return stripped;
    }

    /**
     * Updates the PLs, SACs and AD of the Genotypes in the newly selected VariantContext to reflect the fact that some alleles
     * from the original VariantContext are no longer present.
     *
     * @param selectedVC  the selected (new) VariantContext
     * @param originalVC  the original VariantContext
     * @return a new non-null GenotypesContext
     * @throws IllegalArgumentException if either context is null, or if the selected context has
     *         more alleles than the original (which indicates a combined rather than selected VCF)
     */
    public static GenotypesContext updatePLsSACsAD(final VariantContext selectedVC,
            final VariantContext originalVC) {
        if (selectedVC == null)
            throw new IllegalArgumentException("the selected VariantContext cannot be null");
        if (originalVC == null)
            throw new IllegalArgumentException("the original VariantContext cannot be null");

        final int selectedAlleleCount = selectedVC.getAlleles().size();
        final int originalAlleleCount = originalVC.getAlleles().size();

        // a selection can only shrink (or keep) the allele list; growing it means the input was combined, not selected
        if (selectedAlleleCount > originalAlleleCount)
            throw new IllegalArgumentException(
                    "Attempting to fix PLs, SACs and AD from what appears to be a *combined* VCF and not a selected one");

        final GenotypesContext selectedGenotypes = selectedVC.getGenotypes();

        // identical allele counts mean nothing was dropped, so the genotypes are already consistent
        if (selectedAlleleCount == originalAlleleCount)
            return selectedGenotypes;

        return fixDiploidGenotypesFromSubsettedAlleles(selectedGenotypes, originalVC, selectedVC.getAlleles());
    }

    /**
     * Fix the PLs, SACs and ADs for the GenotypesContext of a VariantContext that has been subset
     *
     * @param originalGs       the original GenotypesContext
     * @param originalVC       the original VariantContext
     * @param allelesToUse     the new subset of alleles to use
     * @return a new non-null GenotypesContext
     * @throws IllegalArgumentException if any argument is null
     */
    static private GenotypesContext fixDiploidGenotypesFromSubsettedAlleles(final GenotypesContext originalGs,
            final VariantContext originalVC, final List<Allele> allelesToUse) {
        if (originalGs == null)
            throw new IllegalArgumentException("the selected GenotypesContext cannot be null");
        if (originalVC == null)
            throw new IllegalArgumentException("the original VariantContext cannot be null");
        if (allelesToUse == null)
            throw new IllegalArgumentException("the alleles to use cannot be null");

        // which diploid likelihood (PL) entries survive the allele subset
        final List<Integer> plIndexesToKeep = determineDiploidLikelihoodIndexesToUse(originalVC, allelesToUse);

        // which strand allele count (SAC) entries survive the allele subset
        final List<Integer> sacIndexesToKeep = determineSACIndexesToUse(originalVC, allelesToUse);

        // rebuild the genotypes with only the surviving entries; genotype calls themselves are not re-assigned
        return createGenotypesWithSubsettedLikelihoods(originalGs, originalVC, allelesToUse, plIndexesToKeep,
                sacIndexesToKeep, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES);
    }

    /**
     * Fix the AD for the GenotypesContext of a VariantContext that has been subset
     *
     * @param originalGs       the original GenotypesContext
     * @param originalVC       the original VariantContext
     * @param allelesToUse     the new (sub)set of alleles to use
     * @return a new non-null GenotypesContext
     * @throws IllegalArgumentException if any argument is null
     */
    public static GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs,
            final VariantContext originalVC, final List<Allele> allelesToUse) {
        if (originalGs == null)
            throw new IllegalArgumentException("the original Gs cannot be null");
        if (originalVC == null)
            throw new IllegalArgumentException("the original VC cannot be null");
        if (allelesToUse == null)
            throw new IllegalArgumentException("the alleles to use list cannot be null");

        // bitset over the original allele indexes marking which alleles we keep
        final boolean[] keepAlleleIndex = getAlleleIndexBitset(originalVC, allelesToUse);

        final GenotypesContext fixedGenotypes = GenotypesContext.create(originalGs.size());

        // walk samples in name order so the output follows the same ordering as the original context
        for (final String sample : originalGs.getSampleNamesOrderedByName())
            fixedGenotypes.add(fixAD(originalGs.get(sample), keepAlleleIndex, allelesToUse.size()));

        return fixedGenotypes;
    }

    /**
     * Fix the AD for the given Genotype
     *
     * @param genotype              the original Genotype
     * @param alleleIndexesToUse    a bitset describing whether or not to keep a given index
     * @param nAllelesToUse         how many alleles we are keeping
     * @return a non-null Genotype
     */
    private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse,
            final int nAllelesToUse) {
        // no AD present -> nothing needs fixing
        if (!genotype.hasAD())
            return genotype;

        final GenotypeBuilder gb = new GenotypeBuilder(genotype);

        final int[] originalAD = genotype.getAD();
        if (originalAD.length != alleleIndexesToUse.length) {
            // AD has the wrong number of entries for this site: drop it rather than guess
            gb.noAD();
        } else {
            // keep only the depths whose allele index survives the subset, preserving order
            final int[] subsetAD = new int[nAllelesToUse];
            int next = 0;
            for (int i = 0; i < originalAD.length; i++) {
                if (alleleIndexesToUse[i])
                    subsetAD[next++] = originalAD[i];
            }
            gb.AD(subsetAD);
        }
        return gb.make();
    }

    /**
     * Convenience overload delegating to the two-argument {@code determineReferenceAllele(VCs, loc)}
     * with no location restriction ({@code null} loc).
     *
     * @param VCs the variant contexts whose reference alleles are considered
     * @return the reference allele chosen by the two-argument overload
     */
    private static Allele determineReferenceAllele(final List<VariantContext> VCs) {
        return determineReferenceAllele(VCs, null);
    }

    /**
     * Does the variant context start exactly at the given location?
     *
     * @param vc  the variant context to test
     * @param loc the location to compare against; a null location matches any context
     * @return true if loc is null or vc's start equals loc's start
     */
    public static boolean contextMatchesLoc(final VariantContext vc, final GenomeLoc loc) {
        if (loc == null)
            return true;
        return loc.getStart() == vc.getStart();
    }

    /**
     * Builds an AlleleMapper that reconciles vc's alleles with the given (longer) reference allele.
     * When vc already uses refAllele no remapping is needed; otherwise each allele of vc is extended.
     *
     * @param refAllele  the reference allele the merged record will use
     * @param vc         the context whose alleles may need extension
     * @param allAlleles the alleles accumulated so far (lets identical extended alleles be reused)
     * @return a non-null AlleleMapper
     */
    static private AlleleMapper resolveIncompatibleAlleles(final Allele refAllele, final VariantContext vc,
            final LinkedHashSet<Allele> allAlleles) {
        if (refAllele.equals(vc.getReference()))
            return new AlleleMapper(vc);

        // extend vc's alleles to match refAllele, then route the old reference to the new one
        final Map<Allele, Allele> remapping = createAlleleMapping(refAllele, vc, allAlleles);
        remapping.put(vc.getReference(), refAllele);
        return new AlleleMapper(remapping);
    }

    //TODO as part of a larger refactoring effort {@link #createAlleleMapping} can be merged with {@link ReferenceConfidenceVariantContextMerger#remapAlleles}.
    /**
     * Create an allele mapping for the given context where its reference allele must (potentially) be extended to the given allele
     *
     * The refAllele is the longest reference allele seen at this start site.
     * So imagine it is:
     * refAllele: ACGTGA
     * myRef:     ACGT
     * myAlt:     A
     *
     * We need to remap all of the alleles in vc to include the extra GA so that
     * myRef => refAllele and myAlt => AGA
     *
     * @param refAllele          the new (extended) reference allele
     * @param oneVC              the Variant Context to extend
     * @param currentAlleles     the list of alleles already created
     * @return a non-null mapping of original alleles to new (extended) ones
     */
    protected static Map<Allele, Allele> createAlleleMapping(final Allele refAllele, final VariantContext oneVC,
            final Collection<Allele> currentAlleles) {
        final Allele myRef = oneVC.getReference();
        if (refAllele.length() <= myRef.length())
            throw new IllegalStateException("BUG: myRef=" + myRef + " is longer than refAllele=" + refAllele);

        // the suffix of the new reference that myRef is missing; every concrete alt gets it appended
        final byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length());

        final Map<Allele, Allele> map = new HashMap<>();
        for (final Allele alt : oneVC.getAlternateAlleles()) {
            if (isUsableAlternateAllele(alt)) {
                Allele extended = Allele.extend(alt, extraBases);
                // reuse an equal allele object already present in currentAlleles, if one exists
                for (final Allele existing : currentAlleles) {
                    if (extended.equals(existing))
                        extended = existing;
                }
                map.put(alt, extended);
            } else if (!alt.isReference()) {
                // non-extensible alleles (e.g. symbolic, spanning deletion) are carried through unchanged
                map.put(alt, alt);
            }
        }

        return map;
    }

    /** An alternate allele can be extended only if it is concrete: not a reference, not symbolic, not the spanning deletion. */
    static private boolean isUsableAlternateAllele(final Allele allele) {
        if (allele.isReference() || allele.isSymbolic())
            return false;
        return allele != Allele.SPAN_DEL;
    }

    /**
     * Sorts the given variant contexts by the position of their source in the priority list.
     *
     * @param unsortedVCs      the contexts to sort
     * @param priorityListOfVCs the ordered list of source names; may be null when not prioritizing
     * @param mergeOption      the merge mode; PRIORITIZE requires a non-null priority list
     * @return a new list, sorted by priority, or in the input's iteration order when no priority
     *         information applies (null list or UNSORTED mode)
     * @throws IllegalArgumentException if mergeOption is PRIORITIZE but the priority list is null
     */
    public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs,
            List<String> priorityListOfVCs, GenotypeMergeType mergeOption) {
        if (mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null)
            throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");

        final ArrayList<VariantContext> result = new ArrayList<>(unsortedVCs);
        // with no priority information (or explicitly unsorted) the input order is kept
        if (priorityListOfVCs != null && mergeOption != GenotypeMergeType.UNSORTED)
            Collections.sort(result, new CompareByPriority(priorityListOfVCs));
        return result;
    }

    /**
     * Copies the genotypes of oneVC into mergedGenotypes, remapping alleles and/or uniquifying
     * sample names as requested. Samples already present in mergedGenotypes are skipped
     * (first occurrence wins).
     *
     * @param mergedGenotypes the accumulating genotype set (mutated in place)
     * @param oneVC           the context whose genotypes are merged in
     * @param alleleMapping   mapping from oneVC's alleles to the merged allele set
     * @param uniquifySamples if true, sample names are suffixed with the track name
     */
    private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC,
            AlleleMapper alleleMapping, boolean uniquifySamples) {
        //TODO: should we add a check for cases when the genotypeMergeOption is REQUIRE_UNIQUE
        for (final Genotype genotype : oneVC.getGenotypes()) {
            final String sampleName = mergedSampleName(oneVC.getSource(), genotype.getSampleName(),
                    uniquifySamples);
            if (mergedGenotypes.containsSample(sampleName))
                continue; // keep only the first genotype seen for each sample name

            Genotype toAdd = genotype;
            if (uniquifySamples || alleleMapping.needsRemapping()) {
                final List<Allele> mappedAlleles = alleleMapping.needsRemapping()
                        ? alleleMapping.remap(genotype.getAlleles())
                        : genotype.getAlleles();
                toAdd = new GenotypeBuilder(genotype).name(sampleName).alleles(mappedAlleles).make();
            }
            mergedGenotypes.add(toAdd);
        }
    }

    /**
     * Cached NO_CALL immutable lists where the position ith contains the list with i elements.
     *
     * Grown on demand by {@link #ensureNoCallListsCapacity(int)} and read by
     * {@link #noCallAlleles(int)}.
     */
    private static List<Allele>[] NOCALL_LISTS = new List[] { Collections.emptyList(),
            Collections.singletonList(Allele.NO_CALL), Collections.nCopies(2, Allele.NO_CALL) };

    /**
     * Synchronized code to ensure that {@link #NOCALL_LISTS} has enough entries beyond the requested ploidy.
     * The cache at least doubles whenever it grows, keeping amortized growth cost low.
     *
     * @param capacity the requested ploidy.
     */
    private static synchronized void ensureNoCallListsCapacity(final int capacity) {
        final int currentCapacity = NOCALL_LISTS.length - 1;
        if (capacity <= currentCapacity)
            return;

        // grow to cover the requested ploidy, doubling the old capacity at minimum
        final int newCapacity = Math.max(capacity, currentCapacity << 1);
        NOCALL_LISTS = Arrays.copyOf(NOCALL_LISTS, newCapacity + 1);
        for (int ploidy = currentCapacity + 1; ploidy < NOCALL_LISTS.length; ploidy++)
            NOCALL_LISTS[ploidy] = Collections.nCopies(ploidy, Allele.NO_CALL);
    }

    /**
     * Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy.
     *
     * @param ploidy the required ploidy.
     *
     * @return never {@code null}, but an empty list if {@code ploidy} is equal or less than 0. The returned list
     *   might or might not be mutable.
     */
    public static List<Allele> noCallAlleles(final int ploidy) {
        // grow the cache on demand; only the growth path is synchronized, this read is not
        if (ploidy >= NOCALL_LISTS.length)
            ensureNoCallListsCapacity(ploidy);
        return NOCALL_LISTS[ploidy];
    }

    /**
     * This is just a safe wrapper around GenotypeLikelihoods.calculatePLindex()
     *
     * @param originalIndex1   the index of the first allele
     * @param originalIndex2   the index of the second allele
     * @return the PL index
     */
    protected static int calculatePLindexFromUnorderedIndexes(final int originalIndex1, final int originalIndex2) {
        // calculatePLindex expects its arguments in ascending order
        final int lo = Math.min(originalIndex1, originalIndex2);
        final int hi = Math.max(originalIndex1, originalIndex2);
        return GenotypeLikelihoods.calculatePLindex(lo, hi);
    }

    /**
     * Computes the sample name to use when merging genotypes across tracks.
     *
     * @param trackName  the source track the sample came from
     * @param sampleName the sample's own name
     * @param uniquify   if true, disambiguate by appending "." plus the track name
     * @return sampleName unchanged, or "sampleName.trackName" when uniquify is set
     */
    public static String mergedSampleName(String trackName, String sampleName, boolean uniquify) {
        if (!uniquify)
            return sampleName;
        return sampleName + "." + trackName;
    }

    /**
     * Trim the alleles in inputVC from the reverse direction
     *
     * Convenience wrapper for {@link #trimAlleles(VariantContext, boolean, boolean)} with
     * forward trimming off and reverse trimming on.
     *
     * @param inputVC a non-null input VC whose alleles might need a haircut
     * @return a non-null VariantContext (may be == to inputVC) with alleles trimmed up
     */
    public static VariantContext reverseTrimAlleles(final VariantContext inputVC) {
        return trimAlleles(inputVC, false, true);
    }

    /**
     * Trim the alleles in inputVC from the forward direction
     *
     * Convenience wrapper for {@link #trimAlleles(VariantContext, boolean, boolean)} with
     * forward trimming on and reverse trimming off.
     *
     * @param inputVC a non-null input VC whose alleles might need a haircut
     * @return a non-null VariantContext (may be == to inputVC) with alleles trimmed up
     */
    public static VariantContext forwardTrimAlleles(final VariantContext inputVC) {
        return trimAlleles(inputVC, true, false);
    }

    /**
     * Trim the alleles in inputVC forward and reverse, as requested
     *
     * @param inputVC a non-null input VC whose alleles might need a haircut
     * @param trimForward should we trim up the alleles from the forward direction?
     * @param trimReverse should we trim up the alleles from the reverse direction?
     * @return a non-null VariantContext (may be == to inputVC) with trimmed up alleles
     */
    @Ensures("result != null")
    public static VariantContext trimAlleles(final VariantContext inputVC, final boolean trimForward,
            final boolean trimReverse) {
        if (inputVC == null)
            throw new IllegalArgumentException("inputVC cannot be null");

        // monoallelic sites and SNPs have nothing that can be trimmed
        if (inputVC.getNAlleles() <= 1 || inputVC.isSNP())
            return inputVC;

        // clip shared bases off the right end first, then off the left end of the result
        final int reverseClip = trimReverse
                ? computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes())
                : 0;
        final VariantContext reverseTrimmed = trimAlleles(inputVC, -1, reverseClip);

        final int forwardClip = trimForward ? computeForwardClipping(reverseTrimmed.getAlleles()) : -1;
        return trimAlleles(reverseTrimmed, forwardClip, 0);
    }

    /**
     * Trim up alleles in inputVC, cutting out all bases up to fwdTrimEnd inclusive and
     * the last revTrim bases from the end
     *
     * @param inputVC a non-null input VC
     * @param fwdTrimEnd bases up to this index (can be -1) will be removed from the start of all alleles
     * @param revTrim the last revTrim bases of each allele will be clipped off as well
     * @return a non-null VariantContext (may be == to inputVC) with trimmed up alleles
     */
    @Requires({ "inputVC != null" })
    @Ensures("result != null")
    protected static VariantContext trimAlleles(final VariantContext inputVC, final int fwdTrimEnd,
            final int revTrim) {
        // nothing to clip on either side: hand back the input untouched
        if (fwdTrimEnd == -1 && revTrim == 0)
            return inputVC;

        final List<Allele> trimmedAlleles = new LinkedList<>();
        final Map<Allele, Allele> originalToTrimmed = new HashMap<>();

        for (final Allele allele : inputVC.getAlleles()) {
            if (allele.isSymbolic()) {
                // symbolic alleles carry no concrete bases to trim
                trimmedAlleles.add(allele);
                originalToTrimmed.put(allele, allele);
            } else {
                // drop fwdTrimEnd+1 leading bases and revTrim trailing bases
                final byte[] clippedBases = Arrays.copyOfRange(allele.getBases(), fwdTrimEnd + 1,
                        allele.length() - revTrim);
                final Allele trimmed = Allele.create(clippedBases, allele.isReference());
                trimmedAlleles.add(trimmed);
                originalToTrimmed.put(allele, trimmed);
            }
        }

        // rewrite the genotypes so they reference the trimmed alleles
        final GenotypesContext trimmedGenotypes = updateGenotypesWithMappedAlleles(inputVC.getGenotypes(),
                new AlleleMapper(originalToTrimmed));

        // forward clipping shifts the event start; the stop follows from the first allele's new length
        final int newStart = inputVC.getStart() + (fwdTrimEnd + 1);
        return new VariantContextBuilder(inputVC)
                .start(newStart)
                .stop(newStart + trimmedAlleles.get(0).length() - 1)
                .alleles(trimmedAlleles)
                .genotypes(trimmedGenotypes)
                .make();
    }

    /**
     * Rebuilds each genotype so that its alleles are translated through the given mapper.
     *
     * @param originalGenotypes the genotypes to update (not modified)
     * @param alleleMapper      the allele remapping to apply
     * @return a new GenotypesContext with remapped alleles
     */
    @Requires("originalGenotypes != null && alleleMapper != null")
    protected static GenotypesContext updateGenotypesWithMappedAlleles(final GenotypesContext originalGenotypes,
            final AlleleMapper alleleMapper) {
        final GenotypesContext remapped = GenotypesContext.create(originalGenotypes.size());
        for (final Genotype g : originalGenotypes)
            remapped.add(new GenotypeBuilder(g).alleles(alleleMapper.remap(g.getAlleles())).make());
        return remapped;
    }

    /**
     * Computes how many bases can be clipped off the right (3') end of every allele while
     * still matching the trailing bases of the reference sequence.
     *
     * Symbolic alleles are skipped.  Clipping stops at the first position where any allele's
     * trailing base disagrees with the reference's corresponding trailing base.
     *
     * @param unclippedAlleles the alleles to examine
     * @param ref              the reference bases the trailing allele bases are compared against
     * @return the number of bases safe to clip from the end of every allele; clipping-1 if
     *         clipping would consume an entire allele (the VariantContext position needs at
     *         least one base left), or -1 if the reference would be fully consumed
     */
    public static int computeReverseClipping(final List<Allele> unclippedAlleles, final byte[] ref) {
        int clipping = 0;
        boolean stillClipping = true;

        while (stillClipping) {
            for (final Allele a : unclippedAlleles) {
                if (a.isSymbolic())
                    continue;

                // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
                // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
                if (a.length() - clipping == 0)
                    return clipping - 1;

                if (a.length() - clipping <= 0 || a.length() == 0) {
                    stillClipping = false;
                } else if (ref.length == clipping) {
                    // ran out of reference bases to compare against
                    return -1;
                } else if (a.getBases()[a.length() - clipping - 1] != ref[ref.length - clipping - 1]) {
                    // this allele's trailing base disagrees with the reference: stop clipping here
                    stillClipping = false;
                }
            }
            if (stillClipping)
                clipping++;
        }

        return clipping;
    }

    /**
     * Clip out any unnecessary bases off the front of the alleles
     *
     * The VCF spec represents alleles as block substitutions, replacing AC with A for a
     * 1 bp deletion of the C.  However, it's possible that we'd end up with alleles that
     * contain extra bases on the left, such as GAC/GA to represent the same 1 bp deletion.
     * This routine finds an offset among all alleles that can be safely trimmed
     * off the left of each allele and still represent the same block substitution.
     *
     * A/C => A/C
     * AC/A => AC/A
     * ACC/AC => CC/C
     * AGT/CAT => AGT/CAT
     * <DEL>/C => <DEL>/C
     *
     * @param unclippedAlleles a non-null list of alleles that we want to clip
     * @return the offset into the alleles where we can safely clip, inclusive, or
     *   -1 if no clipping is tolerated.  So, if the result is 0, then we can remove
     *   the first base of every allele.  If the result is 1, we can remove the
     *   second base.
     */
    public static int computeForwardClipping(final List<Allele> unclippedAlleles) {
        // cannot clip unless there's at least 1 alt allele
        if (unclippedAlleles.size() <= 1)
            return -1;

        // a symbolic allele forbids forward clipping entirely; meanwhile find the shortest
        // allele, which bounds how far left-clipping can possibly go
        int shortestAlleleLength = Integer.MAX_VALUE;
        for (final Allele allele : unclippedAlleles) {
            if (allele.isSymbolic())
                return -1;
            shortestAlleleLength = Math.min(shortestAlleleLength, allele.length());
        }

        final byte[] firstAlleleBases = unclippedAlleles.get(0).getBases();
        int lastSharedIndex = -1;

        // the -1 on the bound ensures we never clip off the right-most base of the shortest allele
        for (int pos = 0; pos < shortestAlleleLength - 1; pos++) {
            final byte sharedBase = firstAlleleBases[pos];
            for (final Allele allele : unclippedAlleles) {
                if (allele.getBases()[pos] != sharedBase)
                    return lastSharedIndex;
            }
            lastSharedIndex = pos;
        }

        return lastSharedIndex;
    }

    /**
     * Computes the Hardy-Weinberg equilibrium p-value from the context's genotype counts.
     *
     * @param vc the variant context whose hom-ref/het/hom-var counts are tested
     * @return 0.0 when no chromosomes were called, otherwise the HWE p-value
     */
    public static double computeHardyWeinbergPvalue(VariantContext vc) {
        // with no called chromosomes there is nothing to test
        return vc.getCalledChrCount() == 0
                ? 0.0
                : HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(),
                        vc.getHomVarCount());
    }

    /**
     * Would trimming the shared leading bases of these allele strings leave some allele empty?
     *
     * Walks the alleles column by column from the left; as long as every allele agrees on the
     * current base, clipping advances.  If clipping ever consumes an entire allele the site
     * requires a padding base; if two alleles disagree first, it does not.
     *
     * @param alleles the allele strings to inspect
     * @return true if any allele is (or would be trimmed to) the empty string
     */
    public static boolean requiresPaddingBase(final List<String> alleles) {
        // an already-empty allele trivially needs padding
        for (final String allele : alleles) {
            if (allele.isEmpty())
                return true;
        }

        int column = 0;
        while (true) {
            Character sharedBase = null;
            for (final String allele : alleles) {
                // this allele would be trimmed away entirely
                if (allele.length() == column)
                    return true;

                final char base = allele.charAt(column);
                if (sharedBase == null)
                    sharedBase = base;
                else if (sharedBase != base)
                    return false; // alleles diverge here, so trimming stops before any allele empties
            }
            column++;
        }
    }

    /**
     * Extracts from igc only the attributes whose keys appear in keysToPreserve.
     *
     * @param igc            the info field to read attributes from
     * @param keysToPreserve the keys worth keeping
     * @return a new mutable map holding just the preserved key/value pairs
     */
    private final static Map<String, Object> subsetAttributes(final CommonInfo igc,
            final Collection<String> keysToPreserve) {
        final Map<String, Object> preserved = new HashMap<>(keysToPreserve.size());
        for (final String key : keysToPreserve) {
            // copy only the keys actually present on the info field
            if (igc.hasAttribute(key))
                preserved.put(key, igc.getAttribute(key));
        }
        return preserved;
    }

    /**
     * @deprecated use variant context builder version instead
     * @param vc                  the variant context
     * @param keysToPreserve      the keys to preserve
     * @return a pruned version of the original variant context
     */
    @Deprecated
    public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve) {
        // delegate to the builder-based overload and materialize the result
        return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
    }

    /**
     * Prunes the given builder's context down to the INFO keys in keysToPreserve and to bare
     * genotypes (AD, DP, PL and all extended attributes are dropped from each genotype).
     *
     * @param builder        the builder holding the context to prune (updated and returned)
     * @param keysToPreserve the INFO keys to keep; null means keep none
     * @return the same builder, updated with the pruned attributes and genotypes
     */
    public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder,
            Collection<String> keysToPreserve) {
        final VariantContext vc = builder.make();
        if (keysToPreserve == null)
            keysToPreserve = Collections.emptyList();

        // keep only the requested INFO attributes
        final Map<String, Object> prunedAttributes = subsetAttributes(vc.getCommonInfo(), keysToPreserve);

        // strip each genotype down to its call (and GQ, which is not removed)
        final GenotypesContext prunedGenotypes = GenotypesContext.create(vc.getNSamples());
        for (final Genotype genotype : vc.getGenotypes())
            prunedGenotypes.add(new GenotypeBuilder(genotype).noAD().noDP().noPL().noAttributes().make());

        return builder.genotypes(prunedGenotypes).attributes(prunedAttributes);
    }

    /**
     * Are all of vc1's alleles contained in vc2's alleles?
     *
     * @param vc1 the candidate subset context
     * @param vc2 the candidate superset context
     * @return true if the reference alleles match and every alternate allele of vc1 appears in vc2
     */
    public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
        // the reference alleles must be identical
        if (!vc1.getReference().equals(vc2.getReference()))
            return false;

        // and vc2 must carry every one of vc1's alternates
        return vc2.getAlternateAlleles().containsAll(vc1.getAlternateAlleles());
    }

    /**
     * Groups the given variant contexts by their {@link VariantContext.Type}, additionally
     * folding contexts whose alleles are a subset of a differently-typed context into that
     * other context's group (in either direction, see the inline cases below).
     *
     * @param VCs the non-null collection of contexts to separate
     * @return a map from type to the list of contexts assigned to that type's group
     * @throws IllegalArgumentException if VCs is null
     */
    public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(
            final Collection<VariantContext> VCs) {
        if (VCs == null) {
            throw new IllegalArgumentException("VCs cannot be null.");
        }

        final HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<>();
        for (final VariantContext vc : VCs) {
            VariantContext.Type vcType = vc.getType();

            // look at previous variant contexts of different type. If:
            // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
            // b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
            // c) neither: do nothing, just add vc to its own list
            boolean addtoOwnList = true;
            for (final VariantContext.Type type : VariantContext.Type.values()) {
                if (type.equals(vcType))
                    continue;

                if (!mappedVCs.containsKey(type))
                    continue;

                List<VariantContext> vcList = mappedVCs.get(type);
                // NOTE: the list is mutated (remove by index) inside this loop; the break below
                // keeps the iteration consistent after a removal
                for (int k = 0; k < vcList.size(); k++) {
                    VariantContext otherVC = vcList.get(k);
                    if (allelesAreSubset(otherVC, vc)) {
                        // otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
                        vcList.remove(k);
                        // avoid having empty lists
                        if (vcList.isEmpty())
                            mappedVCs.remove(type);
                        if (!mappedVCs.containsKey(vcType))
                            mappedVCs.put(vcType, new ArrayList<VariantContext>());
                        mappedVCs.get(vcType).add(otherVC);
                        break;
                    } else if (allelesAreSubset(vc, otherVC)) {
                        // vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
                        mappedVCs.get(type).add(vc);
                        addtoOwnList = false;
                        break;
                    }
                }
            }
            if (addtoOwnList) {
                if (!mappedVCs.containsKey(vcType))
                    mappedVCs.put(vcType, new ArrayList<VariantContext>());
                mappedVCs.get(vcType).add(vc);
            }
        }

        return mappedVCs;
    }

    /**
     * Removes from every genotype all extended attributes whose keys are not in allowedAttributes.
     *
     * @param vc                the context whose genotypes are filtered
     * @param allowedAttributes the attribute keys to keep; null means keep everything
     * @return vc itself when allowedAttributes is null, otherwise a new context with filtered genotypes
     */
    public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc,
            Set<String> allowedAttributes) {
        // a null whitelist means "no filtering at all"
        if (allowedAttributes == null)
            return vc;

        final GenotypesContext filteredGenotypes = GenotypesContext.create(vc.getNSamples());
        for (final Genotype genotype : vc.getGenotypes()) {
            final Map<String, Object> keptAttributes = new HashMap<>();
            for (final Map.Entry<String, Object> entry : genotype.getExtendedAttributes().entrySet()) {
                if (allowedAttributes.contains(entry.getKey()))
                    keptAttributes.put(entry.getKey(), entry.getValue());
            }
            filteredGenotypes.add(new GenotypeBuilder(genotype).attributes(keptAttributes).make());
        }

        return new VariantContextBuilder(vc).genotypes(filteredGenotypes).make();
    }

    /**
     * Maps original alleles to their merged/extended counterparts.  Backed either by a
     * VariantContext (identity mapping over its alleles) or by an explicit allele-to-allele map.
     */
    protected static class AlleleMapper {
        private VariantContext vc = null;
        private Map<Allele, Allele> map = null;

        /** Identity mapper: alleles come straight from the given context. */
        public AlleleMapper(VariantContext vc) {
            this.vc = vc;
        }

        /** Explicit mapper: alleles are translated through the given map. */
        public AlleleMapper(Map<Allele, Allele> map) {
            this.map = map;
        }

        /** True when an explicit map is present, i.e. remap() may change alleles. */
        public boolean needsRemapping() {
            return map != null;
        }

        /** All target alleles: the map's values, or the context's alleles for an identity mapper. */
        public Collection<Allele> values() {
            if (map == null)
                return vc.getAlleles();
            return map.values();
        }

        /** Translates a single allele; alleles absent from the map pass through unchanged. */
        public Allele remap(Allele a) {
            if (map == null || !map.containsKey(a))
                return a;
            return map.get(a);
        }

        /** Translates a list of alleles element-wise, preserving order. */
        public List<Allele> remap(List<Allele> as) {
            final List<Allele> remapped = new ArrayList<>();
            for (final Allele a : as)
                remapped.add(remap(a));
            return remapped;
        }

        /**
         * @return the list of unique values
         */
        public List<Allele> getUniqueMappedAlleles() {
            if (map == null)
                return Collections.emptyList();
            return new ArrayList<>(new HashSet<>(map.values()));
        }
    }

    /**
     * Orders VariantContexts by the position of their source name in a fixed priority list.
     * A context whose source is absent from the list triggers an IllegalArgumentException.
     */
    private static class CompareByPriority implements Comparator<VariantContext>, Serializable {
        // made final: the list is set once at construction and never reassigned
        final List<String> priorityListOfVCs;

        public CompareByPriority(List<String> priorityListOfVCs) {
            this.priorityListOfVCs = priorityListOfVCs;
        }

        /**
         * The priority rank of the given context (its source's index in the priority list).
         *
         * @throws IllegalArgumentException if the context's source is not in the priority list
         */
        private int getIndex(VariantContext vc) {
            final int i = priorityListOfVCs.indexOf(vc.getSource());
            if (i == -1)
                throw new IllegalArgumentException("Priority list " + priorityListOfVCs
                        + " doesn't contain variant context " + vc.getSource());
            return i;
        }

        @Override
        public int compare(VariantContext vc1, VariantContext vc2) {
            // Integer.compare avoids the needless boxing of Integer.valueOf(...).compareTo(...)
            return Integer.compare(getIndex(vc1), getIndex(vc2));
        }
    }

    /**
     * For testing purposes only.  Create a site-only VariantContext at contig:start containing alleles
     *
     * @param name the name of the VC
     * @param contig the contig for the VC
     * @param start the start of the VC
     * @param alleleStrings a non-null, non-empty list of strings for the alleles.  The first will be the ref allele, and others the
     *                      alt.  Will compute the stop of the VC from the length of the reference allele
     * @return a non-null VariantContext
     */
    public static VariantContext makeFromAlleles(final String name, final String contig, final int start,
            final List<String> alleleStrings) {
        if (alleleStrings == null || alleleStrings.isEmpty())
            throw new IllegalArgumentException("alleleStrings must be non-empty, non-null list");

        // the first string's length determines the stop coordinate
        final int refLength = alleleStrings.get(0).length();

        // the first string becomes the reference allele; the rest are alternates
        final List<Allele> alleles = new LinkedList<>();
        for (int i = 0; i < alleleStrings.size(); i++)
            alleles.add(Allele.create(alleleStrings.get(i), i == 0));

        return new VariantContextBuilder(name, contig, start, start + refLength - 1, alleles).make();
    }

    /**
     * Splits the alleles for the provided variant context into its primitive parts.
     * Requires that the input VC be bi-allelic, so calling methods should first call splitVariantContextToBiallelics() if needed.
     * Currently works only for MNPs: each mismatching base of the MNP becomes its own SNP record.
     *
     * @param vc  the non-null VC to split
     * @return a non-empty list of VCs split into primitive parts or the original VC otherwise
     * @throws IllegalArgumentException if {@code vc} is null or not bi-allelic
     */
    public static List<VariantContext> splitIntoPrimitiveAlleles(final VariantContext vc) {
        if (vc == null)
            throw new IllegalArgumentException("Trying to break a null Variant Context into primitive parts");

        if (!vc.isBiallelic())
            throw new IllegalArgumentException(
                    "Trying to break a multi-allelic Variant Context into primitive parts");

        // only MNPs can be decomposed; everything else is returned unchanged
        if (!vc.isMNP())
            return Arrays.asList(vc);

        final byte[] refBases = vc.getReference().getBases();
        final byte[] altBases = vc.getAlternateAllele(0).getBases();

        if (refBases.length != altBases.length)
            throw new IllegalStateException("ref and alt alleles for MNP have different lengths");

        final List<VariantContext> primitives = new ArrayList<>(refBases.length);

        for (int offset = 0; offset < refBases.length; offset++) {
            // matching bases contribute no SNP record
            if (refBases[offset] == altBases[offset])
                continue;

            // build the single-base ref and alt SNP alleles for this position
            final Allele snpRef = Allele.create(refBases[offset], true);
            final Allele snpAlt = Allele.create(altBases[offset], false);
            final int snpLoc = vc.getStart() + offset;

            // translate the MNP alleles in the genotypes to the new SNP alleles
            final Map<Allele, Allele> alleleMap = new HashMap<>();
            alleleMap.put(vc.getReference(), snpRef);
            alleleMap.put(vc.getAlternateAllele(0), snpAlt);
            final GenotypesContext remappedGenotypes = updateGenotypesWithMappedAlleles(vc.getGenotypes(),
                    new AlleleMapper(alleleMap));

            primitives.add(new VariantContextBuilder(vc).start(snpLoc).stop(snpLoc)
                    .alleles(Arrays.asList(snpRef, snpAlt)).genotypes(remappedGenotypes).make());
        }

        // no mismatching bases found: fall back to the original VC
        if (primitives.isEmpty())
            primitives.add(vc);

        return primitives;
    }

    /**
     * Are vc1 and 2 equal including their position and alleles?
     *
     * @param vc1 non-null VariantContext
     * @param vc2 non-null VariantContext
     * @return true if vc1 and vc2 have the same start, end, contig and alleles, false otherwise
     * @throws IllegalArgumentException if either argument is null
     */
    public static boolean equalSites(final VariantContext vc1, final VariantContext vc2) {
        if (vc1 == null)
            throw new IllegalArgumentException("vc1 cannot be null");
        if (vc2 == null)
            throw new IllegalArgumentException("vc2 cannot be null");

        // compare coordinates first (cheap), then contig and allele lists
        return vc1.getStart() == vc2.getStart()
                && vc1.getEnd() == vc2.getEnd()
                && vc1.getChr().equals(vc2.getChr())
                && vc1.getAlleles().equals(vc2.getAlleles());
    }

    /**
     * Returns the absolute 0-based index of an allele.
     *
     * <p/>
     * If the allele is equal to the reference, the result is 0, if it is equal to the first alternative the result is 1
     * and so forth.
     * <p/>
     * Therefore if you want the 0-based index within the alternative alleles you need to subtract one from the
     * result (see {@link #indexOfAltAllele}).
     *
     * <p/>
     * You can indicate whether the Java object reference comparator {@code ==} can be safely used by setting {@code useEquals} to {@code false}.
     *
     * @param vc the target variant context.
     * @param allele the target allele.
     * @param ignoreRefState whether the reference states of the allele is important at all. Has no effect if {@code useEquals} is {@code false}.
     * @param considerRefAllele whether the reference allele should be considered. You should set it to {@code false} if you are only interested in alternative alleles.
     * @param useEquals whether the equals method should be used in the search: {@link Allele#equals(Allele,boolean)}.
     *
     * @throws IllegalArgumentException if {@code allele} is {@code null}.
     * @return {@code -1} if there is no such allele that satisfies those criteria, a value between 0 and {@link VariantContext#getNAlleles()} {@code -1} otherwise.
     */
    public static int indexOfAllele(final VariantContext vc, final Allele allele, final boolean ignoreRefState,
            final boolean considerRefAllele, final boolean useEquals) {
        if (allele == null)
            throw new IllegalArgumentException("the allele cannot be null");
        // dispatch to the equals-based or identity-based search
        return useEquals ? indexOfEqualAllele(vc, allele, ignoreRefState, considerRefAllele)
                : indexOfSameAllele(vc, allele, considerRefAllele);
    }

    /**
     * Returns the relative 0-based index of an alternative allele.
     * <p/>
     * If the query allele is the same as the first alternative allele, the result is 0,
     * if it is equal to the second 1 and so forth.
     *
     * <p/>
     * Notice that the ref-status of the query {@code allele} is ignored.
     *
     * @param vc the target variant context.
     * @param allele the query allele.
     * @param useEquals  whether the equals method should be used in the search: {@link Allele#equals(Allele,boolean)}.
     *
     * @throws IllegalArgumentException if {@code allele} is {@code null}.
     *
     * @return {@code -1} if there is no such allele that satisfies those criteria, a value between 0 and the number
     *  of alternative alleles - 1.
     */
    public static int indexOfAltAllele(final VariantContext vc, final Allele allele, final boolean useEquals) {
        // absolute index counts the reference at 0; shift down by one to get the alt-relative index
        final int absoluteIndex = indexOfAllele(vc, allele, true, false, useEquals);
        if (absoluteIndex < 0) {
            return -1;
        }
        return absoluteIndex - 1;
    }

    // Implements index search using Allele.equals(Allele, boolean).
    // Returns -1 when the match is the reference allele but considerRefAllele is false.
    private static int indexOfEqualAllele(final VariantContext vc, final Allele allele,
            final boolean ignoreRefState, final boolean considerRefAllele) {
        final List<Allele> alleles = vc.getAlleles();
        for (int i = 0; i < alleles.size(); i++) {
            if (alleles.get(i).equals(allele, ignoreRefState)) {
                // index 0 is the reference allele; honor considerRefAllele there
                return (i == 0 && !considerRefAllele) ? -1 : i;
            }
        }
        return -1;
    }

    // Implements index search using object identity (==).
    // Returns -1 when the match is the reference allele but considerRefAllele is false.
    private static int indexOfSameAllele(final VariantContext vc, final Allele allele,
            final boolean considerRefAllele) {
        final List<Allele> alleles = vc.getAlleles();
        for (int i = 0; i < alleles.size(); i++) {
            if (alleles.get(i) == allele) {
                // index 0 is the reference allele; honor considerRefAllele there
                return (i == 0 && !considerRefAllele) ? -1 : i;
            }
        }
        return -1;
    }

    /**
     * Add the VCF INFO field annotations for the used alleles.
     *
     * Mutates {@code builder} in place: when {@code keepOriginalChrCounts} is set, the original AC/AF/AN
     * values are preserved under the GATK "original" keys (subset to {@code altAllele} when the builder
     * holds fewer alleles than {@code vc}); AC/AN/AF are then recomputed from the builder's genotypes.
     *
     * NOTE(review): method name has a typo ("Filed" should be "Field"); it is private, so renaming
     * would require updating its callers elsewhere in this file.
     *
     * @param vc                original variant context
     * @param builder           variant context builder with subset of original variant context's alleles
     * @param altAllele         alternate allele
     * @param keepOriginalChrCounts keep the original chromosome counts before subsetting
     * @throws IllegalArgumentException if {@code vc}, {@code builder} or its alleles are null,
     *         or the builder holds fewer than 2 alleles
     */
    private static void addInfoFiledAnnotations(final VariantContext vc, final VariantContextBuilder builder,
            final Allele altAllele, final boolean keepOriginalChrCounts) {

        if (vc == null)
            throw new IllegalArgumentException("the variant context cannot be null");
        if (builder == null)
            throw new IllegalArgumentException("the variant context builder cannot be null");
        if (builder.getAlleles() == null)
            throw new IllegalArgumentException("the variant context builder alleles cannot be null");

        final List<Allele> alleles = builder.getAlleles();
        if (alleles.size() < 2)
            throw new IllegalArgumentException("the variant context builder must contain at least 2 alleles");

        // don't have to subset, the original vc has the same number and hence, the same alleles
        boolean keepOriginal = (vc.getAlleles().size() == builder.getAlleles().size());

        if (keepOriginalChrCounts) {
            // stash the pre-subset AC/AF (per alt allele) and AN under the "original" keys
            if (vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY))
                builder.attribute(GATKVCFConstants.ORIGINAL_AC_KEY,
                        keepOriginal ? vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY)
                                : getAltAlleleInfoFieldValue(VCFConstants.ALLELE_COUNT_KEY, vc, altAllele));
            if (vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY))
                builder.attribute(GATKVCFConstants.ORIGINAL_AF_KEY,
                        keepOriginal ? vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)
                                : getAltAlleleInfoFieldValue(VCFConstants.ALLELE_FREQUENCY_KEY, vc, altAllele));
            if (vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY)) {
                // AN is a single total, so it never needs per-allele subsetting
                builder.attribute(GATKVCFConstants.ORIGINAL_AN_KEY,
                        vc.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
            }
        }

        // recompute AC/AN/AF from the builder's current genotypes
        VariantContextUtils.calculateChromosomeCounts(builder, true);
    }

    /**
     * Get the alternate allele INFO field value
     *
     * @param infoFieldName     VCF INFO field name
     * @param vc                variant context
     * @param altAllele         the alternate allele
     * @return alternate allele INFO field value
     * @throws ReviewedGATKException if the alternate allele is not part of the variant context
     */
    private static Object getAltAlleleInfoFieldValue(final String infoFieldName, final VariantContext vc,
            final Allele altAllele) {

        // tokenize the attribute into one value per alternate allele
        final Object[] splitOriginalField = getVAttributeValues(vc.getAttribute(infoFieldName));

        // flags which of the VC's alleles match altAllele
        final boolean[] alleleIndexesToUse = getAlleleIndexBitset(vc, Arrays.asList(altAllele));

        // skip the first allele, which is the reference; INFO values are indexed per alt allele
        for (int i = 1; i < alleleIndexesToUse.length; i++) {
            if (alleleIndexesToUse[i])
                return splitOriginalField[i - 1];
        }

        throw new ReviewedGATKException(
                "Alternate allele " + altAllele.toString() + " not in Variant Context " + vc.toString());
    }

    /**
     * Pulls out the appropriate values for the INFO field attribute
     *
     * Handles the three representations htsjdk may hand back: an Object array, a List,
     * or a single string of comma-separated values.
     *
     * @param attribute    INFO field attribute
     * @return tokenized attribute values
     * @throws IllegalArgumentException if {@code attribute} is {@code null}
     */
    private static Object[] getVAttributeValues(final Object attribute) {

        if (attribute == null)
            throw new IllegalArgumentException("the attribute cannot be null");

        // break the original attributes into separate tokens
        final Object[] tokens;
        if (attribute.getClass().isArray())
            tokens = (Object[]) attribute;
        else if (List.class.isAssignableFrom(attribute.getClass()))
            tokens = ((List<?>) attribute).toArray(); // wildcard cast instead of a raw type
        else
            tokens = attribute.toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);

        return tokens;
    }

    /**
     * Increment the number of called alternate and reference plus alternate alleles for a genotype
     *
     * @param calledAltAlleles number of called alternate alleles for all genotypes
     * @param calledAlleles    number of called alleles for all genotypes
     * @param genotype         genotype
     * @return incremented called alleles
     * @throws IllegalArgumentException if calledAltAlleles or genotype are null
     */
    public static int incrementChromosomeCountsInfo(final Map<Allele, Integer> calledAltAlleles,
            final int calledAlleles, final Genotype genotype) {
        if (calledAltAlleles == null)
            throw new IllegalArgumentException("Called alternate alleles can not be null");
        if (genotype == null)
            throw new IllegalArgumentException("Genotype can not be null");

        int incrementedCalledAlleles = calledAlleles;
        if (genotype.isCalled()) {
            for (final Allele allele : genotype.getAlleles()) {
                incrementedCalledAlleles++;
                if (allele.isNonReference()) {
                    // null-safe increment: previously this NPE'd if the map was not
                    // pre-seeded with every alternate allele
                    final Integer currentCount = calledAltAlleles.get(allele);
                    calledAltAlleles.put(allele, currentCount == null ? 1 : currentCount + 1);
                }
            }
        }

        return incrementedCalledAlleles;
    }

    /**
     * Update the variant context chromosome counts info fields (AC, AN, AF)
     *
     * @param calledAltAlleles  number of called alternate alleles for all genotypes
     * @param calledAlleles     number of called alleles for all genotypes
     * @param builder           builder for variant context
     * @throws IllegalArgumentException if calledAltAlleles or builder are null
     */
    public static void updateChromosomeCountsInfo(final Map<Allele, Integer> calledAltAlleles,
            final int calledAlleles, final VariantContextBuilder builder) {
        if (calledAltAlleles == null)
            throw new IllegalArgumentException("Called alternate alleles can not be null");
        if (builder == null)
            throw new IllegalArgumentException("Variant context builder can not be null");

        builder.attribute(VCFConstants.ALLELE_COUNT_KEY, calledAltAlleles.values().toArray())
                .attribute(VCFConstants.ALLELE_NUMBER_KEY, calledAlleles);
        // Add AF if there are called alleles (avoids division by zero)
        if (calledAlleles != 0) {
            // Use a List, not a Set: per the VCF spec AF must have one entry per alternate allele,
            // parallel to AC. A Set would silently collapse alleles that share the same frequency.
            final List<Double> alleleFrequency = new ArrayList<>(calledAltAlleles.size());
            for (final Integer value : calledAltAlleles.values()) {
                alleleFrequency.add(value.doubleValue() / calledAlleles);
            }
            builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFrequency.toArray());
        }
    }
}