org.ut.biolab.medsavant.shared.vcf.VariantRecord.java Source code

Java tutorial

Introduction

Here is the source code for org.ut.biolab.medsavant.shared.vcf.VariantRecord.java

Source

/**
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this software; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF
 * site: http://www.fsf.org.
 */
package org.ut.biolab.medsavant.shared.vcf;

import java.io.Serializable;
import org.apache.commons.lang.NumberUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.ut.biolab.medsavant.shared.util.MiscUtils;

/**
 *
 * @author mfiume, AndrewBrook
 */
/**
 * A single variant call: the fixed VCF columns (chromosome, position, id,
 * ref, alt, qual, filter, info), bookkeeping identifiers, per-sample
 * zygosity/genotype and a set of values extracted from the INFO column.
 *
 * <p>Instances are mutable and the class performs no synchronization.
 *
 * @author mfiume, AndrewBrook
 */
public class VariantRecord implements Serializable {

    /** Classification of a variant. */
    public static enum VariantType {

        //BND => complex rearrangement
        SNP, Insertion, Deletion, Various, Unknown, InDel, Complex, HomoRef;

        /**
         * Maps an integer code to a variant type.
         *
         * @param type integer code; 0 through 7 map to the constants in
         *             declaration order
         * @return the matching type, or {@link #Unknown} for any other code
         */
        public static VariantType getVariantType(int type) {
            switch (type) {
            case 0:
                return SNP;
            case 1:
                return Insertion;
            case 2:
                return Deletion;
            case 3:
                return Various;
            case 4:
                return Unknown;
            case 5:
                return InDel;
            case 6:
                return Complex;
            case 7:
                return HomoRef;
            default:
                return Unknown;
            }
        }
    }

    /** Zygosity of a sample at a variant site. */
    public static enum Zygosity {

        HomoRef, HomoAlt, Hetero, HeteroTriallelic, Missing;

        /**
         * Maps an integer code to a zygosity.
         *
         * @param zygosity integer code; 0 through 4 map to the constants in
         *                 declaration order
         * @return the matching zygosity, or {@link #HomoRef} for any other
         *         code (note: unrecognised codes do not map to
         *         {@link #Missing})
         */
        public static Zygosity getZygosity(int zygosity) {
            switch (zygosity) {
            case 0:
                return HomoRef;
            case 1:
                return HomoAlt;
            case 2:
                return Hetero;
            case 3:
                return HeteroTriallelic;
            case 4:
                return Missing;
            default:
                return HomoRef;
            }
        }
    }

    // Column indices of the fixed fields within a split VCF line.
    private static final int FILE_INDEX_OF_CHROM = 0;
    private static final int FILE_INDEX_OF_POS = FILE_INDEX_OF_CHROM + 1;
    private static final int FILE_INDEX_OF_DBSNPID = FILE_INDEX_OF_POS + 1;
    private static final int FILE_INDEX_OF_REF = FILE_INDEX_OF_DBSNPID + 1;
    private static final int FILE_INDEX_OF_ALT = FILE_INDEX_OF_REF + 1;
    private static final int FILE_INDEX_OF_QUAL = FILE_INDEX_OF_ALT + 1;
    private static final int FILE_INDEX_OF_FILTER = FILE_INDEX_OF_QUAL + 1;
    private static final int FILE_INDEX_OF_INFO = FILE_INDEX_OF_FILTER + 1;

    // Java types the textual columns are parsed into. Kept as raw Class for
    // compatibility with existing callers.
    public static final Class CLASS_OF_VARIANTID = Integer.class;
    public static final Class CLASS_OF_GENOMEID = Integer.class;
    public static final Class CLASS_OF_PIPELINEID = String.class;
    public static final Class CLASS_OF_DNAID = String.class;
    public static final Class CLASS_OF_CHROM = String.class;
    public static final Class CLASS_OF_POSITION = Long.class;
    public static final Class CLASS_OF_DBSNPID = String.class;
    public static final Class CLASS_OF_REF = String.class;
    public static final Class CLASS_OF_ALT = String.class;
    public static final Class CLASS_OF_QUAL = Float.class;
    public static final Class CLASS_OF_FILTER = String.class;
    public static final Class CLASS_OF_CUSTOMINFO = String.class;

    private int uploadID;
    private int fileID;
    private int variantID;
    private int genomeID;
    private int pipelineID;
    private String dnaID;
    private String chrom;
    private Long start_position;
    private Long end_position;
    private String dbSNPID;
    private String ref;
    private String alt;
    private int altNumber;
    private Float qual;
    private String filter;
    private VariantType type;
    private Zygosity zygosity;
    private String genotype;
    private String customInfo;
    private Object[] customFields;

    // Values extracted from the INFO string by extractInfo().
    private String ancestralAllele;
    private Integer alleleCount;
    private Float alleleFrequency;
    private Integer numberOfAlleles;
    private Integer baseQuality;
    private String cigar;
    private Boolean dbSNPMembership;
    private Integer depthOfCoverage;
    private Boolean hapmap2Membership;
    private Boolean hapmap3Membership;
    private Integer mappingQuality;
    private Integer numberOfZeroMQ;
    private Integer numberOfSamplesWithData;
    private Float strandBias;
    private Boolean isSomatic;
    private Boolean isValidated;
    private Boolean isInThousandGenomes;

    /** Placeholder used in the input for a missing value. */
    public static final String nullString = ".";

    /** Column separator used by the tab-string writers. */
    private static final String DELIM = "\t";

    /** Prefix that every stored chromosome name is expected to carry. */
    private static final String CHROM_PREFIX = "chr";

    /*
     * DO NOT USE THIS UNLESS YOU KNOW WHAT YOU'RE DOING
     * Useful for creating empty record in some circumstances
     */
    public VariantRecord() {
    }

    /**
     * Builds a record from a split VCF data line. Chromosome, id, qual,
     * filter and INFO are read from {@code line}; position, alleles and type
     * are supplied by the caller.
     *
     * <p>A chromosome name that does not start with "chr" (any case) is
     * passed through {@code MiscUtils.homogenizeSequence} and prefixed with
     * "chr". No validation of the chromosome name is performed.
     *
     * @param line      the columns of one VCF line; must contain at least
     *                  the eight fixed columns
     * @param start     start position
     * @param end       end position
     * @param ref       reference allele
     * @param alt       alternate allele
     * @param altNumber number of the alternate allele
     * @param vt        variant type
     * @throws IllegalArgumentException if {@code line} is null or has fewer
     *                                  than eight columns
     * @throws Exception declared for backward compatibility with existing
     *                   callers
     */
    public VariantRecord(String[] line, long start, long end, String ref, String alt, int altNumber, VariantType vt)
            throws Exception {
        if (line == null || line.length <= FILE_INDEX_OF_INFO) {
            throw new IllegalArgumentException("VCF line must have at least " + (FILE_INDEX_OF_INFO + 1)
                    + " columns but had " + (line == null ? 0 : line.length));
        }

        dnaID = null;
        chrom = (String) parse(CLASS_OF_CHROM, line[FILE_INDEX_OF_CHROM]);
        if (!hasChromPrefix(chrom)) {
            chrom = CHROM_PREFIX + MiscUtils.homogenizeSequence(chrom);
        }

        dbSNPID = (String) parse(CLASS_OF_DBSNPID, line[FILE_INDEX_OF_DBSNPID]);

        qual = (Float) parse(CLASS_OF_QUAL, line[FILE_INDEX_OF_QUAL]);
        filter = (String) parse(CLASS_OF_FILTER, line[FILE_INDEX_OF_FILTER]);
        customInfo = (String) parse(CLASS_OF_CUSTOMINFO, line[FILE_INDEX_OF_INFO]);

        this.start_position = start;
        this.end_position = end;
        this.alt = alt;
        this.altNumber = altNumber;
        this.ref = ref;
        this.type = vt;

        extractInfo(customInfo);
    }

    /**
     * Builds a record from already-parsed values. The INFO-derived fields
     * are extracted from {@code customInfo}, which may be null.
     */
    public VariantRecord(int uploadID, int fileID, int variantID, int genomeID, int pipelineID, String dnaID,
            String chrom, long start_position, long end_position, String dbSNPID, String ref, String alt,
            int altNumber, float qual, String filter, String customInfo, Object[] customFields) {
        this.uploadID = uploadID;
        this.fileID = fileID;
        this.variantID = variantID;
        this.genomeID = genomeID;
        this.pipelineID = pipelineID;
        this.dnaID = dnaID;
        this.chrom = chrom;
        this.start_position = start_position;
        this.end_position = end_position;
        this.dbSNPID = dbSNPID;
        this.ref = ref;
        this.alt = alt;
        this.altNumber = altNumber;
        this.qual = qual;
        this.filter = filter;
        // Assigned directly rather than through the overridable setter so a
        // subclass never observes a partially constructed record.
        this.customInfo = customInfo;
        extractInfo(customInfo);
        this.customFields = customFields;
    }

    /**
     * Copy constructor. Copies variant id, DNA id, genome id, pipeline id,
     * chromosome, positions, dbSNP id, alleles, alt number, qual, filter,
     * INFO (re-extracting the derived fields), custom fields and type.
     * Upload id, file id, zygosity and genotype are not copied. The custom
     * fields array is shared with {@code r}, not cloned.
     *
     * @param r the record to copy from; must not be null
     */
    public VariantRecord(VariantRecord r) {
        this.variantID = r.getVariantID();
        this.dnaID = r.getDnaID();
        this.genomeID = r.getGenomeID();
        this.pipelineID = r.getPipelineID();
        this.chrom = r.getChrom();
        this.start_position = r.getStartPosition();
        this.end_position = r.getEndPosition();
        this.dbSNPID = r.getDbSNPID();
        this.ref = r.getRef();
        this.alt = r.getAlt();
        this.altNumber = r.getAltNumber();
        this.qual = r.getQual();
        this.filter = r.getFilter();
        this.customInfo = r.getCustomInfo();
        extractInfo(this.customInfo);
        this.customFields = r.getCustomFields();
        this.type = r.getType();
    }

    public void setAltNumber(int a) {
        this.altNumber = a;
    }

    public int getAltNumber() {
        return this.altNumber;
    }

    /** Returns true if {@code name} starts with "chr", ignoring case. */
    private static boolean hasChromPrefix(String name) {
        return name.regionMatches(true, 0, CHROM_PREFIX, 0, CHROM_PREFIX.length());
    }

    /**
     * Converts a textual field into an instance of {@code c}.
     *
     * <p>A null value or the placeholder {@link #nullString} yields "" for
     * {@code String} and null for every other type. {@code Long} and
     * {@code Integer} accept only unsigned digit strings; {@code Float}
     * accepts what {@code NumberUtils.isNumber} accepts. Anything that cannot
     * be converted yields null. {@code Boolean} yields true for any present
     * value (flag semantics).
     *
     * @throws UnsupportedOperationException for any other target class
     */
    private static Object parse(Class<?> c, String value) {
        boolean missing = value == null || value.equals(nullString);

        if (c == String.class) {
            return missing ? "" : value;
        }
        if (missing) {
            return null;
        }

        if (c == Long.class) {
            try {
                return NumberUtils.isDigits(value) ? Long.valueOf(value) : null;
            } catch (RuntimeException unparsable) {
                // Best effort: e.g. a digit string too large for a long.
                return null;
            }
        }

        if (c == Float.class) {
            try {
                return NumberUtils.isNumber(value) ? Float.valueOf(value) : null;
            } catch (RuntimeException unparsable) {
                // Best effort: isNumber accepts forms (e.g. hex) Float rejects.
                return null;
            }
        }

        //if flag exists, set to true
        if (c == Boolean.class) {
            return true;
        }

        if (c == Integer.class) {
            try {
                return NumberUtils.isDigits(value) ? Integer.valueOf(value) : null;
            } catch (RuntimeException unparsable) {
                // Best effort: e.g. a digit string too large for an int.
                return null;
            }
        }

        throw new UnsupportedOperationException("Parser doesn't deal with objects of type " + c);
    }

    /**
     * Combines two variant types: equal (or first-seen) types are kept,
     * differing types collapse to {@link VariantType#Various}. Not referenced
     * by any live code in this class.
     */
    private static VariantType variantTypeHelper(VariantType currentType, VariantType newType) {
        if (currentType == null || currentType == newType) {
            return newType;
        }
        return VariantType.Various;
    }

    /**
     * Extracts the values of the requested keys from an INFO string of
     * ';'-separated {@code name=value} entries (an entry without '=' has the
     * empty string as its value).
     *
     * @param infoString  the INFO string; null is treated as containing no
     *                    entries
     * @param infoKeys    keys to look up, matched case-insensitively
     * @param infoClasses target class for each key, parallel to
     *                    {@code infoKeys}
     * @return an array parallel to {@code infoKeys}; a slot is null when the
     *         key is absent or its value cannot be converted. If a key occurs
     *         more than once, the last occurrence wins.
     */
    public static Object[] parseInfo(String infoString, String[] infoKeys, Class[] infoClasses) {
        Object[] values = new Object[infoKeys.length];
        if (infoString == null) {
            return values;
        }

        for (String element : infoString.trim().split(";")) {
            int equals = element.indexOf('=');
            String name = (equals == -1) ? element : element.substring(0, equals);
            String value = (equals == -1) ? "" : element.substring(equals + 1);

            for (int i = 0; i < infoKeys.length; i++) {
                // equalsIgnoreCase is locale-independent, unlike comparing
                // the results of the default-locale toLowerCase().
                if (name.equalsIgnoreCase(infoKeys[i])) {
                    values[i] = parse(infoClasses[i], value);
                }
            }
        }

        return values;
    }

    public int getUploadID() {
        return uploadID;
    }

    /**
     * Sets the upload id. The duplicated "ID" in the method name is a
     * historical typo that is kept for compatibility.
     */
    public void setUploadIDID(int uploadID) {
        this.uploadID = uploadID;
    }

    public int getFileID() {
        return fileID;
    }

    public void setFileID(int fileID) {
        this.fileID = fileID;
    }

    public int getVariantID() {
        return variantID;
    }

    public void setVariantID(int variantID) {
        this.variantID = variantID;
    }

    public int getGenomeID() {
        return genomeID;
    }

    public void setGenomeID(int genomeID) {
        this.genomeID = genomeID;
    }

    public int getPipelineID() {
        return pipelineID;
    }

    public void setPipelineID(int pipelineID) {
        this.pipelineID = pipelineID;
    }

    public String getAlt() {
        return alt;
    }

    public void setAlt(String alt) {
        this.alt = alt;
    }

    public String getChrom() {
        return chrom;
    }

    public void setChrom(String chrom) {
        this.chrom = chrom;
    }

    public String getDbSNPID() {
        return dbSNPID;
    }

    public void setDbSNPID(String id) {
        this.dbSNPID = id;
    }

    public String getFilter() {
        return filter;
    }

    public void setFilter(String filter) {
        this.filter = filter;
    }

    public Long getStartPosition() {
        return start_position;
    }

    public void setStartPosition(Long pos) {
        this.start_position = pos;
    }

    public Long getEndPosition() {
        return end_position;
    }

    public void setEndPosition(Long pos) {
        this.end_position = pos;
    }

    public Float getQual() {
        return qual;
    }

    public void setQual(Float qual) {
        this.qual = qual;
    }

    public String getRef() {
        return ref;
    }

    public void setRef(String ref) {
        this.ref = ref;
    }

    public String getDnaID() {
        return dnaID;
    }

    public void setDnaID(String dnaID) {
        this.dnaID = dnaID;
    }

    public String getCustomInfo() {
        return customInfo;
    }

    /**
     * Replaces the INFO string and re-extracts every INFO-derived field
     * from it.
     *
     * @param customInfo the new INFO string; may be null
     */
    public void setCustomInfo(String customInfo) {
        this.customInfo = customInfo;
        extractInfo(customInfo);
    }

    public Object[] getCustomFields() {
        return customFields;
    }

    public VariantType getType() {
        return type;
    }

    public void setType(VariantType type) {
        this.type = type;
    }

    public Zygosity getZygosity() {
        return zygosity;
    }

    public void setZygosity(Zygosity zygosity) {
        this.zygosity = zygosity;
    }

    public String getGenotype() {
        return genotype;
    }

    public void setGenotype(String genotype) {
        this.genotype = genotype;
    }

    public void setCustomFields(Object[] customFields) {
        this.customFields = customFields;
    }

    /**
     * Orders this record against another by chromosome, then start
     * position, then end position.
     *
     * @throws NullPointerException if a chromosome or position involved in
     *                              the comparison is null
     */
    public int compareTo(VariantRecord other) {
        return compareTo(other.getChrom(), other.getStartPosition(), other.getEndPosition());
    }

    /**
     * Orders this record against the given coordinates by chromosome (see
     * {@link #compareChrom}), then start position, then end position.
     *
     * @return a negative, zero or positive value as this record sorts
     *         before, equal to or after the given coordinates
     */
    public int compareTo(String chrom, long startpos, long endpos) {
        int byChrom = compareChrom(this.getChrom(), chrom);
        if (byChrom != 0) {
            return byChrom;
        }

        //sort by startpos, then endpos.
        int byStart = this.getStartPosition().compareTo(startpos);
        if (byStart != 0) {
            return byStart;
        }
        return this.getEndPosition().compareTo(endpos);
    }

    /**
     * Compares two chromosome names. A leading "chr" (any case) is ignored
     * when present. If both remaining names are integers they are compared
     * numerically; otherwise they are compared as strings.
     *
     * @throws NullPointerException if either name is null
     */
    public static int compareChrom(String chrom1, String chrom2) {
        String name1 = stripChromPrefix(chrom1);
        String name2 = stripChromPrefix(chrom2);

        if (NumberUtils.isNumber(name1) && NumberUtils.isNumber(name2)) {
            try {
                return Integer.valueOf(name1).compareTo(Integer.valueOf(name2));
            } catch (NumberFormatException notAnInt) {
                // isNumber also accepts decimals, hex, etc.; fall back to
                // string comparison for those.
            }
        }
        return name1.compareTo(name2);
    }

    /** Removes a leading "chr" (any case) if present; otherwise returns the name unchanged. */
    private static String stripChromPrefix(String name) {
        return hasChromPrefix(name) ? name.substring(CHROM_PREFIX.length()) : name;
    }

    /*
     * CUSTOM INFO
     */

    /** @return the value of INFO key {@code AC}, or null if absent or not an unsigned integer */
    public Integer getAlleleCount() {
        return alleleCount;
    }

    /** @return the value of INFO key {@code AF}, or null if absent or not numeric */
    public Float getAlleleFrequency() {
        return alleleFrequency;
    }

    /** @return the raw value of INFO key {@code AA}; "" if the key is absent, null if its value is empty */
    public String getAncestralAllele() {
        return ancestralAllele;
    }

    /** @return the value of INFO key {@code BQ}, or null if absent or not an unsigned integer */
    public Integer getBaseQuality() {
        return baseQuality;
    }

    /** @return the raw value of INFO key {@code CIGAR}; "" if the key is absent, null if its value is empty */
    public String getCigar() {
        return cigar;
    }

    /** @return the state of INFO flag {@code DB}, or null if absent */
    public Boolean getDbSNPMembership() {
        return dbSNPMembership;
    }

    /** @return the value of INFO key {@code DP}, or null if absent or not an unsigned integer */
    public Integer getDepthOfCoverage() {
        return depthOfCoverage;
    }

    /** @return the state of INFO flag {@code H2}, or null if absent */
    public Boolean getHapmap2Membership() {
        return hapmap2Membership;
    }

    /** @return the state of INFO flag {@code H3}, or null if absent */
    public Boolean getHapmap3Membership() {
        return hapmap3Membership;
    }

    /** @return the state of INFO flag {@code 1000G}, or null if absent */
    public Boolean getIsInThousandGenomes() {
        return isInThousandGenomes;
    }

    /** @return the state of INFO flag {@code SOMATIC}, or null if absent */
    public Boolean getIsSomatic() {
        return isSomatic;
    }

    /** @return the state of INFO flag {@code VALIDATED}, or null if absent */
    public Boolean getIsValidated() {
        return isValidated;
    }

    /** @return the value of INFO key {@code MQ}, or null if absent or not an unsigned integer */
    public Integer getMappingQuality() {
        return mappingQuality;
    }

    /** @return the value of INFO key {@code AN}, or null if absent or not an unsigned integer */
    public Integer getNumberOfAlleles() {
        return numberOfAlleles;
    }

    /** @return the value of INFO key {@code NS}, or null if absent or not an unsigned integer */
    public Integer getNumberOfSamplesWithData() {
        return numberOfSamplesWithData;
    }

    /** @return the value of INFO key {@code MQ0}, or null if absent or not an unsigned integer */
    public Integer getNumberOfZeroMQ() {
        return numberOfZeroMQ;
    }

    /** @return the value of INFO key {@code SB}, or null if absent or not numeric */
    public Float getStrandBias() {
        return strandBias;
    }

    /** Looks up {@code key} and converts its value to a Float; null when absent, empty or not numeric. */
    private static Float extractFloatFromInfo(String key, String[] entries) {
        return (Float) parse(Float.class, extractValueFromInfo(key, entries));
    }

    /** Looks up {@code key} and returns its raw value; "" when the key is absent, null when its value is empty. */
    private static String extractStringFromInfo(String key, String[] entries) {
        return extractValueFromInfo(key, entries);
    }

    /** Looks up {@code key} and converts its value to an Integer; null when absent, empty or not an unsigned integer. */
    private static Integer extractIntegerFromInfo(String key, String[] entries) {
        return (Integer) parse(Integer.class, extractValueFromInfo(key, entries));
    }

    /**
     * Reads a boolean INFO key. {@code KEY=1} is true, {@code KEY=<other
     * unsigned integer>} is false, and a bare {@code KEY} entry (a flag with
     * no value) is true. Returns null when the key is absent, its value is
     * empty, or its value is not an unsigned integer.
     */
    private static Boolean extractBooleanFromInfo(String key, String[] entries) {
        String raw = extractValueFromInfo(key, entries);
        if (raw == null) {
            return null;
        }
        if (raw.isEmpty()) {
            // No KEY=value entry; a flag is written as the bare key.
            for (String entry : entries) {
                if (entry.equals(key)) {
                    return Boolean.TRUE;
                }
            }
            return null;
        }
        Integer code = (Integer) parse(Integer.class, raw);
        return (code == null) ? null : Boolean.valueOf(code.intValue() == 1);
    }

    /**
     * Finds the first entry of the form {@code key=value}. The key must
     * match the whole entry name, so looking up "AC" does not match an entry
     * named "MLEAC".
     *
     * @return the value; "" when no such entry exists; null when the entry
     *         exists but its value is empty
     */
    private static String extractValueFromInfo(String key, String[] entries) {
        String prefix = key + "=";
        for (String entry : entries) {
            if (entry.startsWith(prefix)) {
                String value = entry.substring(prefix.length());
                return value.isEmpty() ? null : value;
            }
        }
        return "";
    }

    /**
     * Populates every INFO-derived field from the given INFO string. A null
     * string is treated as containing no entries.
     */
    private void extractInfo(String info) {
        // Split once and share the entries across all lookups.
        String[] entries = (info == null) ? new String[0] : info.split(";");

        this.ancestralAllele = extractStringFromInfo("AA", entries);
        this.alleleCount = extractIntegerFromInfo("AC", entries);
        this.alleleFrequency = extractFloatFromInfo("AF", entries);
        this.numberOfAlleles = extractIntegerFromInfo("AN", entries);
        this.baseQuality = extractIntegerFromInfo("BQ", entries);
        this.cigar = extractStringFromInfo("CIGAR", entries);
        this.dbSNPMembership = extractBooleanFromInfo("DB", entries);
        this.depthOfCoverage = extractIntegerFromInfo("DP", entries);
        //We figure out END based on start, ref and alt, so it is not read from INFO.
        this.hapmap2Membership = extractBooleanFromInfo("H2", entries);
        this.hapmap3Membership = extractBooleanFromInfo("H3", entries);
        this.mappingQuality = extractIntegerFromInfo("MQ", entries);
        this.numberOfZeroMQ = extractIntegerFromInfo("MQ0", entries);
        this.numberOfSamplesWithData = extractIntegerFromInfo("NS", entries);
        this.strandBias = extractFloatFromInfo("SB", entries);
        this.isSomatic = extractBooleanFromInfo("SOMATIC", entries);
        this.isValidated = extractBooleanFromInfo("VALIDATED", entries);
        this.isInThousandGenomes = extractBooleanFromInfo("1000G", entries);
    }

    /*
     * END CUSTOM INFO
     */

    @Override
    public String toString() {
        return "VariantRecord{" + "dnaID=" + dnaID + ", chrom=" + chrom + ", startpos=" + start_position
                + ", endpos=" + end_position + ", id=" + dbSNPID + ", ref=" + ref + ", alt=" + alt
                + ", qual=" + qual + ", filter=" + filter + '}';
    }

    /**
     * Renders this record as one tab-separated line of double-quoted values
     * in the order: upload id, file id, variant id, DNA id, chromosome,
     * start, end, dbSNP id, ref, alt, alt number, qual, filter, type,
     * zygosity, genotype, INFO. Null values are written as empty strings.
     * Values are not escaped.
     */
    public String toTabString(int uploadId, int fileId, int variantId) {
        Object[] columns = {
            uploadId, fileId, variantId, this.dnaID, this.chrom, this.start_position, this.end_position,
            this.dbSNPID, this.ref, this.alt, this.altNumber, this.qual, this.filter, this.type,
            this.zygosity, this.genotype, this.customInfo
        };
        return joinQuoted(columns, false);
    }

    /**
     * Renders the values as one tab-separated line of double-quoted,
     * Java-escaped strings. Null elements become empty strings and Booleans
     * become "1"/"0".
     *
     * @param values the values to render; null or empty yields ""
     */
    public static String createTabString(Object[] values) {
        if (values == null || values.length == 0) {
            return "";
        }
        return joinQuoted(values, true);
    }

    /** Joins the values with the column separator, wrapping each in double quotes and optionally Java-escaping it. */
    private static String joinQuoted(Object[] values, boolean escape) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                line.append(DELIM);
            }
            String text = getString(values[i]);
            line.append('"').append(escape ? StringEscapeUtils.escapeJava(text) : text).append('"');
        }
        return line.toString();
    }

    /** Converts a value to its column text: "" for null, "1"/"0" for Booleans, {@code toString()} otherwise. */
    private static String getString(Object value) {
        if (value == null) {
            return "";
        }
        if (value instanceof Boolean) {
            return ((Boolean) value) ? "1" : "0";
        }
        return value.toString();
    }

    /**
     * Appends {@code FORMAT=<format>;SAMPLE_INFO=<info>} to the INFO string
     * and re-extracts the INFO-derived fields. When the current INFO string
     * is null or empty the result has no leading ';'.
     */
    public void setSampleInformation(String format, String info) {
        String formatted = "FORMAT=" + format + ";SAMPLE_INFO=" + info;
        String newCustomInfo;
        if (customInfo == null || customInfo.isEmpty()) {
            newCustomInfo = formatted;
        } else {
            newCustomInfo = customInfo + ";" + formatted;
        }
        setCustomInfo(newCustomInfo);
    }
}