MSUmpire.PSMDataStructure.ProtID.java Source code

Java tutorial

Introduction

Here is the source code for MSUmpire.PSMDataStructure.ProtID.java

Source

/* 
 * Author: Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 *             Nesvizhskii Lab, Department of Computational Medicine and Bioinformatics, 
 *             University of Michigan, Ann Arbor
 *
 * Copyright 2014 University of Michigan, Ann Arbor, MI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package MSUmpire.PSMDataStructure;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.PriorityQueue;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.log4j.Logger;
import org.xmlpull.v1.XmlPullParserException;

/**
 * Protein ID data structure.
 *
 * <p>Holds the accession, description, sequence and the peptide ions attached
 * to one protein, and offers several ways of summarising peptide-level
 * abundances into a single protein-level value.
 *
 * <p>Fields are public and mutated directly by callers; this class performs no
 * synchronisation of its own.
 *
 * @author Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 */
public class ProtID implements Serializable {

    private static final long serialVersionUID = 863236278237L;

    /** Not serialized (transient). */
    public transient boolean IDByDBSearch = false;
    /** Accession strings added through {@link #AddDisProteins(String)} and {@link #CloneProtein()}. */
    public ArrayList<String> IndisProteins;
    /** Not serialized (transient); initialised empty by the constructor. */
    public transient ArrayList<String> IndisProtDes;
    private String AccNo;
    /** Parsed out of the description by {@link #SetDescription(String)} when the expected markers are present. */
    public String UniProtID;
    public String ProteinGroup;
    public String Description;
    /** Parsed out of the description by {@link #SetDescription(String)} when the expected markers are present. */
    public String GeneName;
    public int ProteinLength;
    public float Mass;
    public float Probability;
    public float GroupProb;
    public float MaxLocalPW;
    /** Largest {@code MaxProbability} among {@link #PeptideID}; see {@link #UpdateMaxIniProb()}. */
    public float MaxIniProb = 0f;
    public String Sequence;
    /** Peptide ions keyed by {@code PepIonID.GetKey()} (see {@link #AddPeptideID(PepIonID)}). */
    public HashMap<String, PepIonID> PeptideID;
    /** Second peptide-ion map; populated outside this class and filtered by {@link #RemoveLowWeightPepID(float)}. */
    public HashMap<String, PepIonID> ProtPeptideID;
    /** Theoretical peptides produced by {@link #InsilicosDigestion(int, int, int)}; {@code null} until a digestion has run. */
    public ArrayList<String> TheoPeptides = null;
    public ArrayList<String> ProtPepSeq = null;

    /**
     * Adds an accession to {@link #IndisProteins} unless it is already listed.
     *
     * @param proID accession to record
     */
    public void AddDisProteins(String proID) {
        if (!IndisProteins.contains(proID)) {
            IndisProteins.add(proID);
        }
    }

    /**
     * Stores the description and, when the {@code ID=}/{@code GeneName} and
     * {@code GeneName=}/{@code OtherGeneNames} markers are present, extracts
     * {@link #UniProtID} and {@link #GeneName} from it.
     *
     * <p>A {@code null} description, or markers that appear in an order that
     * yields an invalid range, leave the derived fields untouched instead of
     * throwing.
     *
     * @param Des description text; may be {@code null}
     */
    public void SetDescription(String Des) {
        Description = Des;
        if (Des == null) {
            return;
        }
        int idIdx = Des.indexOf("ID=");
        int geneNameIdx = Des.indexOf("GeneName");
        if (idIdx >= 0 && geneNameIdx >= 0) {
            // NOTE(review): "ID=" is three characters but the offset is 4, so one extra
            // character after '=' is skipped; kept as in the original - confirm against
            // the description format this parser targets.
            String id = substringOrNull(Des, idIdx + 4, geneNameIdx - 2);
            if (id != null) {
                UniProtID = id;
            }
        }
        int geneEqIdx = Des.indexOf("GeneName=");
        int otherIdx = Des.indexOf("OtherGeneNames");
        if (geneEqIdx >= 0 && otherIdx >= 0) {
            String gene = substringOrNull(Des, geneEqIdx + 9, otherIdx - 2);
            if (gene != null) {
                GeneName = gene.trim();
            }
        }
    }

    /** Returns {@code text.substring(begin, end)}, or {@code null} when the range is not valid for {@code text}. */
    private static String substringOrNull(String text, int begin, int end) {
        if (begin < 0 || end > text.length() || begin > end) {
            return null;
        }
        return text.substring(begin, end);
    }

    /**
     * Creates a new protein carrying this protein's sequence, description,
     * mass, accession and UniProt ID. Peptide maps and the remaining fields are
     * not copied. The new protein's {@link #IndisProteins} contains this
     * protein's accession.
     *
     * <p>Failures while setting the sequence are logged, not propagated.
     *
     * @return the newly created protein
     */
    public ProtID CloneProtein() {
        ProtID newprotein = new ProtID();
        try {
            if (Sequence != null) {
                // SetSequence also runs the in-silico digestion on the copy.
                newprotein.SetSequence(Sequence);
            } else {
                Logger.getRootLogger().error("Sequence of protein:" + getAccNo() + " is null");
            }
        } catch (Exception ex) {
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex));
        }
        newprotein.Description = Description;
        newprotein.Mass = Mass;
        newprotein.setAccNo(getAccNo());
        newprotein.UniProtID = UniProtID;
        newprotein.IndisProteins.add(getAccNo());
        return newprotein;
    }

    /**
     * Sums the top-fragment abundance of every peptide in {@link #PeptideID}
     * and divides by the number of theoretical peptides.
     *
     * @param topNfragment number of fragments passed to each peptide
     * @return the normalised abundance, or {@code 0} when no theoretical
     *         peptides are available
     */
    public float GetAbundanceByFragment_IBAQ(int topNfragment) {
        if (!hasTheoPeptides()) {
            return 0f;
        }
        float totalabundance = 0f;
        for (PepIonID peptide : PeptideID.values()) {
            totalabundance += peptide.GetPepAbundanceByTopFragments(topNfragment);
        }
        return totalabundance / TheoPeptides.size();
    }

    /**
     * Same as {@link #GetAbundanceByTopPepFrag(int, int, float)} with a weight
     * cut-off of {@code -1}.
     */
    public float GetAbundanceByTopPepFrag(int toppep, int topfrag) {
        return GetAbundanceByTopPepFrag(toppep, topfrag, -1f);
    }

    /**
     * Averages the {@code toppep} largest top-fragment abundances among
     * peptides whose {@code FilteringWeight} is strictly greater than
     * {@code pepweight}.
     *
     * @param toppep    maximum number of peptides to average
     * @param topfrag   number of fragments passed to each peptide
     * @param pepweight exclusive lower bound on {@code FilteringWeight}
     * @return the average, or {@code 0} when no peptide qualifies
     */
    public float GetAbundanceByTopPepFrag(int toppep, int topfrag, float pepweight) {
        // PriorityQueue rejects an initial capacity below 1, so bail out first.
        if (PeptideID.isEmpty()) {
            return 0;
        }
        PriorityQueue<Float> TopQueue = new PriorityQueue<>(PeptideID.size(), Collections.reverseOrder());
        for (PepIonID peptide : PeptideID.values()) {
            if (peptide.FilteringWeight > pepweight) {
                TopQueue.add(peptide.GetPepAbundanceByTopFragments(topfrag));
            }
        }
        return averageOfTop(TopQueue, toppep);
    }

    /**
     * Polls up to {@code topN} values from a queue ordered largest-first and
     * returns their mean; returns {@code 0} instead of NaN when nothing can be
     * taken.
     */
    private static float averageOfTop(PriorityQueue<Float> descending, int topN) {
        int num = Math.min(topN, descending.size());
        if (num <= 0) {
            return 0f;
        }
        float total = 0f;
        for (int i = 0; i < num; i++) {
            total += descending.poll();
        }
        return total / num;
    }

    /** Returns whether {@link #TheoPeptides} is available and non-empty. */
    private boolean hasTheoPeptides() {
        return TheoPeptides != null && !TheoPeptides.isEmpty();
    }

    /**
     * @return the sum of {@code GetSpectralCount()} over {@link #PeptideID}
     */
    public int GetSpectralCount() {
        int spc = 0;
        for (PepIonID pep : PeptideID.values()) {
            spc += pep.GetSpectralCount();
        }
        return spc;
    }

    /**
     * @return the sum of {@code GetFragCount()} over {@link #PeptideID}
     */
    public int GetFragCount() {
        int frag = 0;
        for (PepIonID pep : PeptideID.values()) {
            frag += pep.GetFragCount();
        }
        return frag;
    }

    /**
     * Same as {@link #GetAbundanceByMS1_IBAQ(float)} with a weight cut-off of
     * {@code -1}.
     */
    public float GetAbundanceByMS1_IBAQ() {
        return GetAbundanceByMS1_IBAQ(-1f);
    }

    /**
     * Sums {@code PeakHeight[0]} of every peptide that has peak heights and a
     * {@code FilteringWeight} strictly greater than {@code pepweight}, then
     * divides by the number of theoretical peptides.
     *
     * @param pepweight exclusive lower bound on {@code FilteringWeight}
     * @return the normalised abundance, or {@code 0} when no theoretical
     *         peptides are available
     */
    public float GetAbundanceByMS1_IBAQ(float pepweight) {
        if (!hasTheoPeptides()) {
            return 0f;
        }
        float totalabundance = 0f;
        for (PepIonID peptide : PeptideID.values()) {
            if (peptide.PeakHeight != null && peptide.FilteringWeight > pepweight) {
                totalabundance += peptide.PeakHeight[0];
            }
        }
        return totalabundance / TheoPeptides.size();
    }

    /**
     * Same as {@link #GetAbundanceByMS1_TopN(int, float)} with a weight cut-off
     * of {@code -1}.
     */
    public float GetAbundanceByMS1_TopN(int topN) {
        return GetAbundanceByMS1_TopN(topN, -1f);
    }

    /**
     * Averages the {@code topN} largest {@code PeakHeight[0]} values among
     * peptides that have peak heights and a {@code FilteringWeight} strictly
     * greater than {@code pepweight}.
     *
     * @param topN      maximum number of peptides to average
     * @param pepweight exclusive lower bound on {@code FilteringWeight}
     * @return the average, or {@code 0} when no peptide qualifies
     */
    public float GetAbundanceByMS1_TopN(int topN, float pepweight) {
        // PriorityQueue rejects an initial capacity below 1, so bail out first.
        if (PeptideID.isEmpty()) {
            return 0;
        }
        PriorityQueue<Float> TopQueue = new PriorityQueue<>(PeptideID.size(), Collections.reverseOrder());
        for (PepIonID peptide : PeptideID.values()) {
            if (peptide.PeakHeight != null && peptide.FilteringWeight > pepweight) {
                TopQueue.add(peptide.PeakHeight[0]);
            }
        }
        return averageOfTop(TopQueue, topN);
    }

    /**
     * Sums, over the peptides of this protein whose key is listed in
     * {@code ProPep}, the abundance computed from the fragment list registered
     * for that key in {@code PepFrag}. The result is a plain sum; it is not
     * divided by the number of contributing peptides.
     *
     * @param ProPep  peptide keys to include; {@code null} yields {@code 0}
     * @param PepFrag fragment lists looked up by peptide key
     * @return the summed abundance
     */
    public float GetAbundanceByTopCorrFragAcrossSample(ArrayList<String> ProPep,
            HashMap<String, ArrayList<String>> PepFrag) {
        float totalabundance = 0f;
        if (ProPep != null) {
            for (PepIonID pepIonID : PeptideID.values()) {
                if (ProPep.contains(pepIonID.GetKey())) {
                    totalabundance += pepIonID
                            .GetPepAbundanceByTopCorrFragAcrossSample(PepFrag.get(pepIonID.GetKey()));
                }
            }
        }
        return totalabundance;
    }

    /**
     * Creates an empty protein with all collections initialised except
     * {@link #TheoPeptides}, which stays {@code null} until a digestion runs.
     */
    public ProtID() {
        PeptideID = new HashMap<>();
        IndisProteins = new ArrayList<>();
        IndisProtDes = new ArrayList<>();
        ProtPeptideID = new HashMap<>();
        ProtPepSeq = new ArrayList<>();
    }

    /**
     * Registers a peptide ion under its key; an existing entry with the same
     * key is kept.
     *
     * @param pepID peptide ion to add
     */
    public void AddPeptideID(PepIonID pepID) {
        if (!PeptideID.containsKey(pepID.GetKey())) {
            PeptideID.put(pepID.GetKey(), pepID);
        }
    }

    /**
     * Stores the sequence and digests it with one missed cleavage and a peptide
     * length window of 6 to 30.
     *
     * @param Seq protein sequence
     * @throws IOException            propagated from the digestion
     * @throws XmlPullParserException propagated from the digestion
     */
    public void SetSequence(String Seq) throws IOException, XmlPullParserException {
        Sequence = Seq;
        InsilicosDigestion(1, 6, 30);
    }

    /**
     * Fills {@link #TheoPeptides} with the trypsin digest of {@link #Sequence}.
     * When the sequence starts with {@code M}, peptides that begin at the
     * second residue and end at a {@code K} or {@code R} are added as well,
     * provided they fit the length window and are not already present.
     *
     * @param missedcleave allowed missed cleavages
     * @param minlength    minimum peptide length (inclusive)
     * @param maxlength    maximum peptide length (inclusive)
     * @throws XmlPullParserException propagated from the enzyme lookup
     * @throws IOException            propagated from the enzyme lookup
     */
    public void InsilicosDigestion(int missedcleave, int minlength, int maxlength)
            throws XmlPullParserException, IOException {
        TheoPeptides = EnzymeManager.GetInstance().GetTrypsin().digest(Sequence, missedcleave, minlength,
                maxlength);
        // An empty sequence has no first residue to inspect.
        if (Sequence.isEmpty() || Sequence.charAt(0) != 'M') {
            return;
        }
        int mc = 0;
        for (int i = 1; i < Sequence.length(); i++) {
            char residue = Sequence.charAt(i);
            if (residue != 'K' && residue != 'R') {
                continue;
            }
            mc++;
            // NOTE(review): mc counts cleavage sites seen so far, so the peptide ending
            // at the mc-th site spans mc - 1 missed cleavages; this test therefore stops
            // one site earlier than missedcleave would suggest. Kept as in the original
            // because it changes TheoPeptides.size() - confirm the intent.
            if (mc > missedcleave) {
                return;
            }
            String pep = Sequence.substring(1, i + 1);
            if (pep.length() >= minlength && pep.length() <= maxlength && !TheoPeptides.contains(pep)) {
                TheoPeptides.add(pep);
            }
        }
    }

    /**
     * Extracts the token that follows the first {@code GN=} marker in the
     * description, up to the next space.
     *
     * @return that token, or the accession when the description is
     *         {@code null}, has no {@code GN=} marker, or has nothing usable
     *         after it
     */
    public String GetGeneName() {
        if (Description != null && Description.contains("GN=")) {
            // split drops trailing empty strings, so a description ending in "GN="
            // (or "GN=" followed only by spaces) yields no usable element.
            String[] afterTag = Description.split("GN=");
            if (afterTag.length > 1) {
                String[] tokens = afterTag[1].split(" ");
                if (tokens.length > 0) {
                    return tokens[0];
                }
            }
        }
        return getAccNo();
    }

    /**
     * @param decoytag tag to look for
     * @return {@code true} when the accession starts or ends with the tag
     */
    public boolean IsDecoy(String decoytag) {
        String accession = getAccNo();
        return accession.startsWith(decoytag) || accession.endsWith(decoytag);
    }

    /**
     * @return the AccNo
     */
    public String getAccNo() {
        return AccNo;
    }

    /**
     * @param AccNo the AccNo to set
     */
    public void setAccNo(String AccNo) {
        this.AccNo = AccNo;
    }

    /**
     * Recomputes {@link #MaxIniProb} as the largest {@code MaxProbability}
     * among {@link #PeptideID}, or {@code 0} when there is none above zero.
     */
    public void UpdateMaxIniProb() {
        MaxIniProb = 0f;
        for (PepIonID pepion : PeptideID.values()) {
            if (pepion.MaxProbability > MaxIniProb) {
                MaxIniProb = pepion.MaxProbability;
            }
        }
    }

    /**
     * Removes from {@link #PeptideID} and from {@link #ProtPeptideID} every
     * peptide ion whose {@code FilteringWeight} is below {@code threshold}.
     * Each map is filtered on the weights of its own entries only.
     *
     * <p>{@link #MaxIniProb} is reset to {@code 0} and is not recomputed here.
     *
     * @param threshold minimum weight (inclusive) a peptide ion needs to stay
     */
    public void RemoveLowWeightPepID(float threshold) {
        // NOTE(review): the reset is preserved from the original; presumably callers
        // follow up with UpdateMaxIniProb() - confirm.
        MaxIniProb = 0f;
        removeBelowThreshold(PeptideID, threshold);
        removeBelowThreshold(ProtPeptideID, threshold);
    }

    /**
     * Drops, by {@code GetKey()}, every entry of {@code peptides} whose
     * {@code FilteringWeight} is below {@code threshold}. Keys are collected
     * first so the map is not modified while it is being iterated.
     */
    private static void removeBelowThreshold(HashMap<String, PepIonID> peptides, float threshold) {
        ArrayList<String> lowWeightKeys = new ArrayList<>();
        for (PepIonID pepion : peptides.values()) {
            if (pepion.FilteringWeight < threshold) {
                lowWeightKeys.add(pepion.GetKey());
            }
        }
        for (String key : lowWeightKeys) {
            peptides.remove(key);
        }
    }

}