// Java tutorial
/* * Author: Chih-Chiang Tsou <chihchiang.tsou@gmail.com> * Nesvizhskii Lab, Department of Computational Medicine and Bioinformatics, * University of Michigan, Ann Arbor * * Copyright 2014 University of Michigan, Ann Arbor, MI * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package MSUmpire.PSMDataStructure; import MSUmpire.BaseDataStructure.InstrumentParameter; import MSUmpire.BaseDataStructure.ScanCollection; import MSUmpire.BaseDataStructure.ScanData; import MSUmpire.BaseDataStructure.XYData; import MSUmpire.SeqUtility.FastaParser; import MSUmpire.PeakDataStructure.PeakCluster; import com.compomics.util.experiment.biology.AminoAcid; import com.compomics.util.experiment.biology.Ion; import com.compomics.util.experiment.biology.PTM; import com.compomics.util.experiment.biology.ions.ElementaryIon; import com.compomics.util.experiment.biology.ions.PeptideFragmentIon; import org.nustaq.serialization.FSTObjectInput; import org.nustaq.serialization.FSTObjectOutput; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.Serializable; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.log4j.Logger; import org.xmlpull.v1.XmlPullParserException; /** * Identification data structure for a LC-MS run * @author Chih-Chiang Tsou 
<chihchiang.tsou@gmail.com> */ public class LCMSID implements Serializable { private static final long serialVersionUID = 25494643464616L; public HashMap<String, PSM> PSMList; public HashMap<String, PSM> LowScorePSMByPepKey; public transient HashMap<String, PepIonID> LowScorePep; public transient HashMap<String, PSM> LowScorePSM; public transient HashMap<Integer, ArrayList<ProtID>> ProteinGroups; private HashMap<String, PepIonID> PepIonList; private HashMap<Integer, PepIonID> PepIonIndexList; public HashMap<String, HashMap<String, PepIonID>> PeptideList; private HashMap<String, PepIonID> MappedPepIonList; private HashMap<Integer, PepIonID> MappedPepIonIndexList; public HashMap<String, HashMap<String, PepIonID>> MappedPeptideList; public HashMap<String, PepIonID> AssignedPepIonList; public HashMap<String, PepIonID> ProtXMLPepIonList; public HashMap<String, ProtID> ProteinList; public HashMap<String, ProtID> IndisProteinIDList; public HashMap<String, ModificationInfo> ModificationList; public HashMap<String, ProtID> PepXMLProteinList; public String DataBase; public String SearchEngine; public String msModel; public String msManufacturer; public String msIonization; public String msMassAnalyzer; public String msDetector; public String mzXMLFileName; public float FDR = Float.POSITIVE_INFINITY; public float ProteinFDR = Float.POSITIVE_INFINITY; public float ExpectThreshold = Float.POSITIVE_INFINITY; public float SpecProbThreshold = 0f; public float PepProbThreshold = 0f; public float ProteinProbThreshold = 0f; public String DecoyTag = "rev_"; public String FastaPath; private float NorFactor = 1f; private transient FastaParser fastaParser; public HashMap<String, String> LuciphorResult; public String Filename; private FastaParser GetFastaParser() { if (fastaParser == null) { fastaParser = new FastaParser(FastaPath); fastaParser.RemoveDecoy(DecoyTag); } return fastaParser; } public void WriteLCMSIDSerialization(String filepath) { WriteLCMSIDSerialization(filepath, ""); } 
public void WriteLCMSIDSerialization(String filepath, String tag) { if (!FSWrite(filepath, tag)) { Logger.getRootLogger().debug("Writing LCMSID FS failed. writing standard serialization instead"); } } public void RemoveLowWeightPep(float threshold) { for (ProtID protein : ProteinList.values()) { protein.RemoveLowWeightPepID(threshold); } } public LCMSID CreateEmptyLCMSID() { LCMSID newLcmsid = new LCMSID(mzXMLFileName, DecoyTag, FastaPath); newLcmsid.SearchEngine = SearchEngine; newLcmsid.msDetector = msDetector; newLcmsid.msIonization = msIonization; newLcmsid.msManufacturer = msManufacturer; newLcmsid.msMassAnalyzer = msMassAnalyzer; newLcmsid.msModel = msModel; newLcmsid.DataBase = DataBase; for (ModificationInfo mod : ModificationList.values()) { newLcmsid.ModificationList.put(mod.GetKey(), mod); } return newLcmsid; } public HashMap<String, LCMSID> GetLCMSIDFileMap() { HashMap<String, LCMSID> lcmsidmap = new HashMap<>(); for (PSM psm : this.PSMList.values()) { if (!lcmsidmap.containsKey(psm.GetRawNameString())) { LCMSID newLcmsid = CreateEmptyLCMSID(); newLcmsid.mzXMLFileName = psm.GetRawNameString(); lcmsidmap.put(psm.GetRawNameString(), newLcmsid); } lcmsidmap.get(psm.GetRawNameString()).AddPSM(psm); } return lcmsidmap; } private boolean FSWrite(String filepath, String tag) { try { if (!tag.equals("")) { tag = "_" + tag; } Logger.getRootLogger().info("Writing ID results to file:" + FilenameUtils.getFullPath(filepath) + FilenameUtils.getBaseName(filepath) + tag + "_LCMSID.serFS..."); FileOutputStream fout = new FileOutputStream(FilenameUtils.getFullPath(filepath) + FilenameUtils.getBaseName(filepath) + tag + "_LCMSID.serFS", false); FSTObjectOutput out = new FSTObjectOutput(fout); ReduceMemoryUsage(); out.writeObject(this, LCMSID.class); out.close(); fout.close(); } catch (Exception ex) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex)); return false; } return true; } private static LCMSID FS_Read(String filepath, String tag) throws Exception { 
if (!tag.equals("")) { tag = "_" + tag; } if (!new File( FilenameUtils.getFullPath(filepath) + FilenameUtils.getBaseName(filepath) + tag + "_LCMSID.serFS") .exists()) { return null; } try { Logger.getRootLogger().info("Reading ID results from file:" + FilenameUtils.getFullPath(filepath) + FilenameUtils.getBaseName(filepath) + tag + "_LCMSID.serFS..."); FileInputStream fileIn = new FileInputStream(FilenameUtils.getFullPath(filepath) + FilenameUtils.getBaseName(filepath) + tag + "_LCMSID.serFS"); FSTObjectInput in = new FSTObjectInput(fileIn); LCMSID lcmsid = (LCMSID) in.readObject(LCMSID.class); in.close(); fileIn.close(); return lcmsid; } catch (Exception ex) { Logger.getRootLogger().info("Reading LCMSID FS results failed."); Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex)); return null; } } public static LCMSID ReadLCMSIDSerialization(String filepath) throws Exception { return ReadLCMSIDSerialization(filepath, ""); } public static LCMSID ReadLCMSIDSerialization(String filepath, String tag) throws Exception { LCMSID lcmsid = FS_Read(filepath, tag); return lcmsid; } public float GetNorFactor() { if (NorFactor == 1f) { NorFactor = 0f; for (PepIonID pepIonID : GetPepIonList().values()) { NorFactor += pepIonID.PeakHeight[0]; } NorFactor /= GetPepIonList().size(); } return NorFactor; } public LCMSID(String mzXMLFileName, String Decoytag, String Fasta) { ModificationList = new HashMap<>(); ProteinList = new HashMap<>(); PSMList = new HashMap<>(); PepIonList = new HashMap<>(); AssignedPepIonList = new HashMap<>(); ProtXMLPepIonList = new HashMap<>(); IndisProteinIDList = new HashMap<>(); ProteinGroups = new HashMap<>(); PeptideList = new HashMap<>(); MappedPepIonList = new HashMap<>(); this.mzXMLFileName = mzXMLFileName; this.FastaPath = Fasta; this.DecoyTag = Decoytag; } public HashMap<String, PepIonID> GetPepIonList() { return PepIonList; } public HashMap<String, PepIonID> GetMappedPepIonList() { return MappedPepIonList; } public void ResetMappedPepProb() 
{ for (PepIonID pepIonID : MappedPepIonList.values()) { pepIonID.UScoreProbability_MS1 = 0f; pepIonID.MS1AlignmentProbability = 0f; pepIonID.UScoreProbability_MS2 = 0f; pepIonID.MS2AlignmentProbability = 0f; } } public void SetMappedPepIonList(HashMap<String, PepIonID> list) { MappedPepIonList = list; } public void GenerateFragmentPeakForPepIonByMSMS(ScanCollection scanCollection, float fragPPM) { double protonMass = ElementaryIon.proton.getTheoreticMass(); for (PepIonID pepIonID : GetPepIonList().values()) { for (PSM psm : pepIonID.GetPSMList()) { ScanData scan = scanCollection.GetScan(psm.ScanNo); for (Ion frag : pepIonID.GetFragments()) { XYData closetPeak = null; float targetmz = (float) (frag.getTheoreticMass() + protonMass); closetPeak = scan.GetHighestPeakInMzWindow(targetmz, fragPPM); if (closetPeak != null) { FragmentPeak fragmentpeak = new FragmentPeak(); fragmentpeak.ObservedMZ = closetPeak.getX(); fragmentpeak.FragMZ = targetmz; fragmentpeak.intensity = closetPeak.getY(); fragmentpeak.Charge = 1; fragmentpeak.ppm = InstrumentParameter.CalcSignedPPM(closetPeak.getX(), targetmz); fragmentpeak.IonType = frag.getSubTypeAsString() + ((PeptideFragmentIon) frag).getNumber(); pepIonID.FragmentPeaks.add(fragmentpeak); } targetmz = (float) (frag.getTheoreticMass() + protonMass * 2) / 2; closetPeak = scan.GetHighestPeakInMzWindow(targetmz, fragPPM); if (closetPeak != null) { FragmentPeak fragmentpeak = new FragmentPeak(); fragmentpeak.ObservedMZ = closetPeak.getX(); fragmentpeak.FragMZ = targetmz; fragmentpeak.intensity = closetPeak.getY(); fragmentpeak.Charge = 2; fragmentpeak.ppm = InstrumentParameter.CalcSignedPPM(closetPeak.getX(), targetmz); fragmentpeak.IonType = frag.getSubTypeAsString() + ((PeptideFragmentIon) frag).getNumber(); pepIonID.FragmentPeaks.add(fragmentpeak); } } } } } public void AssignProtForPepIon() { if (PeptideList != null) { for (String pepseq : PeptideList.keySet()) { for (ProtID protein : ProteinList.values()) { if 
(protein.Sequence.contains(pepseq) || (protein.ProtPepSeq != null && protein.ProtPepSeq.contains(pepseq))) { for (PepIonID pepIonID : PeptideList.get(pepseq).values()) { protein.PeptideID.put(pepIonID.GetKey(), pepIonID); if (!pepIonID.ParentProtID_ProtXML.contains(protein)) { pepIonID.ParentProtID_ProtXML.add(protein); } } } } } } } public void AssignProtForMappedIon() { if (MappedPeptideList != null) { for (String pepseq : MappedPeptideList.keySet()) { for (ProtID protein : ProteinList.values()) { if (protein.Sequence.contains(pepseq) || (protein.ProtPepSeq != null && protein.ProtPepSeq.contains(pepseq))) { for (PepIonID pepIonID : MappedPeptideList.get(pepseq).values()) { protein.PeptideID.put(pepIonID.GetKey(), pepIonID); if (!pepIonID.ParentProtID_ProtXML.contains(protein)) { pepIonID.ParentProtID_ProtXML.add(protein); } } } } } } } public void ExportMappedPepID() throws SQLException, IOException { ExportMappedPepIonCSV(); } public void ExportPepID() throws SQLException, IOException { ExportPepID(null); } public void ExportPepID(String folder) throws IOException { ExportPepIonCSV(folder); ExportPepPSMCSV(folder); } public void ExportProtID() throws SQLException, IOException { ExportProtID(null); } public void ExportProtID(String folder) throws SQLException, IOException { ExportProtIDCSV(folder); } public void ExportPepFragmentPeak() throws SQLException, IOException { ExportPepFragmentCSV(); } public void ExportMappedPepFragmentPeak() throws SQLException, IOException { ExportMappedPepFragmentCSV(); } private void ExportPepPSMCSV(String folder) throws IOException { if (folder == null | "".equals(folder)) { folder = FilenameUtils.getFullPath(mzXMLFileName); } Logger.getRootLogger().info( "Writing PSM result to file:" + folder + FilenameUtils.getBaseName(mzXMLFileName) + "_PSMs.csv..."); FileWriter writer = new FileWriter(folder + FilenameUtils.getBaseName(mzXMLFileName) + "_PSMs.csv"); writer.write( 
"SpecID,Sequence,ModSeq,TPPModSeq,Modification,Charge,mz,NeutralPepMass,ObservedMass,RT,AdjustedRT,Rank,ScanNo,PreAA,NextAA,MissedCleavage,ExpectValue,MassError,Prob,Rawname,ParentPepIndex,MS1Quant\n"); for (PepIonID pepion : PepIonList.values()) { for (PSM psm : pepion.GetPSMList()) { writer.write(psm.SpecNumber + "," + psm.Sequence + "," + psm.ModSeq + "," + psm.TPPModSeq + "," + psm.GetModificationString() + "," + psm.Charge + "," + psm.ObserPrecursorMz() + "," + psm.NeutralPepMass + "," + psm.ObserPrecursorMass + "," + psm.RetentionTime + "," + psm.NeighborMaxRetentionTime + "," + psm.Rank + "," + psm.ScanNo + "," + psm.PreAA + "," + psm.NextAA + "," + psm.MissedCleavage + "," + psm.expect + "," + psm.MassError + "," + psm.Probability + "," + psm.RawDataName + "," + pepion.Index + "," + pepion.GetMS1() + "\n"); } } writer.close(); } private void ExportMappedPepIonCSV() throws IOException { Logger.getRootLogger() .info("Writing MappedPepIonIDs result to file:" + FilenameUtils.getFullPath(mzXMLFileName) + FilenameUtils.getBaseName(mzXMLFileName) + "_MappedPepIonIDs.csv..."); FileWriter writer = new FileWriter(FilenameUtils.getFullPath(mzXMLFileName) + FilenameUtils.getBaseName(mzXMLFileName) + "_MappedPepIonIDs.csv"); writer.write( "PepIndex,Sequence,ModSeq,TPPModSeq,ModInfo,Charge,mz,PredictRT, PeakRT,MS1ClusIndex,MS2ClusIndex,PeakScore,PeakHeight1,PeakHeight2,PeakHeight3,PeakArea1,PeakArea2,PeakArea3,MS1AlignmentProb,MS1AlignmentLProb,MS2AlignmentProb,MS2AlignmentLProb\n"); int index = 1; for (PepIonID pepion : MappedPepIonList.values()) { writer.write((index++) + "," + pepion.Sequence + "," + pepion.ModSequence + "," + pepion.TPPModSeq + "," + pepion.GetModificationString() + "," + pepion.Charge + "," + pepion.NeutralPrecursorMz() + "," + pepion.PredictRTString() + "," + pepion.PeakRT + "," + pepion.GetMS1ClusIndex() + "," + pepion.GetMS2ClusIndex() + "," + pepion.PeakClusterScore + "," + pepion.PeakHeight[0] + "," + pepion.PeakHeight[1] + "," + 
pepion.PeakHeight[2] + "," + pepion.PeakArea[0] + "," + pepion.PeakArea[1] + "," + pepion.PeakArea[2] + "," + pepion.MS1AlignmentProbability + "," + pepion.UScoreProbability_MS1 + "," + pepion.MS2AlignmentProbability + "," + pepion.UScoreProbability_MS2 + "\n"); } writer.close(); } private void ExportPepIonCSV(String folder) throws IOException { if (folder == null | "".equals(folder)) { folder = FilenameUtils.getFullPath(mzXMLFileName); } Logger.getRootLogger().info("Writing PepIon result to file:" + folder + FilenameUtils.getBaseName(mzXMLFileName) + "_PepIonIDs.csv..."); FileWriter writer = new FileWriter(folder + FilenameUtils.getBaseName(mzXMLFileName) + "_PepIonIDs.csv"); writer.write( "PepIndex,Sequence,ModSeq,TPPModSeq,IsNonDegenerate,Charge,mz,IDRT,PeakRT,NoPSMs,MS1ClusIndex,MS2ClusIndex,PeakScore,PeakHeight1,PeakHeight2,PeakHeight3,PeakArea1,PeakArea2,PeakArea3\n"); for (PepIonID pepion : PepIonList.values()) { writer.write(pepion.Index + "," + pepion.Sequence + "," + pepion.ModSequence + "," + pepion.TPPModSeq + "," + (pepion.Is_NonDegenerate ? 
1 : 0) + "," + pepion.Charge + "," + pepion.NeutralPrecursorMz() + "," + pepion.GetIDRT() + "," + pepion.PeakRT + "," + pepion.GetSpectralCount() + "," + pepion.GetMS1ClusIndex() + "," + pepion.GetMS2ClusIndex() + "," + pepion.PeakClusterScore + "," + pepion.PeakHeight[0] + "," + pepion.PeakHeight[1] + "," + pepion.PeakHeight[2] + "," + pepion.PeakArea[0] + "," + pepion.PeakArea[1] + "," + pepion.PeakArea[2] + "\n"); } writer.close(); } private void ExportProtIDCSV(String folder) throws IOException { if (folder == null | "".equals(folder)) { folder = FilenameUtils.getFullPath(mzXMLFileName); } Logger.getRootLogger().info("Writing ProteinID result to file:" + folder + FilenameUtils.getBaseName(mzXMLFileName) + "_ProtIDs.csv..."); FileWriter writer = new FileWriter(folder + FilenameUtils.getBaseName(mzXMLFileName) + "_ProtIDs.csv"); writer.write( "AccNo,UniProtID,ProteinLength,ProteinGroup,IndisProt,Description,Mass,Score,Peptides,Sequence\n"); for (ProtID protein : ProteinList.values()) { String pepstring = ""; for (PepIonID pep : protein.PeptideID.values()) { pepstring += pep.GetKey() + ";"; } String IndisProt = ""; for (String indisprot : protein.IndisProteins) { IndisProt += indisprot + ";"; } writer.write(protein.getAccNo() + "," + protein.UniProtID + "," + protein.ProteinLength + "," + protein.ProteinGroup + "," + IndisProt + "," + protein.Description + "," + protein.Mass + "," + protein.Probability + "," + pepstring + "," + protein.Sequence + "\n"); } writer.close(); } private void ExportMappedPepFragmentCSV() throws IOException { Logger.getRootLogger().info("Writing PepFragment result to file:" + FilenameUtils.getFullPath(mzXMLFileName) + FilenameUtils.getBaseName(mzXMLFileName) + "_MappedPepFragments.csv..."); FileWriter writer = new FileWriter(FilenameUtils.getFullPath(mzXMLFileName) + FilenameUtils.getBaseName(mzXMLFileName) + "_MappedPepFragments.csv"); 
writer.write("PepIndex,IonType,fragMZ,ObservedMZ,Charge,Intensity,Correlation,PPM,ApexDelta,RTOverlapP\n"); for (PepIonID pepion : MappedPepIonList.values()) { for (FragmentPeak frag : pepion.FragmentPeaks) { writer.write(pepion.Index + "," + frag.IonType + "," + frag.FragMZ + "," + frag.ObservedMZ + "," + frag.Charge + "," + frag.intensity + "," + frag.corr + "," + frag.ppm + "," + frag.ApexDelta + "," + frag.RTOverlapP + "\n"); } } writer.close(); } private void ExportPepFragmentCSV() throws IOException { Logger.getRootLogger().info("Writing PepFragment result to file:" + FilenameUtils.getFullPath(mzXMLFileName) + FilenameUtils.getBaseName(mzXMLFileName) + "_PepFragments.csv..."); FileWriter writer = new FileWriter(FilenameUtils.getFullPath(mzXMLFileName) + FilenameUtils.getBaseName(mzXMLFileName) + "_PepFragments.csv"); writer.write("PepIndex,IonType,fragMZ,ObservedMZ,Charge,Intensity,Correlation,PPM,ApexDelta,RTOverlapP\n"); for (PepIonID pepion : PepIonList.values()) { for (FragmentPeak frag : pepion.FragmentPeaks) { writer.write(pepion.Index + "," + frag.IonType + "," + frag.FragMZ + "," + frag.ObservedMZ + "," + frag.Charge + "," + frag.intensity + "," + frag.corr + "," + frag.ppm + "," + frag.ApexDelta + "," + frag.RTOverlapP + "\n"); } } writer.close(); } public PepIonID GetPepID(PSM psm) { return PepIonList.get(psm.GetPepKey()); } public void AddPSM(PSM psm) { PSMList.put(psm.SpecNumber, psm); PepIonID pepIonID = GetPepID(psm); if (pepIonID == null) { pepIonID = new PepIonID(); pepIonID.SetInfobyPSM(psm); AddPeptideID(pepIonID); } pepIonID.AddPSM(psm); } public void LoadSequence() throws IOException, XmlPullParserException { for (ProtID protID : ProteinList.values()) { if (protID.Sequence == null || "".equals(protID.Sequence) || "null".equals(protID.Sequence)) { if (!protID.IsDecoy(DecoyTag)) { if (FastaPath != null && !"".equals(FastaPath)) { //String Sequence = GetSequenceFactory().getProtein(protID.UniProtID).getSequence(); //String Sequence = 
GetFastaParser().ProteinList.get(protID.getAccNo()).Seq; try { String Sequence = GetFastaParser().GetProtSeq(protID.getAccNo()); if (Sequence != null) { protID.SetSequence(Sequence); } else { Logger.getRootLogger().error( "Can't find sequence in fasta file for protein:" + protID.getAccNo()); } } catch (Exception ex) { Logger.getRootLogger() .error("Can't find sequence in fasta file for protein:" + protID.getAccNo()); Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex)); } } } } } } public void AddProtID(ProtID protID) throws ClassNotFoundException, InterruptedException, IOException, XmlPullParserException { if (!ProteinList.containsKey(protID.getAccNo())) { ProteinList.put(protID.getAccNo(), protID); } } public ArrayList<PSM> FindPsmsBymzRT(float mz, float RT, int charge, float PPM, float RTtol) { ArrayList<PSM> psms = new ArrayList<>(); for (PSM psm : PSMList.values()) { if (psm.Charge == charge && InstrumentParameter.CalcPPM(mz, psm.ObserPrecursorMz()) < PPM && Math.abs(psm.RetentionTime - RT) < RTtol) { psms.add(psm); } } return psms; } private void FindMaxIniProbThresholdByFDR() { if (ProteinList.isEmpty()) { return; } SortedProteinListMaxIniProb sortedlist = new SortedProteinListMaxIniProb(); sortedlist.addAll(ProteinList.values()); //writer = new FileWriter(FilenameUtils.getFullPath(mzXMLFileName)+"/" + FilenameUtils.getBaseName(mzXMLFileName)+"_Pro.txt"); int positive = 0; int negative = 0; ProtID protein = sortedlist.get(0); if (protein.IsDecoy(DecoyTag)) { negative++; } else { positive++; } for (int i = 1; i < sortedlist.size(); i++) { protein = sortedlist.get(i); if (protein.IsDecoy(DecoyTag)) { negative++; //System.out.println(protein.getAccNo()+"-"+protein.ProteinGroup+"-Decoy"); } else { positive++; //System.out.println(protein.getAccNo()+"-"+ protein.ProteinGroup); } if (i + 1 == sortedlist.size() || (protein.MaxIniProb > sortedlist.get(i + 1).MaxIniProb && (float) negative / (float) (positive) >= ProteinFDR)) { ProteinProbThreshold = 
protein.MaxIniProb; Logger.getRootLogger() .info("Protein maxiniprob threshold=" + ProteinProbThreshold + " Estimated raw protein FDR:" + (float) negative / (float) (positive) + "(Target/Decoy)=(" + positive + "/" + negative + ")"); return; } } Logger.getRootLogger() .info("Protein maxiniprob threshold=" + ProteinProbThreshold + " Estimated raw protein FDR:" + (float) negative / (float) (positive) + "(Target/Decoy)=(" + positive + "/" + negative + ")"); } public void ClearPeakData() { for (PepIonID pepIonID : PepIonList.values()) { for (PeakCluster peak : pepIonID.MS1PeakClusters) { peak.IsoPeaksCurves = null; peak.MonoIsotopePeak = null; } for (PeakCluster peak : pepIonID.MS2UnfragPeakClusters) { peak.IsoPeaksCurves = null; peak.MonoIsotopePeak = null; } } } public void ClearAssignPeakCluster() { for (PepIonID pepIonID : PepIonList.values()) { pepIonID.MS1PeakClusters.clear(); pepIonID.MS2UnfragPeakClusters.clear(); } } public void ClearAssignPeakClusterMappedion() { for (PepIonID pepIonID : MappedPepIonList.values()) { pepIonID.MS1PeakClusters.clear(); pepIonID.MS2UnfragPeakClusters.clear(); } } public void ReduceMemoryUsage() { fastaParser = null; for (PepIonID pepIonID : PepIonList.values()) { pepIonID.ClearPepFragFactory(); for (PeakCluster peakCluster : pepIonID.MS1PeakClusters) { peakCluster.GroupedFragmentPeaks.clear(); peakCluster.MonoIsotopePeak = null; } for (PeakCluster peakCluster : pepIonID.MS2UnfragPeakClusters) { peakCluster.GroupedFragmentPeaks.clear(); peakCluster.MonoIsotopePeak = null; } } for (PepIonID pepIonID : MappedPepIonList.values()) { pepIonID.ClearPepFragFactory(); for (PeakCluster peakCluster : pepIonID.MS1PeakClusters) { peakCluster.GroupedFragmentPeaks.clear(); peakCluster.MonoIsotopePeak = null; } for (PeakCluster peakCluster : pepIonID.MS2UnfragPeakClusters) { peakCluster.GroupedFragmentPeaks.clear(); peakCluster.MonoIsotopePeak = null; } } } public void ClearPSMs() { PSMList.clear(); for (PepIonID pepIonID : PepIonList.values()) { 
pepIonID.CalcRT(); pepIonID.GetSpectralCount(); pepIonID.GetPSMList().clear(); } } public void ReMapProPep() { ClearProPeplist(); GeneratePepSeqList(); AssignProtForPepIon(); GenerateMappedPepSeqList(); AssignProtForMappedIon(); GenearteAssignIonList(); } public void SetFilterByGroupWeight() { for (PepIonID pep : GetPepIonList().values()) { pep.FilteringWeight = pep.GroupWeight; } for (PepIonID pep : GetMappedPepIonList().values()) { pep.FilteringWeight = pep.GroupWeight; } } public void SetFilterByWeight() { for (PepIonID pep : GetPepIonList().values()) { pep.FilteringWeight = pep.Weight; } for (PepIonID pep : GetMappedPepIonList().values()) { pep.FilteringWeight = pep.Weight; } } public void FindPepProbThresholdByFDR() { if (PepIonList.isEmpty()) { return; } SortedPepListProb sortedlist = new SortedPepListProb(); sortedlist.addAll(PepIonList.values()); int positive = 0; int negative = 0; PepIonID pep = sortedlist.get(0); if (pep.IsDecoy(DecoyTag)) { negative++; } else { positive++; } for (int i = 1; i < sortedlist.size(); i++) { pep = sortedlist.get(i); if (pep.MaxProbability < 0.1f) { break; } if (pep.IsDecoy(DecoyTag)) { negative++; } else { positive++; } if (pep.MaxProbability < sortedlist.get(i - 1).MaxProbability && ((float) negative / (float) (positive) >= FDR)) { PepProbThreshold = pep.MaxProbability; Logger.getRootLogger() .info("Probability threshold=" + PepProbThreshold + " Estimated FDR:" + (float) negative / (float) (positive) + "(Target/Decoy)=(" + positive + "/" + negative + ")"); return; } } Logger.getRootLogger().info("Probability threshold=" + PepProbThreshold + " Estimated FDR:" + (float) negative / (float) (positive) + "(Target/Decoy)=(" + positive + "/" + negative + ")"); } public void GenearteAssignIonList() { AssignedPepIonList.clear(); for (ProtID protein : ProteinList.values()) { for (PepIonID pepIonID : protein.PeptideID.values()) { AssignedPepIonList.put(pepIonID.GetKey(), pepIonID); } } } public void 
FilterByProteinDecoyFDRUsingLocalPW(String DecoyTag, float fdr) { this.DecoyTag = DecoyTag; this.ProteinFDR = fdr; FindLocalPWThresholdByFDR(); RemoveLowProbProteinDecoy(); } private void RemoveLowProbProteinDecoy() { ArrayList<ProtID> removelist = new ArrayList<>(); for (ProtID protein : ProteinList.values()) { if (protein.Probability < ProteinProbThreshold || protein.IsDecoy(DecoyTag)) { removelist.add(protein); } } for (ProtID protein : removelist) { ProteinList.remove(protein.getAccNo()); } GenearteAssignIonList(); } private void FindLocalPWThresholdByFDR() { //FileWriter writer = null; //try { if (ProteinList.isEmpty()) { return; } SortedProteinListProb sortedlist = new SortedProteinListProb(); sortedlist.addAll(ProteinList.values()); //writer = new FileWriter(FilenameUtils.getFullPath(mzXMLFileName)+"/" + FilenameUtils.getBaseName(mzXMLFileName)+"_Pro.txt"); int positive = 0; int negative = 0; ProtID protein = sortedlist.get(0); if (protein.IsDecoy(DecoyTag)) { negative++; } else { positive++; } for (int i = 1; i < sortedlist.size(); i++) { protein = sortedlist.get(i); if (protein.IsDecoy(DecoyTag)) { negative++; //System.out.println(protein.getAccNo()+"-"+protein.ProteinGroup+"-Decoy"); } else { positive++; //System.out.println(protein.getAccNo()+"-"+ protein.ProteinGroup); } if (protein.Probability < sortedlist.get(i - 1).Probability && (float) negative / (float) (positive) >= ProteinFDR) { ProteinProbThreshold = protein.Probability; Logger.getRootLogger() .info("Protein probability threshold=" + ProteinProbThreshold + " Estimated raw protein FDR:" + (float) negative / (float) (positive) + "(Target/Decoy)=(" + positive + "/" + negative + ")"); return; } } } public void RemoveLowLocalPWProtein(float LocalPW) { ArrayList<ProtID> removelist = new ArrayList<>(); for (ProtID protein : ProteinList.values()) { if (protein.Probability < LocalPW) { removelist.add(protein); } } for (ProtID protein : removelist) { ProteinList.remove(protein.getAccNo()); } 
GenearteAssignIonList(); } public void RemoveLowMaxIniProbProteinDecoy() { ArrayList<ProtID> removelist = new ArrayList<>(); for (ProtID protein : ProteinList.values()) { if (protein.MaxIniProb < ProteinProbThreshold || protein.IsDecoy(DecoyTag)) { removelist.add(protein); } } for (ProtID protein : removelist) { ProteinList.remove(protein.getAccNo()); } GenearteAssignIonList(); } public void RemoveLowMaxIniProbProtein(float maxiniprob) { ArrayList<ProtID> removelist = new ArrayList<>(); for (ProtID protein : ProteinList.values()) { if (protein.MaxIniProb < maxiniprob) { removelist.add(protein); } } for (ProtID protein : removelist) { ProteinList.remove(protein.getAccNo()); } } public void DetermineAssignIonListByProtPepSeq() { for (ProtID proid : ProteinList.values()) { for (String seq : proid.ProtPepSeq) { if (PeptideList.containsKey(seq)) { for (PepIonID pep : PeptideList.get(seq).values()) { proid.AddPeptideID(pep); if (!pep.ParentProtID_ProtXML.contains(proid)) { pep.ParentProtID_ProtXML.add(proid); } if (!AssignedPepIonList.containsKey(pep.GetKey())) { AssignedPepIonList.put(pep.GetKey(), pep); } } } } } } public void RemoveLowProbPep() { // = new HashMap<>(); ArrayList<PepIonID> removelist = new ArrayList<>(); for (PepIonID pep : GetPepIonList().values()) { if (pep.MaxProbability < PepProbThreshold) { removelist.add(pep); } } for (PepIonID pep : removelist) { GetPepIonList().remove(pep.GetKey()); //LowScorePep.put(pep.GetKey(), pep); } } public void RemoveDecoyPep() { ArrayList<PepIonID> removelist = new ArrayList<>(); for (PepIonID pep : PepIonList.values()) { if (pep.IsDecoy(DecoyTag)) { removelist.add(pep); } } for (PepIonID pep : removelist) { PepIonList.remove(pep.GetKey()); } } public void RemoveDecoyProtein() { ArrayList<ProtID> removelist = new ArrayList<>(); for (ProtID protein : ProteinList.values()) { if (protein.IsDecoy(DecoyTag)) { removelist.add(protein); } } for (ProtID protein : removelist) { ProteinList.remove(protein.getAccNo()); } } public 
void FilterByPepDecoyFDR(String DecoyTag, float fdr) { this.DecoyTag = DecoyTag; this.FDR = fdr; FindPepProbThresholdByFDR(); RemoveLowProbPep(); RemoveDecoyPep(); GeneratePepSeqList(); } private void GeneratePepSeqList() { PeptideList = new HashMap(); for (PepIonID pepID : GetPepIonList().values()) { if (!PeptideList.containsKey(pepID.Sequence)) { PeptideList.put(pepID.Sequence, new HashMap<String, PepIonID>()); } if (!PeptideList.get(pepID.Sequence).containsKey(pepID.GetKey())) { PeptideList.get(pepID.Sequence).put(pepID.GetKey(), pepID); } } } public void GenerateMappedPepSeqList() { MappedPeptideList = new HashMap<>(); for (PepIonID pepID : GetMappedPepIonList().values()) { if (!MappedPeptideList.containsKey(pepID.Sequence)) { MappedPeptideList.put(pepID.Sequence, new HashMap<String, PepIonID>()); } if (!MappedPeptideList.get(pepID.Sequence).containsKey(pepID.GetKey())) { MappedPeptideList.get(pepID.Sequence).put(pepID.GetKey(), pepID); } } } //Generate protein list according to mapping of peptide ions to a master protein list public void GenerateProteinByRefIDByPepSeq(LCMSID RefID, boolean UseMappedIon) { ProteinList.clear(); AssignedPepIonList.clear(); for (PepIonID pepIonID : GetPepIonList().values()) { pepIonID.Weight = 0f; pepIonID.GroupWeight = 0f; } for (ProtID protein : RefID.ProteinList.values()) { if (!protein.IsDecoy(DecoyTag)) { ProtID newprot = protein.CloneProtein(); newprot.Probability = protein.Probability; newprot.ProtPepSeq = (ArrayList<String>) protein.ProtPepSeq.clone(); for (PepIonID pep : protein.ProtPeptideID.values()) { if (PeptideList.containsKey(pep.Sequence)) { for (PepIonID pepIonID : PeptideList.get(pep.Sequence).values()) { pepIonID.Weight = pep.Weight; pepIonID.GroupWeight = pep.GroupWeight; newprot.AddPeptideID(pepIonID); newprot.IDByDBSearch = true; if (!pepIonID.ParentProtID_ProtXML.contains(newprot)) { pepIonID.ParentProtID_ProtXML.add(newprot); } } } if (UseMappedIon) { if (MappedPeptideList.containsKey(pep.Sequence)) { for 
(PepIonID pepIonID : MappedPeptideList.get(pep.Sequence).values()) { pepIonID.Weight = pep.Weight; pepIonID.GroupWeight = pep.GroupWeight; newprot.AddPeptideID(pepIonID); if (!pepIonID.ParentProtID_ProtXML.contains(newprot)) { pepIonID.ParentProtID_ProtXML.add(newprot); } } } } } if (!newprot.PeptideID.isEmpty()) { ProteinList.put(newprot.getAccNo(), newprot); } } } } public void UpdateProteinKey() throws ClassNotFoundException, InterruptedException, IOException, XmlPullParserException { ArrayList<ProtID> temp = new ArrayList<>(); for (ProtID protein : ProteinList.values()) { temp.add(protein); } ProteinList.clear(); for (ProtID protein : temp) { AddProtID(protein); } } public void FixProteinWithDecoyHead() throws ClassNotFoundException, InterruptedException, IOException, XmlPullParserException { for (ProtID protein : ProteinList.values()) { if (protein.IsDecoy(DecoyTag)) { for (int i = 0; i < protein.IndisProteins.size(); i++) { if (!(protein.IndisProteins.get(i).startsWith(DecoyTag) | protein.IndisProteins.get(i).endsWith(DecoyTag))) { protein.setAccNo(protein.IndisProteins.get(i)); protein.SetDescription(protein.IndisProtDes.get(i)); break; } } } } UpdateProteinKey(); } public void UpdateDecoyMaxIniProb() { for (ProtID protein : ProteinList.values()) { if (protein.IsDecoy(DecoyTag)) { protein.MaxIniProb = 0f; for (PepIonID pep : protein.ProtPeptideID.values()) { boolean include = true; for (String prot : pep.ParentProtString_ProtXML) { if (!(prot.startsWith(DecoyTag) | prot.endsWith(DecoyTag))) { include = false; } } if (include && protein.MaxIniProb < pep.MaxProbability) { protein.MaxIniProb = pep.MaxProbability; } } } } } public void SetGroupProbForNonDecoyGroupHead() { for (ArrayList<ProtID> group : ProteinGroups.values()) { boolean allzero = true; for (ProtID protein : group) { if (protein.Probability > 0) { allzero = false; break; } } if (allzero) { for (ProtID protein : group) { if (!protein.IsDecoy(DecoyTag)) { protein.Probability = protein.GroupProb; 
break; } } } } } public void FilterByProteinDecoyFDRUsingMaxIniProb(String DecoyTag, float fdr) { this.DecoyTag = DecoyTag; this.ProteinFDR = fdr; FindMaxIniProbThresholdByFDR(); RemoveLowMaxIniProbProteinDecoy(); } public void AddPeptideID(PepIonID pepID) { // if (!PepIonList.containsKey(pepID.GetKey())) { // pepID.Index = PepIonList.size(); // PepIonList.put(pepID.GetKey(), pepID); // } { final PepIonID pep = PepIonList.get(pepID.GetKey()); if (pep == null || pep.MaxProbability < pepID.MaxProbability) { // if not in map or a higher probability is found PepIonList.put(pepID.GetKey(), pepID); pepID.Index = PepIonList.size() - 1; } } for (PSM psm : pepID.GetPSMList()) { PSMList.put(psm.SpecNumber, psm); } } public PSM GetPSM(int SpecNum) { return PSMList.get(SpecNum); } public void AddModification(PTM ptm, String site) { ModificationInfo modification = new ModificationInfo(); modification.site = site; modification.massdiff = (float) ptm.getMass(); modification.mass = (float) (ptm.getMass() + AminoAcid.getAminoAcid(site).monoisotopicMass); modification.modification = ptm; if (!ModificationList.containsKey(ptm.getName() + "_" + site)) { ModificationList.put(ptm.getName() + "_" + site, modification); } } public void CreateInstanceForAllPepIon() { for (PepIonID pepIonID : GetPepIonList().values()) { pepIonID.CreateQuantInstance(4); } } public void ClearMappedPep() { MappedPepIonList.clear(); MappedPeptideList.clear(); if (MappedPepIonIndexList != null) { MappedPepIonIndexList.clear(); } } public void ClearProPeplist() { for (ProtID protein : ProteinList.values()) { protein.PeptideID.clear(); } for (PepIonID pep : GetPepIonList().values()) { pep.ParentProtID_ProtXML.clear(); } for (PepIonID pep : GetMappedPepIonList().values()) { pep.ParentProtID_ProtXML.clear(); } } public void RemoveLowProbMappedIon(float ProbThreshold) { //LowScorePep = new HashMap<>(); ArrayList<PepIonID> removelist = new ArrayList<>(); for (PepIonID pep : GetMappedPepIonList().values()) { if 
(pep.TargetedProbability() < ProbThreshold) { removelist.add(pep); } } for (PepIonID pep : removelist) { GetMappedPepIonList().remove(pep.GetKey()); //LowScorePep.put(pep.GetKey(), pep); } GenerateMappedPepSeqList(); } public void ReleaseIDs() { PSMList = null; LowScorePSMByPepKey = null; LowScorePep = null; LowScorePSM = null; PepIonList = null; PepIonIndexList = null; MappedPepIonIndexList = null; AssignedPepIonList = null; ProtXMLPepIonList = null; ProteinList = null; IndisProteinIDList = null; MappedPepIonList = null; PepXMLProteinList = null; PeptideList = null; MappedPeptideList = null; } public float GetRFactor(float ProbThreshold) { int forward = 0; int decoy = 0; for (ProtID protID : ProteinList.values()) { if (protID.Probability < ProbThreshold) { if (protID.IsDecoy(DecoyTag)) { decoy++; } else { forward++; } } } float rf = (float) forward / decoy; Logger.getRootLogger().info("Caculating R factor: probability threshold =" + ProbThreshold); Logger.getRootLogger().info("R factor=" + rf + " (forward/decoy=" + forward + "/" + decoy + ")"); return rf; } }