MSUmpire.LCMSPeakStructure.LCMSPeakBase.java Source code

Java tutorial

Introduction

Here is the source code for MSUmpire.LCMSPeakStructure.LCMSPeakBase.java

Source

/* 
 * Author: Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 *             Nesvizhskii Lab, Department of Computational Medicine and Bioinformatics, 
 *             University of Michigan, Ann Arbor
 *
 * Copyright 2014 University of Michigan, Ann Arbor, MI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package MSUmpire.LCMSPeakStructure;

import MSUmpire.BaseDataStructure.InstrumentParameter;
import MSUmpire.BaseDataStructure.SpectralDataType;
import MSUmpire.PSMDataStructure.PSM;
import MSUmpire.PSMDataStructure.PepIonID;
import MSUmpire.PeakDataStructure.PeakCluster;
import MSUmpire.PeakDataStructure.PeakCurve;
import MSUmpire.PeakDataStructure.SortedCurveCollectionApexRT;
import MSUmpire.SpectrumParser.SpectrumParserBase;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.*;
import java.util.logging.Level;
import net.sf.javaml.core.kdtree.KDTree;
import net.sf.javaml.core.kdtree.KeyDuplicateException;
import net.sf.javaml.core.kdtree.KeySizeException;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction;
import org.apache.log4j.Logger;
import org.nustaq.serialization.FSTObjectInput;
import org.nustaq.serialization.FSTObjectOutput;

/**
 * Parent peak data structure related to an LCMS map
 * @author Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 */
public class LCMSPeakBase {

    public ArrayList<PeakCluster> PeakClusters = new ArrayList<>(1000);
    private KDTree PeakClusterMassSearchTree = null;
    private KDTree PeakCurveSearchTree = null;
    public String ScanCollectionName;
    public String ParentmzXMLName;
    protected SpectrumParserBase SpectrumParser;
    public int MaxNoPeakCluster;
    public int MinNoPeakCluster;
    public int StartCharge;
    public int EndCharge;
    public float MiniIntensity;
    public float SNR;
    private SortedCurveCollectionApexRT PeakCurveListRT = null;

    public ArrayList<PeakCurve> UnSortedPeakCurves;
    public InstrumentParameter parameter;
    public SpectralDataType.DataType datattype;
    public boolean Resume = true;
    public boolean ExportPeakClusterTable = true;
    public boolean SaveSerializationFile = true;
    public boolean ExportPeakCurveTable = false;
    int NoCPUs = Runtime.getRuntime().availableProcessors() - 2;
    public PolynomialSplineFunction Masscalibrationfunction;

    public void ClearAllPeaks() {
        BaseClearAllPeaks();
    }

    public void BaseClearAllPeaks() {
        PeakClusters = null;
        PeakCurveListRT = null;
        UnSortedPeakCurves = null;
        PeakCurveSearchTree = null;
        PeakClusterMassSearchTree = null;
    }

    public void ClearRawPeaks() {
        for (PeakCurve peakCurve : UnSortedPeakCurves) {
            //            peakCurve.GetPeakList().clear();
            peakCurve.nullifyPeakList();
        }
    }

    //Get all peak clusters given PSMs related to a peptide ion
    public ArrayList<PeakCluster> FindAllPeakClustersForPepByPSM(PepIonID pep) {
        ArrayList<PeakCluster> allclusterList = new ArrayList<>();
        for (PSM psm : pep.GetPSMList()) {
            ArrayList<PeakCluster> clusterList = FindPeakClustersByMassRTTol(psm.ObserPrecursorMass, psm.Charge,
                    psm.RetentionTime, parameter.MaxCurveRTRange / 2);
            for (PeakCluster cluster : clusterList) {
                if (!allclusterList.contains(cluster)) {
                    allclusterList.add(cluster);
                }
            }
        }
        return allclusterList;
    }

    //Get mass error given a RT according to mass calibration model
    private float GetMassError(float RT) {
        if (RT > Masscalibrationfunction.getKnots()[Masscalibrationfunction.getN()]) {
            RT = (float) Masscalibrationfunction.getKnots()[Masscalibrationfunction.getN()];
        }
        if (RT < Masscalibrationfunction.getKnots()[Masscalibrationfunction.getN()]) {
            RT = (float) Masscalibrationfunction.getKnots()[0];
        }
        return (float) Masscalibrationfunction.value(RT);
    }

    public ArrayList<PeakCluster> FindAllPeakClustersForMappedPep(PepIonID pep) {
        ArrayList<PeakCluster> allclusterList = new ArrayList<>();
        float idrt = pep.GetRT();
        if (idrt != -1) {
            float calibratedmass = InstrumentParameter.GetMzByPPM(pep.CalcNeutralPepMass(), 1, -GetMassError(idrt));
            ArrayList<PeakCluster> clusterList = FindPeakClustersByMassIDTime(calibratedmass, pep.Charge, idrt,
                    parameter.RTtol);
            return clusterList;
        }
        for (float rt : pep.PredictRT) {
            float calibratedmass = InstrumentParameter.GetMzByPPM(pep.CalcNeutralPepMass(), 1, -GetMassError(rt));
            float rtrange = parameter.RT_window_Targeted;
            if (rtrange == -1) {
                rtrange = Math.min(5, Math.max(pep.GetRTSD() * 2, parameter.RTtol));
            }
            ArrayList<PeakCluster> clusterList = FindPeakClustersByMassRTTol(calibratedmass, pep.Charge, rt,
                    rtrange);
            for (PeakCluster cluster : clusterList) {
                if (!cluster.Identified && !allclusterList.contains(cluster)) {
                    allclusterList.add(cluster);
                }
            }
        }
        return allclusterList;
    }

    public ArrayList<PeakCluster> FindPeakClustersByMassIDTime(float mass, int charge, float RT, float RTtol) {
        ArrayList<PeakCluster> ReturnList = new ArrayList<>();
        float lowrt = RT - RTtol;
        float highrt = RT + RTtol;
        float lowmass = InstrumentParameter.GetMzByPPM(mass, 1, parameter.MS1PPM);
        float highmass = InstrumentParameter.GetMzByPPM(mass, 1, -parameter.MS1PPM);

        Object[] found = null;
        try {
            found = GetPeakClusterMassSearchTree().range(new double[] { lowrt, lowmass },
                    new double[] { highrt, highmass });
        } catch (KeySizeException ex) {

        }
        if (found == null || found.length == 0) {
            return ReturnList;
        }
        for (Object obj : found) {
            PeakCluster candidateCluster = (PeakCluster) obj;
            if (candidateCluster.Charge == charge && Math.abs(candidateCluster.PeakHeightRT[0] - RT) <= RTtol) {
                ReturnList.add(candidateCluster);
            }
        }
        return ReturnList;
    }

    public ArrayList<PeakCluster> FindPeakClustersByMassRTTol(float mass, int charge, float RT, float RTtol) {

        ArrayList<PeakCluster> Clusters = new ArrayList<>();
        ArrayList<PeakCluster> TightRangeClusters = new ArrayList<>();

        float lowrt = RT - RTtol;
        float highrt = RT + RTtol;
        float lowmass = InstrumentParameter.GetMzByPPM(mass, 1, parameter.MS1PPM);
        float highmass = InstrumentParameter.GetMzByPPM(mass, 1, -parameter.MS1PPM);

        Object[] found = null;
        try {
            found = GetPeakClusterMassSearchTree().range(new double[] { lowrt, lowmass },
                    new double[] { highrt, highmass });
        } catch (KeySizeException ex) {

        }
        if (found == null || found.length == 0) {
            return Clusters;
        }
        for (Object obj : found) {
            PeakCluster candidateCluster = (PeakCluster) obj;
            if (candidateCluster.Charge == charge && candidateCluster.startRT - RTtol <= RT
                    && candidateCluster.endRT + RTtol >= RT) {
                Clusters.add(candidateCluster);
            }
            if (candidateCluster.Charge == charge && candidateCluster.startRT <= RT
                    && candidateCluster.endRT >= RT) {
                TightRangeClusters.add(candidateCluster);
            }
        }
        if (!TightRangeClusters.isEmpty()) {
            return TightRangeClusters;
        }
        return Clusters;
    }

    public ArrayList<PeakCluster> FindPeakClustersByMassRTRange(float mass, int charge, float startrt,
            float endrt) {
        ArrayList<PeakCluster> Clusters = new ArrayList<>();
        float lowrt = startrt;
        float highrt = endrt;
        float lowmass = InstrumentParameter.GetMzByPPM(mass, 1, parameter.MS1PPM);
        float highmass = InstrumentParameter.GetMzByPPM(mass, 1, -parameter.MS1PPM);

        Object[] found = null;
        try {
            found = GetPeakClusterMassSearchTree().range(new double[] { lowrt, lowmass },
                    new double[] { highrt, highmass });
        } catch (KeySizeException ex) {

        }
        if (found == null || found.length == 0) {
            return Clusters;
        }
        for (Object obj : found) {
            PeakCluster candidateCluster = (PeakCluster) obj;

            if (candidateCluster.Charge == charge && candidateCluster.PeakHeightRT[0] >= startrt
                    && candidateCluster.PeakHeightRT[0] <= endrt) {
                Clusters.add(candidateCluster);
            }

        }
        return Clusters;
    }

    public void ExportPeakClusterResultCSV() throws IOException {

        Logger.getRootLogger().info("Writing PeakCluster CSV:" + FilenameUtils.getFullPath(ScanCollectionName)
                + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.csv...");
        FileWriter writer = new FileWriter(FilenameUtils.getFullPath(ScanCollectionName)
                + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.csv");

        String mzstring = "";
        String Idxstring = "";
        String CorrString = "";
        String SNRString = "";
        String PeakheightString = "";
        String PeakheightRTString = "";
        String PeakAreaString = "";
        String IdentifiedString = "0";

        for (int i = 0; i < MaxNoPeakCluster; i++) {
            mzstring += ",mz" + (i + 1);
            Idxstring += ",PeakIdx" + (i + 1);
            if (i > 0) {
                CorrString += ",Corr" + (i + 1);
            }
            SNRString += ",SNR" + (i + 1);
            PeakheightString += ",PeakHeight" + (i + 1);
            PeakheightRTString += ",PeakHeightRT" + (i + 1);
            PeakAreaString += ",PeakArea" + (i + 1);
        }

        writer.write("Cluster_Index,StartRT,EndRT,StartScan,EndScan,Identified,Charge" + mzstring + Idxstring
                + CorrString + SNRString + PeakheightString + PeakheightRTString + PeakAreaString
                + ",IsoMapProb,ConflictCorr,LeftInt,RightInt,NoRidges,MS1Score,MS1Prob,MS1LProb\n");

        for (PeakCluster cluster : PeakClusters) {
            IdentifiedString = "0";
            if (cluster.Identified) {
                IdentifiedString = "1";
            }

            String statementString = cluster.Index + "," + cluster.startRT + "," + cluster.endRT + ","
                    + cluster.StartScan + "," + cluster.EndScan + "," + IdentifiedString + "," + cluster.Charge
                    + ",";

            mzstring = "";
            Idxstring = "";
            CorrString = "";
            SNRString = "";
            PeakheightString = "";
            PeakheightRTString = "";
            PeakAreaString = "";

            for (int i = 0; i < MaxNoPeakCluster; i++) {
                mzstring += cluster.mz[i] + ",";
                Idxstring += cluster.IsoPeakIndex[i] + ",";
                if (i > 0) {
                    CorrString += cluster.Corrs[i - 1] + ",";
                }
                SNRString += cluster.GetSNR(i) + ",";
                PeakheightString += cluster.PeakHeight[i] + ",";
                PeakheightRTString += cluster.PeakHeightRT[i] + ",";
                PeakAreaString += cluster.PeakArea[i] + ",";
            }
            statementString += mzstring + Idxstring + CorrString + SNRString + PeakheightString + PeakheightRTString
                    + PeakAreaString + cluster.IsoMapProb + "," + cluster.GetConflictCorr() + "," + cluster.LeftInt
                    + "," + cluster.RightInt + "," + cluster.NoRidges + "," + cluster.MS1Score + ","
                    + cluster.MS1ScoreProbability + "," + cluster.MS1ScoreLocalProb + "\n";
            writer.write(statementString);
        }
        writer.close();
        //System.out.print("Finished multithreading\n");
    }

    public void ExportPeakCluster() throws IOException {
        WritePeakClusterSerialization();
    }

    //Default path to store peak data serialization files  
    public void CreatePeakFolder() {
        new File(FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName) + "_Peak/")
                .mkdir();
    }

    public void WritePeakClusterSerialization() {
        FS_PeakClusterWrite();
    }

    private void FS_PeakClusterWrite() {
        try {
            Logger.getRootLogger().info("Writing PeakCluster serialization to file:"
                    + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS...");
            FileOutputStream fout = new FileOutputStream(
                    FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                            + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS",
                    false);
            FSTObjectOutput out = new FSTObjectOutput(fout);
            out.writeObject(PeakClusters);
            out.close();
            fout.close();
        } catch (Exception ex) {
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex));
            JavaSerializationPeakClusterWrite();
        }
    }

    private void JavaSerializationPeakClusterWrite() {
        try {
            Logger.getRootLogger().info("Writing PeakCluster serialization to file:"
                    + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.ser...");
            FileOutputStream fout = new FileOutputStream(
                    FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                            + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.ser",
                    false);
            ObjectOutputStream oos = new ObjectOutputStream(fout);
            oos.writeObject(PeakClusters);
            oos.close();
            fout.close();
        } catch (Exception ex) {
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex));
        }
    }

    public boolean ReadPeakCluster() {
        return ReadPeakClusterSerialization();
    }

    private boolean ReadPeakClusterSerialization() {
        if (!FS_PeakClusterRead()) {
            if (JavaSerializationPeakClusterRead()) {
                return true;
            }
            return false;
        }
        for (PeakCluster cluster : PeakClusters) {
            cluster.CreateLock();
        }
        return true;
    }

    private boolean JavaSerializationPeakClusterRead() {
        if (!new File(FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.ser").exists()) {
            return false;
        }
        try {
            Logger.getRootLogger().info("Reading PeakCluster serialization from file:"
                    + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.ser...");

            FileInputStream fileIn = new FileInputStream(
                    FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                            + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.ser");
            ObjectInputStream in = new ObjectInputStream(fileIn);
            PeakClusters = (ArrayList<PeakCluster>) in.readObject();
            in.close();
            fileIn.close();
        } catch (Exception ex) {
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex));
            return false;
        }
        return true;
    }

    private boolean FS_PeakClusterRead() {
        if (!new File(FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS").exists()) {
            return false;
        }
        try {
            Logger.getRootLogger().info("Reading PeakCluster serialization from file:"
                    + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS...");

            FileInputStream fileIn = new FileInputStream(
                    FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                            + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS");
            FSTObjectInput in = new FSTObjectInput(fileIn);
            PeakClusters = (ArrayList<PeakCluster>) in.readObject();
            in.close();
            fileIn.close();
        } catch (Exception ex) {
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex));
            if (FS_PeakClusterRead_Old()) {
                WritePeakClusterSerialization();
                return true;
            }
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex));
            return false;
        }
        return true;
    }

    private boolean FS_PeakClusterRead_Old() {
        if (!new File(FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS").exists()) {
            return false;
        }
        try {
            Logger.getRootLogger().info("Old PeakCluster serialization from file:"
                    + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS...");

            FileInputStream fileIn = new FileInputStream(
                    FilenameUtils.getFullPath(ParentmzXMLName) + FilenameUtils.getBaseName(ParentmzXMLName)
                            + "_Peak/" + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakCluster.serFS");
            org.nustaq_old.serialization.FSTObjectInput in = new org.nustaq_old.serialization.FSTObjectInput(
                    fileIn);
            PeakClusters = (ArrayList<PeakCluster>) in.readObject();
            in.close();
            fileIn.close();
        } catch (Exception ex) {
            Logger.getRootLogger().error("Old version reader still failed.");
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(ex));
            return false;
        }
        return true;
    }

    public void ExportPeakClusterRegionTXT() throws IOException {
        FileWriter writer = new FileWriter(FilenameUtils.getFullPath(ScanCollectionName)
                + FilenameUtils.getBaseName(ScanCollectionName) + "_PeakClusterRange.txt");
        for (PeakCluster cluster : PeakClusters) {
            writer.write(cluster.startRT + "\t" + cluster.endRT + "\t" + (cluster.mz[0] + 1f) + "\t"
                    + (cluster.mz[0]) + "\n");
        }
        writer.close();
    }

    public void ClearMonoisotopicPeakOfCluster() {
        for (PeakCluster peak : PeakClusters) {
            peak.IsoPeaksCurves = null;
            peak.MonoIsotopePeak = null;
        }
    }

    public KDTree GetPeakClusterMassSearchTree() {
        if (PeakClusterMassSearchTree == null) {
            Logger.getRootLogger().info("Building PeakCluster Mass-RT KD tree");
            PeakClusterMassSearchTree = new KDTree(2);
            for (int i = 0; i < PeakClusters.size(); i++) {
                PeakCluster peakCluster = PeakClusters.get(i);
                try {
                    PeakClusterMassSearchTree.insert(
                            new double[] { peakCluster.PeakHeightRT[0], peakCluster.NeutralMass() }, peakCluster);
                } catch (KeyDuplicateException | KeySizeException ex) {
                    Logger.getRootLogger().error(ex.getMessage());
                }
            }
        }
        return PeakClusterMassSearchTree;
    }

    public KDTree GetPeakCurveSearchTree() {
        if (PeakCurveSearchTree == null) {
            Logger.getRootLogger().info("Building PeakCurve Mass-RT KD tree");
            PeakCurveSearchTree = new KDTree(2);
            for (PeakCurve peakCurve : UnSortedPeakCurves) {
                try {
                    PeakCurveSearchTree.insert(new double[] { peakCurve.ApexRT, peakCurve.TargetMz }, peakCurve);
                } catch (KeySizeException ex) {
                    java.util.logging.Logger.getLogger(LCMSPeakBase.class.getName()).log(Level.SEVERE, null, ex);
                } catch (KeyDuplicateException ex) {
                    java.util.logging.Logger.getLogger(LCMSPeakBase.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        return PeakCurveSearchTree;
    }

    public SortedCurveCollectionApexRT GetPeakCurveListRT() {
        if (PeakCurveListRT == null) {
            PeakCurveListRT = new SortedCurveCollectionApexRT();
            PeakCurveListRT.addAll(UnSortedPeakCurves);
            PeakCurveListRT.Finalize();
        }
        return PeakCurveListRT;
    }
}