ms1quant.MS1Quant.java Source code

Java tutorial

Introduction

Here is the source code for ms1quant.MS1Quant.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package ms1quant;

import MSUmpire.BaseDataStructure.InstrumentParameter;
import MSUmpire.BaseDataStructure.UmpireInfo;
import MSUmpire.LCMSPeakStructure.LCMSPeakMS1;
import MSUmpire.BaseDataStructure.DBSearchParam;
import MSUmpire.BaseDataStructure.TandemParam;
import MSUmpire.PSMDataStructure.LCMSID;
import MSUmpire.PSMDataStructure.PTMManager;
import MSUmpire.SearchResultParser.PepXMLParser;
import MSUmpire.Utility.ConsoleLogger;
import java.io.*;
import java.util.Arrays;
import java.util.HashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/**
 *
 * @author Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 */
public class MS1Quant {

    /**
     * @param args the command line arguments MS1Quant parameterfile
     */
    public static void main(String[] args) throws Exception {

        BufferedReader reader = null;
        try {
            System.out.println(
                    "=================================================================================================");
            System.out.println("Umpire MS1 quantification and feature detection analysis (version: "
                    + UmpireInfo.GetInstance().Version + ")");
            if (args.length < 3 || !args[1].startsWith("-mode")) {
                System.out
                        .println("command : java -jar -Xmx10G MS1Quant.jar ms1quant.params -mode[1 or 2] [Option]");
                System.out.println("\n-mode");
                System.out.println("\t1:Single file mode--> mzXML_file PepXML_file");
                System.out.println("\t\tEx: -mode1 file1.mzXML file1.pep.xml");
                System.out.println(
                        "\t2:Folder mode--> mzXML_Folder PepXML_Folder, all generated csv tables will be merged into a single csv file");
                System.out.println("\t\tEx: -mode2 /data/mzxml/ /data/pepxml/");
                System.out.println("\nOptions");
                System.out.println(
                        "\t-C\tNo of concurrent files to be processed (only for folder mode), Ex. -C5, default:1");
                System.out.println("\t-p\tMinimum probability, Ex. -p0.9, default:0.9");
                System.out.println("\t-ID\tDetect identified feature only");
                System.out.println("\t-O\toutput folder, Ex. -O/data/");
                return;
            }
            ConsoleLogger consoleLogger = new ConsoleLogger();
            consoleLogger.SetConsoleLogger(Level.DEBUG);
            consoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "ms1quant_debug.log");
            Logger logger = Logger.getRootLogger();
            logger.debug("Command: " + Arrays.toString(args));
            logger.info("MS1Quant version: " + UmpireInfo.GetInstance().Version);

            String parameterfile = args[0];
            logger.info("Parameter file: " + parameterfile);
            File paramfile = new File(parameterfile);
            if (!paramfile.exists()) {
                logger.error("Parameter file " + paramfile.getAbsolutePath()
                        + " cannot be found. The program will exit.");
            }

            reader = new BufferedReader(new FileReader(paramfile.getAbsolutePath()));
            String line = "";
            InstrumentParameter param = new InstrumentParameter(InstrumentParameter.InstrumentType.TOF5600);
            int NoCPUs = 2;
            int NoFile = 1;
            param.DetermineBGByID = false;
            param.EstimateBG = true;

            //<editor-fold defaultstate="collapsed" desc="Read parameter file">
            while ((line = reader.readLine()) != null) {
                if (!"".equals(line) && !line.startsWith("#")) {
                    logger.info(line);
                    //System.out.println(line);
                    if (line.split("=").length < 2) {
                        continue;
                    }
                    if (line.split("=").length < 2) {
                        continue;
                    }
                    String type = line.split("=")[0].trim();
                    if (type.startsWith("para.")) {
                        type = type.replace("para.", "SE.");
                    }
                    String value = line.split("=")[1].trim();
                    switch (type) {
                    case "Thread": {
                        NoCPUs = Integer.parseInt(value);
                        break;
                    }
                    //<editor-fold defaultstate="collapsed" desc="instrument parameters">

                    case "SE.MS1PPM": {
                        param.MS1PPM = Float.parseFloat(value);
                        break;
                    }
                    case "SE.MS2PPM": {
                        param.MS2PPM = Float.parseFloat(value);
                        break;
                    }
                    case "SE.SN": {
                        param.SNThreshold = Float.parseFloat(value);
                        break;
                    }
                    case "SE.MS2SN": {
                        param.MS2SNThreshold = Float.parseFloat(value);
                        break;
                    }
                    case "SE.MinMSIntensity": {
                        param.MinMSIntensity = Float.parseFloat(value);
                        break;
                    }
                    case "SE.MinMSMSIntensity": {
                        param.MinMSMSIntensity = Float.parseFloat(value);
                        break;
                    }
                    case "SE.MinRTRange": {
                        param.MinRTRange = Float.parseFloat(value);
                        break;
                    }
                    case "SE.MaxNoPeakCluster": {
                        param.MaxNoPeakCluster = Integer.parseInt(value);
                        param.MaxMS2NoPeakCluster = Integer.parseInt(value);
                        break;
                    }
                    case "SE.MinNoPeakCluster": {
                        param.MinNoPeakCluster = Integer.parseInt(value);
                        param.MinMS2NoPeakCluster = Integer.parseInt(value);
                        break;
                    }
                    case "SE.MinMS2NoPeakCluster": {
                        param.MinMS2NoPeakCluster = Integer.parseInt(value);
                        break;
                    }
                    case "SE.MaxCurveRTRange": {
                        param.MaxCurveRTRange = Float.parseFloat(value);
                        break;
                    }
                    case "SE.Resolution": {
                        param.Resolution = Integer.parseInt(value);
                        break;
                    }
                    case "SE.RTtol": {
                        param.RTtol = Float.parseFloat(value);
                        break;
                    }
                    case "SE.NoPeakPerMin": {
                        param.NoPeakPerMin = Integer.parseInt(value);
                        break;
                    }
                    case "SE.StartCharge": {
                        param.StartCharge = Integer.parseInt(value);
                        break;
                    }
                    case "SE.EndCharge": {
                        param.EndCharge = Integer.parseInt(value);
                        break;
                    }
                    case "SE.MS2StartCharge": {
                        param.MS2StartCharge = Integer.parseInt(value);
                        break;
                    }
                    case "SE.MS2EndCharge": {
                        param.MS2EndCharge = Integer.parseInt(value);
                        break;
                    }
                    case "SE.NoMissedScan": {
                        param.NoMissedScan = Integer.parseInt(value);
                        break;
                    }
                    case "SE.Denoise": {
                        param.Denoise = Boolean.valueOf(value);
                        break;
                    }
                    case "SE.EstimateBG": {
                        param.EstimateBG = Boolean.valueOf(value);
                        break;
                    }
                    case "SE.RemoveGroupedPeaks": {
                        param.RemoveGroupedPeaks = Boolean.valueOf(value);
                        break;
                    }
                    case "SE.MinFrag": {
                        param.MinFrag = Integer.parseInt(value);
                        break;
                    }
                    case "SE.IsoPattern": {
                        param.IsoPattern = Float.valueOf(value);
                        break;
                    }
                    case "SE.StartRT": {
                        param.startRT = Float.valueOf(value);
                    }
                    case "SE.EndRT": {
                        param.endRT = Float.valueOf(value);
                    }

                    //</editor-fold>
                    }
                }
            }
            //</editor-fold>

            int mode = 1;
            if (args[1].equals("-mode2")) {
                mode = 2;
            } else if (args[1].equals("-mode1")) {
                mode = 1;
            } else {
                logger.error("-mode number not recongized. The program will exit.");
            }

            String mzXML = "";
            String pepXML = "";
            String mzXMLPath = "";
            String pepXMLPath = "";
            File mzXMLfile = null;
            File pepXMLfile = null;
            File mzXMLfolder = null;
            File pepXMLfolder = null;
            int idx = 0;
            if (mode == 1) {
                mzXML = args[2];
                logger.info("Mode1 mzXML file: " + mzXML);
                mzXMLfile = new File(mzXML);
                if (!mzXMLfile.exists()) {
                    logger.error("Mode1 mzXML file " + mzXMLfile.getAbsolutePath()
                            + " cannot be found. The program will exit.");
                    return;
                }
                pepXML = args[3];
                logger.info("Mode1 pepXML file: " + pepXML);
                pepXMLfile = new File(pepXML);
                if (!pepXMLfile.exists()) {
                    logger.error("Mode1 pepXML file " + pepXMLfile.getAbsolutePath()
                            + " cannot be found. The program will exit.");
                    return;
                }
                idx = 4;
            } else if (mode == 2) {
                mzXMLPath = args[2];
                logger.info("Mode2 mzXML folder: " + mzXMLPath);
                mzXMLfolder = new File(mzXMLPath);
                if (!mzXMLfolder.exists()) {
                    logger.error("Mode2 mzXML folder " + mzXMLfolder.getAbsolutePath()
                            + " does not exist. The program will exit.");
                    return;
                }
                pepXMLPath = args[3];
                logger.info("Mode2 pepXML folder: " + pepXMLPath);
                pepXMLfolder = new File(pepXMLPath);
                if (!pepXMLfolder.exists()) {
                    logger.error("Mode2 pepXML folder " + pepXMLfolder.getAbsolutePath()
                            + " does not exist. The program will exit.");
                    return;
                }
                idx = 4;
            }

            String outputfolder = "";
            float MinProb = 0f;
            for (int i = idx; i < args.length; i++) {
                if (args[i].startsWith("-")) {
                    if (args[i].equals("-ID")) {
                        param.TargetIDOnly = true;
                        logger.info("Detect ID feature only: true");
                    }
                    if (args[i].startsWith("-O")) {
                        outputfolder = args[i].substring(2);
                        logger.info("Output folder: " + outputfolder);

                        File outputfile = new File(outputfolder);
                        if (!outputfolder.endsWith("\\") | outputfolder.endsWith("/")) {
                            outputfolder += "/";
                        }
                        if (!outputfile.exists()) {
                            outputfile.mkdir();
                        }
                    }
                    if (args[i].startsWith("-C")) {
                        try {
                            NoFile = Integer.parseInt(args[i].substring(2));
                            logger.info("No of concurrent files: " + NoFile);
                        } catch (Exception ex) {
                            logger.error(args[i]
                                    + " is not a correct integer format, will process only one file at a time.");
                        }
                    }
                    if (args[i].startsWith("-p")) {
                        try {
                            MinProb = Float.parseFloat(args[i].substring(2));
                            logger.info("probability threshold: " + MinProb);
                        } catch (Exception ex) {
                            logger.error(args[i] + " is not a correct format, will use 0 as threshold instead.");
                        }
                    }
                }
            }

            reader.close();
            TandemParam tandemparam = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600);
            PTMManager.GetInstance();

            if (param.TargetIDOnly) {
                param.EstimateBG = false;
                param.ApexDelta = 1.5f;
                param.NoMissedScan = 10;
                param.MiniOverlapP = 0.2f;
                param.RemoveGroupedPeaks = false;
                param.CheckMonoIsotopicApex = false;
                param.DetectByCWT = false;
                param.FillGapByBK = false;
                param.IsoCorrThreshold = -1f;
                param.SmoothFactor = 3;
            }

            if (mode == 1) {
                logger.info("Processing " + mzXMLfile.getAbsolutePath() + "....");
                long time = System.currentTimeMillis();
                LCMSPeakMS1 LCMS1 = new LCMSPeakMS1(mzXMLfile.getAbsolutePath(), NoCPUs);
                LCMS1.SetParameter(param);

                LCMS1.Resume = false;
                if (!param.TargetIDOnly) {
                    LCMS1.CreatePeakFolder();
                }
                LCMS1.ExportPeakClusterTable = true;

                if (pepXMLfile.exists()) {
                    tandemparam.InteractPepXMLPath = pepXMLfile.getAbsolutePath();
                    LCMS1.ParsePepXML(tandemparam, MinProb);
                    logger.info("No. of PSMs included: " + LCMS1.IDsummary.PSMList.size());
                    logger.info("No. of Peptide ions included: " + LCMS1.IDsummary.GetPepIonList().size());
                }

                if (param.TargetIDOnly) {
                    LCMS1.SaveSerializationFile = false;
                }

                if (param.TargetIDOnly || !LCMS1.ReadPeakCluster()) {
                    LCMS1.PeakClusterDetection();
                }

                if (pepXMLfile.exists()) {
                    LCMS1.AssignQuant(false);
                    LCMS1.IDsummary.ExportPepID(outputfolder);
                }
                time = System.currentTimeMillis() - time;
                logger.info(LCMS1.ParentmzXMLName + " processed time:"
                        + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time),
                                TimeUnit.MILLISECONDS.toMinutes(time)
                                        - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)),
                                TimeUnit.MILLISECONDS.toSeconds(time)
                                        - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time))));
                LCMS1.BaseClearAllPeaks();
                LCMS1.SetSpectrumParser(null);
                LCMS1.IDsummary = null;
                LCMS1 = null;
                System.gc();
            } else if (mode == 2) {

                LCMSID IDsummary = new LCMSID("", "", "");
                logger.info("Parsing all pepXML files in " + pepXMLPath + "....");
                for (File file : pepXMLfolder.listFiles()) {
                    if (file.getName().toLowerCase().endsWith("pep.xml")
                            || file.getName().toLowerCase().endsWith("pepxml")) {
                        PepXMLParser pepXMLParser = new PepXMLParser(IDsummary, file.getAbsolutePath(), MinProb);
                    }
                }
                HashMap<String, LCMSID> LCMSIDMap = IDsummary.GetLCMSIDFileMap();

                ExecutorService executorPool = null;
                executorPool = Executors.newFixedThreadPool(NoFile);

                logger.info("Processing all mzXML files in " + mzXMLPath + "....");
                for (File file : mzXMLfolder.listFiles()) {
                    if (file.getName().toLowerCase().endsWith("mzxml")) {
                        LCMSID id = LCMSIDMap.get(FilenameUtils.getBaseName(file.getName()));
                        if (id == null || id.PSMList == null) {
                            logger.warn("No IDs found in :" + FilenameUtils.getBaseName(file.getName())
                                    + ". Quantification for this file is skipped");
                            continue;
                        }
                        if (!id.PSMList.isEmpty()) {
                            MS1TargetQuantThread thread = new MS1TargetQuantThread(file, id, NoCPUs, outputfolder,
                                    param);
                            executorPool.execute(thread);
                        }
                    }
                }
                LCMSIDMap.clear();
                LCMSIDMap = null;
                IDsummary = null;
                executorPool.shutdown();
                try {
                    executorPool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
                } catch (InterruptedException e) {
                    logger.info("interrupted..");
                }

                if (outputfolder == null | outputfolder.equals("")) {
                    outputfolder = mzXMLPath;
                }

                logger.info("Merging PSM files..");
                File output = new File(outputfolder);
                FileWriter writer = new FileWriter(output.getAbsolutePath() + "/PSM_merge.csv");
                boolean header = false;
                for (File csvfile : output.listFiles()) {
                    if (csvfile.getName().toLowerCase().endsWith("_psms.csv")) {
                        BufferedReader outreader = new BufferedReader(new FileReader(csvfile));
                        String outline = outreader.readLine();
                        if (!header) {
                            writer.write(outline + "\n");
                            header = true;
                        }
                        while ((outline = outreader.readLine()) != null) {
                            writer.write(outline + "\n");
                        }
                        outreader.close();
                        csvfile.delete();
                    }
                }
                writer.close();
            }
            logger.info("MS1 quant module is complete.");
        } catch (Exception e) {
            Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e));
            throw e;
        }
    }
}