com.act.lcms.ExtractFromNetCDFAroundMass.java Source code

Java tutorial

Introduction

Here is the source code for com.act.lcms.ExtractFromNetCDFAroundMass.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.lcms;

import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.lang3.tuple.Triple;
import java.io.PrintStream;
import java.io.OutputStream;
import java.io.FileOutputStream;

public class ExtractFromNetCDFAroundMass {

    /**
     * Extracts a window around a particular m/z value within the spectra
     * @param file The netCDF file with the full LCMS (lockmass corrected) spectra
     * @param mz The m/z value +- 1 around which to extract data for
     * @param numTimepointsToExamine Since we stream the netCDF in, we can choose
     *                               to look at the first few timepoints or -1 for all
     */
    public List<Triple<Double, Double, Double>> get2DWindow(String file, double mz, int numTimepointsToExamine)
            throws Exception {
        LCMSParser parser = new LCMSNetCDFParser();
        Iterator<LCMSSpectrum> iter = parser.getIterator(file);
        List<Triple<Double, Double, Double>> window = new ArrayList<>();
        int pulled = 0;
        Double mzTightL = mz - 0.1;
        Double mzTightR = mz + 0.1;
        Double mzMinus1Da = mz - 1;
        Double mzPlus1Da = mz + 1;

        // iterate through first few, or all if -1 given
        while (iter.hasNext() && (numTimepointsToExamine == -1 || pulled < numTimepointsToExamine)) {
            LCMSSpectrum timepoint = iter.next();

            List<Pair<Double, Double>> intensities = timepoint.getIntensities();

            // this time point is valid to look at if its max intensity is around
            // the mass we care about. So lets first get the max peak location
            Pair<Double, Double> max_peak = findMaxPeak(intensities);
            Double max_mz = max_peak.getLeft();

            // If the max_mz value is pretty close to our target mass, ie in [mzTightL, mzTightR]
            // Then this timepoint is a good timepoint to output... proceed, shall we.
            if (max_mz >= mzTightL && max_mz <= mzTightR) {

                // For this timepoint, output a window
                for (int k = 0; k < intensities.size(); k++) {
                    Double mz_here = intensities.get(k).getLeft();
                    Double intensity = intensities.get(k).getRight();

                    // The window not as tight, but +-1 Da around the target mass
                    if (mz_here > mzMinus1Da && mz_here < mzPlus1Da) {
                        window.add(Triple.of(timepoint.getTimeVal(), mz_here, intensity));
                    }
                }
            }
            pulled++;
        }
        return window;
    }

    private Pair<Double, Double> findMaxPeak(List<Pair<Double, Double>> raw) {
        // the intensity is the second value in the pairs
        // this finds the pair with the max intensity 

        Pair<Double, Double> max = null;
        for (Pair<Double, Double> mz_int : raw) {
            if (max == null || max.getRight() < mz_int.getRight())
                max = mz_int;
        }
        return max;
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 4 || !args[0].endsWith(".nc")) {
            throw new RuntimeException(
                    "Needs (1) NetCDF .nc file, " + "(2) mass value, e.g., 132.0772 for debugging, "
                            + "(3) how many timepoints to process (-1 for all), "
                            + "(4) prefix for .data and rendered .pdf, '-' for stdout");
        }

        String netCDF = args[0];
        Double mz = Double.parseDouble(args[1]);
        Integer numSpectraToProcess = Integer.parseInt(args[2]);
        String outPrefix = args[3];
        String outPDF = outPrefix.equals("-") ? null : outPrefix + ".pdf";
        String outDATA = outPrefix.equals("-") ? null : outPrefix + ".data";

        ExtractFromNetCDFAroundMass e = new ExtractFromNetCDFAroundMass();
        List<Triple<Double, Double, Double>> window = e.get2DWindow(netCDF, mz, numSpectraToProcess);

        // Write data output to outfile
        PrintStream whereTo = outDATA == null ? System.out : new PrintStream(new FileOutputStream(outDATA));
        for (Triple<Double, Double, Double> xyz : window) {
            whereTo.format("%.4f\t%.4f\t%.4f\n", xyz.getLeft(), xyz.getMiddle(), xyz.getRight());
            whereTo.flush();
        }

        if (outDATA != null) {
            // if outDATA is != null, then we have written to .data file
            // now render the .data to the corresponding .pdf file

            // first close the .data
            whereTo.close();

            // render outDATA to outPDF using gnuplo
            Gnuplotter plotter = new Gnuplotter();
            plotter.plot3D(outDATA, outPDF, netCDF, mz);
        }
    }
}