com.act.lcms.db.analysis.IonSearchAnalysis.java Source code

Java tutorial

Introduction

Here is the source code for com.act.lcms.db.analysis.IonSearchAnalysis.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.lcms.db.analysis;

import com.act.lcms.Gnuplotter;
import com.act.lcms.MS1;
import com.act.lcms.db.io.DB;
import com.act.lcms.db.io.LoadPlateCompositionIntoDB;
import com.act.lcms.db.model.ChemicalAssociatedWithPathway;
import com.act.lcms.db.model.CuratedChemical;
import com.act.lcms.db.model.LCMSWell;
import com.act.lcms.db.model.Plate;
import com.act.lcms.db.model.ScanFile;
import com.act.lcms.db.model.StandardWell;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class IonSearchAnalysis {
    public static final String OPTION_DIRECTORY = "d";
    public static final String OPTION_OUTPUT_PREFIX = "o";
    public static final String OPTION_STRAINS = "s";
    public static final String OPTION_CONSTRUCTS = "c";
    public static final String OPTION_NEGATIVE_STRAINS = "S";
    public static final String OPTION_NEGATIVE_CONSTRUCTS = "C";
    public static final String OPTION_STANDARD_NAME = "sn";
    public static final String OPTION_STANDARD_PLATE_BARCODE = "sp";
    public static final String OPTION_STANDARD_WELLS = "sw";
    public static final String OPTION_SEARCH_MZ = "m";
    public static final String OPTION_NO_STANDARD = "ns";
    public static final String OPTION_FILTER_BY_PLATE_BARCODE = "p";
    public static final String OPTION_USE_HEATMAP = "e";
    public static final String OPTION_USE_SNR = "r";

    public static final String HELP_MESSAGE = StringUtils
            .join(new String[] { "This class applies the MS1 LCMS analysis to a combination of ",
                    "standards and samples.  Specify positive constructs/strains and negative ",
                    "controls to be analyzed and graphed together.\nStandards will be determined by ",
                    "the positive samples' targets if a standard is not explicitly specified.\n",
                    "An m/z value or chemical for which to search in the LCMS trace data can be ",
                    "explicitly specified; if no search chemical is specified and all positive samples ",
                    "share a single target, that target's mass will be used." }, "");
    public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();

    static {
        HELP_FORMATTER.setWidth(100);
    }

    public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {
        {
            add(Option.builder(OPTION_DIRECTORY).argName("directory")
                    .desc("The directory where LCMS analysis results live").hasArg().required()
                    .longOpt("data-dir"));
            add(Option.builder(OPTION_OUTPUT_PREFIX).argName("output prefix")
                    .desc("A prefix for the output data/pdf files").hasArg().required().longOpt("output-prefix"));

            add(Option.builder(OPTION_STRAINS).argName("strains")
                    .desc("The msids of the strains to be analyzed (specify only one of 'strain' or 'construct')")
                    .hasArgs().valueSeparator(',').longOpt("msids"));
            add(Option.builder(OPTION_CONSTRUCTS).argName("constructs").desc(
                    "The construct ids (composition) to be analyzed (specify only one of 'strain' or 'construct')")
                    .hasArgs().valueSeparator(',').longOpt("construct-ids"));
            add(Option.builder(OPTION_NEGATIVE_STRAINS).argName("negative-strains")
                    .desc("A strains to use as a negative control (the first novel LCMS sample will be used)")
                    .hasArgs().valueSeparator(',').longOpt("negative-msids"));
            add(Option.builder(OPTION_NEGATIVE_CONSTRUCTS).argName("constructs")
                    .desc("A constructs to use as a negative control (the first novel LCMS sample will be used)")
                    .hasArgs().valueSeparator(',').longOpt("negative-construct-ids"));
            add(Option.builder(OPTION_STANDARD_NAME).argName("standard's chemical name")
                    .desc("The name of the chemical to use as a standard (default will be LCMS wells' target)")
                    .hasArg().longOpt("standard-name"));
            add(Option.builder(OPTION_STANDARD_PLATE_BARCODE).argName("standard plate barcode")
                    .desc("The plate barcode to use when searching for a compatible standard").hasArg().required()
                    .longOpt("standard-plate"));
            add(Option.builder(OPTION_SEARCH_MZ).argName("search chem")
                    .desc("The m/z or chemical name to search for (default will use target of LCMS wells)").hasArg()
                    .longOpt("search-chem"));
            add(Option.builder(OPTION_NO_STANDARD).argName("no standard")
                    .desc("Specifies that the analysis should be completed without a standard")
                    .longOpt("no-standard"));
            add(Option.builder(OPTION_STANDARD_WELLS).argName("well coordinates")
                    .desc("Specifies a specific well or wells in the standard plate to use as the standard sample")
                    .hasArgs().valueSeparator(',').longOpt("standard-wells"));
            add(Option.builder().argName("ion list").desc(
                    "A comma-separated list of ions to include in the search (ions not in this list will be ignored)")
                    .hasArgs().valueSeparator(',').longOpt("include-ions"));
            add(Option.builder().argName("ion list").desc(
                    "A comma-separated list of ions to exclude from the search, takes precedence over include-ions")
                    .hasArgs().valueSeparator(',').longOpt("exclude-ions"));
            add(Option.builder(OPTION_FILTER_BY_PLATE_BARCODE).argName("plate barcode list")
                    .desc("A list of plate barcodes to consider, all other plates will be ignored").hasArgs()
                    .valueSeparator(',').longOpt("include-plates"));
            add(Option.builder(OPTION_USE_HEATMAP).desc("Produce a heat map rather than a 2d line plot")
                    .longOpt("heat-map"));
            // TODO: add filter on SCAN_MODE.
            add(Option.builder(OPTION_USE_SNR)
                    .desc("Use signal-to-noise ratio instead of max intensity for peak identification")
                    .longOpt("use-snr"));

            add(Option.builder().argName("font scale").desc(
                    "A Gnuplot fontscale value, should be between 0.1 and 0.5 (0.4 works if the graph text is large")
                    .hasArg().longOpt("font-scale"));
            add(Option.builder()
                    .desc(String.format(
                            "Use fine-grained M/Z tolerance (%.3f) when conducting the MS1 analysis "
                                    + "instead of default M/Z tolerance %.3f",
                            MS1.MS1_MZ_TOLERANCE_FINE, MS1.MS1_MZ_TOLERANCE_DEFAULT))
                    .longOpt("fine-grained-mz"));

            // Everybody needs a little help from their friends.
            add(Option.builder("h").argName("help").desc("Prints this help message").longOpt("help"));
        }
    };
    static {
        // Add DB connection options.
        OPTION_BUILDERS.addAll(DB.DB_OPTION_BUILDERS);
    }

    public static void main(String[] args) throws Exception {
        Options opts = new Options();
        for (Option.Builder b : OPTION_BUILDERS) {
            opts.addOption(b.build());
        }

        CommandLine cl = null;
        try {
            CommandLineParser parser = new DefaultParser();
            cl = parser.parse(opts, args);
        } catch (ParseException e) {
            System.err.format("Argument parsing failed: %s\n", e.getMessage());
            HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                    true);
            System.exit(1);
        }

        if (cl.hasOption("help")) {
            HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                    true);
            return;
        }

        File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY));
        if (!lcmsDir.isDirectory()) {
            System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath());
            HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                    true);
            System.exit(1);
        }

        Double fontScale = null;
        if (cl.hasOption("font-scale")) {
            try {
                fontScale = Double.parseDouble(cl.getOptionValue("font-scale"));
            } catch (IllegalArgumentException e) {
                System.err.format("Argument for font-scale must be a floating point number.\n");
                System.exit(1);
            }
        }

        try (DB db = DB.openDBFromCLI(cl)) {
            Set<String> includeIons = null;
            if (cl.hasOption("include-ions")) {
                String[] ionNames = cl.getOptionValues("include-ions");
                includeIons = new HashSet<>(Arrays.asList(ionNames));
                System.out.format("Including ions in search: %s\n", StringUtils.join(includeIons, ", "));
            }
            Set<String> excludeIons = null;
            if (cl.hasOption("exclude-ions")) {
                String[] ionNames = cl.getOptionValues("exclude-ions");
                excludeIons = new HashSet<>(Arrays.asList(ionNames));
                System.out.format("Excluding ions from search: %s\n", StringUtils.join(excludeIons, ", "));
            }

            Set<Integer> takeSamplesFromPlateIds = null;
            if (cl.hasOption(OPTION_FILTER_BY_PLATE_BARCODE)) {
                String[] plateBarcodes = cl.getOptionValues(OPTION_FILTER_BY_PLATE_BARCODE);
                System.out.format("Considering only sample wells in plates: %s\n",
                        StringUtils.join(plateBarcodes, ", "));
                takeSamplesFromPlateIds = new HashSet<>(plateBarcodes.length);
                for (String plateBarcode : plateBarcodes) {
                    Plate p = Plate.getPlateByBarcode(db, plateBarcode);
                    if (p == null) {
                        System.err.format("WARNING: unable to find plate in DB with barcode %s\n", plateBarcode);
                    } else {
                        takeSamplesFromPlateIds.add(p.getId());
                    }
                }
                // Allow filtering on barcode even if we couldn't find any in the DB.
            }

            System.out.format("Loading/updating LCMS scan files into DB\n");
            ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir);

            System.out.format("Processing LCMS scans\n");
            Pair<List<LCMSWell>, Set<Integer>> positiveWellsAndPlateIds = Utils.extractWellsAndPlateIds(db,
                    cl.getOptionValues(OPTION_STRAINS), cl.getOptionValues(OPTION_CONSTRUCTS),
                    takeSamplesFromPlateIds, false);
            List<LCMSWell> positiveWells = positiveWellsAndPlateIds.getLeft();
            if (positiveWells.size() == 0) {
                throw new RuntimeException("Found no LCMS wells for specified strains/constructs");
            }
            // Only take negative samples from the plates where we found the positive samples.
            Pair<List<LCMSWell>, Set<Integer>> negativeWellsAndPlateIds = Utils.extractWellsAndPlateIds(db,
                    cl.getOptionValues(OPTION_NEGATIVE_STRAINS), cl.getOptionValues(OPTION_NEGATIVE_CONSTRUCTS),
                    positiveWellsAndPlateIds.getRight(), true);
            List<LCMSWell> negativeWells = negativeWellsAndPlateIds.getLeft();
            if (negativeWells == null || negativeWells.size() == 0) {
                System.err.format("WARNING: no valid negative samples found in same plates as positive samples\n");
            }

            // Extract the reference MZ that will be used in the LCMS trace processing.
            List<Pair<String, Double>> searchMZs = null;
            Set<CuratedChemical> standardChemicals = null;
            List<ChemicalAssociatedWithPathway> pathwayChems = null;
            if (cl.hasOption(OPTION_SEARCH_MZ)) {
                // Assume mz can be an FP number of a chemical name.
                String massStr = cl.getOptionValue(OPTION_SEARCH_MZ);
                Pair<String, Double> searchMZ = Utils.extractMassFromString(db, massStr);
                if (searchMZ != null) {
                    searchMZs = Collections.singletonList(searchMZ);
                }
                standardChemicals = Utils.extractTargetsForWells(db, positiveWells);
            } else {
                CuratedChemical targetChemical = Utils.requireOneTarget(db, positiveWells);
                if (targetChemical == null) {
                    throw new RuntimeException(
                            "Unable to find a curated chemical entry for specified strains'/constructs' targets.  "
                                    + "Please specify a chemical name or m/z explicitly or update the curated chemicals list in the DB.");
                }
                System.out.format("Using reference M/Z for positive target %s (%f)\n", targetChemical.getName(),
                        targetChemical.getMass());
                searchMZs = Collections.singletonList(Pair.of(targetChemical.getName(), targetChemical.getMass()));
                standardChemicals = Collections.singleton(targetChemical);
            }

            // Look up the standard by name, or use the target if none is specified.
            List<StandardWell> standardWells = null;
            if (cl.hasOption(OPTION_NO_STANDARD)) {
                System.err.format("WARNING: skipping standard comparison (no-standard option specified)\n");
                standardWells = new ArrayList<>(0);
            } else if (cl.hasOption(OPTION_STANDARD_WELLS)) {
                String[] standardCoordinates = cl.getOptionValues(OPTION_STANDARD_WELLS);
                standardWells = new ArrayList<>(standardCoordinates.length);
                Plate standardPlate = Plate.getPlateByBarcode(db, cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE));
                List<String> foundCoordinates = new ArrayList<>(standardCoordinates.length);
                for (String stringCoords : standardCoordinates) {
                    Pair<Integer, Integer> coords = Utils.parsePlateCoordinates(stringCoords);
                    StandardWell well = StandardWell.getInstance().getStandardWellsByPlateIdAndCoordinates(db,
                            standardPlate.getId(), coords.getLeft(), coords.getRight());
                    if (well == null) {
                        System.err.format("Unable to find standard well at %s [%s]\n", standardPlate.getBarcode(),
                                stringCoords);
                        continue;
                    }
                    standardWells.add(well);
                    foundCoordinates.add(stringCoords);
                }
                System.out.format("Using explicitly specified standard wells %s [%s]\n", standardPlate.getBarcode(),
                        StringUtils.join(foundCoordinates, ", "));
            } else if (cl.hasOption(OPTION_STANDARD_NAME)) {
                String standardName = cl.getOptionValue(OPTION_STANDARD_NAME);
                System.out.format("Using explicitly specified standard %s\n", standardName);
                standardWells = Collections.singletonList(Utils.extractStandardWellFromPlate(db,
                        cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE), standardName));
            } else if (standardChemicals != null && standardChemicals.size() > 0) {
                // Default to using the target chemical(s) as a standard if none is specified.
                standardWells = new ArrayList<>(standardChemicals.size());
                for (CuratedChemical c : standardChemicals) {
                    String standardName = c.getName();
                    System.out.format("Searching for well containing standard %s\n", standardName);
                    standardWells.add(Utils.extractStandardWellFromPlate(db,
                            cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE), standardName));
                }
            }

            boolean useFineGrainedMZ = cl.hasOption("fine-grained-mz");
            boolean useSNR = cl.hasOption(OPTION_USE_SNR);

            /* Process the standard, positive, and negative wells, producing ScanData containers that will allow them to be
             * iterated over for graph writing. */
            HashMap<Integer, Plate> plateCache = new HashMap<>();
            Pair<List<ScanData<StandardWell>>, Double> allStandardScans = AnalysisHelper.processScans(db, lcmsDir,
                    searchMZs, ScanData.KIND.STANDARD, plateCache, standardWells, useFineGrainedMZ, includeIons,
                    excludeIons, useSNR);
            Pair<List<ScanData<LCMSWell>>, Double> allPositiveScans = AnalysisHelper.processScans(db, lcmsDir,
                    searchMZs, ScanData.KIND.POS_SAMPLE, plateCache, positiveWells, useFineGrainedMZ, includeIons,
                    excludeIons, useSNR);
            Pair<List<ScanData<LCMSWell>>, Double> allNegativeScans = AnalysisHelper.processScans(db, lcmsDir,
                    searchMZs, ScanData.KIND.NEG_CONTROL, plateCache, negativeWells, useFineGrainedMZ, includeIons,
                    excludeIons, useSNR);

            String fmt = "pdf";
            String outImg = cl.getOptionValue(OPTION_OUTPUT_PREFIX) + "." + fmt;
            String outData = cl.getOptionValue(OPTION_OUTPUT_PREFIX) + ".data";
            System.err.format("Writing combined scan data to %s and graphs to %s\n", outData, outImg);

            produceLCMSSearchPlots(lcmsDir, outData, outImg, allStandardScans, allPositiveScans, allNegativeScans,
                    fontScale, useFineGrainedMZ, cl.hasOption(OPTION_USE_HEATMAP), useSNR);
        }
    }

    public static void produceLCMSSearchPlots(File lcmsDir, String outData, String outImg,
            Pair<List<ScanData<StandardWell>>, Double> allStandardScans,
            Pair<List<ScanData<LCMSWell>>, Double> allPositiveScans,
            Pair<List<ScanData<LCMSWell>>, Double> allNegativeScans, Double fontScale, boolean useFineGrainedMZ,
            boolean makeHeatmaps, boolean useSNR) throws Exception {
        List<ScanData> allScanData = new ArrayList<ScanData>() {
            {
                addAll(allStandardScans.getLeft());
                addAll(allPositiveScans.getLeft());
                addAll(allNegativeScans.getLeft());
            }
        };
        // Get the global maximum intensity across all scans.
        Double maxIntensity = Math.max(allStandardScans.getRight(),
                Math.max(allPositiveScans.getRight(), allNegativeScans.getRight()));
        System.out.format("Processing LCMS scans for graphing:\n");
        for (ScanData scanData : allScanData) {
            System.out.format("  %s\n", scanData.toString());
        }

        String fmt = "pdf";
        System.err.format("Writing combined scan data to %s and graphs to %s\n", outData, outImg);

        // Generate the data file and graphs.
        try (FileOutputStream fos = new FileOutputStream(outData)) {
            // Write all the scan data out to a single data file.
            List<String> graphLabels = new ArrayList<>();
            for (ScanData scanData : allScanData) {
                graphLabels.addAll(
                        AnalysisHelper.writeScanData(fos, lcmsDir, maxIntensity, scanData, makeHeatmaps, true));
            }

            Gnuplotter plotter = fontScale == null ? new Gnuplotter() : new Gnuplotter(fontScale);
            if (makeHeatmaps) {
                plotter.plotHeatmap(outData, outImg, graphLabels.toArray(new String[graphLabels.size()]),
                        maxIntensity, fmt);
            } else {
                plotter.plot2D(outData, outImg, graphLabels.toArray(new String[graphLabels.size()]), "time",
                        maxIntensity, "intensity", fmt);
            }
        }
    }
}