com.act.lcms.db.analysis.FeedingAnalysis.java Source code

Java tutorial

Introduction

Here is the source code for com.act.lcms.db.analysis.FeedingAnalysis.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.lcms.db.analysis;

import com.act.lcms.MS1;
import com.act.lcms.db.io.DB;
import com.act.lcms.db.io.LoadPlateCompositionIntoDB;
import com.act.lcms.db.model.CuratedChemical;
import com.act.lcms.db.model.FeedingLCMSWell;
import com.act.lcms.db.model.MS1ScanForWellAndMassCharge;
import com.act.lcms.db.model.Plate;
import com.act.lcms.db.model.ScanFile;
import com.act.lcms.plotter.WriteAndPlotMS1Results;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

import java.io.File;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class FeedingAnalysis {
    public static final String DEFAULT_ION = "M+H";

    public static final String OPTION_DIRECTORY = "d";
    public static final String OPTION_OUTPUT_PREFIX = "o";
    public static final String OPTION_FEEDING_STRAIN_OR_CONSTRUCT = "c";
    public static final String OPTION_FEEDING_EXTRACT = "e";
    public static final String OPTION_FEEDING_FED_CHEMICAL = "f";
    public static final String OPTION_ION_NAME = "i";
    public static final String OPTION_PLATE_BARCODE = "b";
    public static final String OPTION_SEARCH_MZ = "m";

    public static final String HELP_MESSAGE = StringUtils
            .join(new String[] { "This class applies the MS1 LCMS analysis to a strain/construct fed ",
                    "different concentrations of a precursors. It takes in a feeding specification ",
                    "plate, strain or construct (MSID or internal name), extract, ion name, mz, ",
                    "and fed chemical and generates a comparative graph of ",
                    "intensity vs. concentration (using max peak inferred from traces) ",
                    "and an overlaid graph of the spectra in the various concentrations." }, "");
    public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();
    static {
        HELP_FORMATTER.setWidth(100);
    }

    public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {
        {
            add(Option.builder(OPTION_DIRECTORY).argName("directory")
                    .desc("The directory where LCMS analysis results live").hasArg().required()
                    .longOpt("data-dir"));
            add(Option.builder(OPTION_OUTPUT_PREFIX).argName("output prefix")
                    .desc("A prefix for the output data/pdf files").hasArg().required().longOpt("output-prefix"));

            add(Option.builder(OPTION_FEEDING_STRAIN_OR_CONSTRUCT)
                    .desc("Perform a feeding analysis for this strain/construct").hasArg().required()
                    .longOpt("construct-or-strain"));
            add(Option.builder(OPTION_FEEDING_EXTRACT)
                    .desc("Specify the extract for which to perform a feeding analysis").hasArg().required()
                    .longOpt("extract"));
            add(Option.builder(OPTION_FEEDING_FED_CHEMICAL)
                    .desc("Specify the fed chemical group for which to perform the feeding analysis").hasArg()
                    .required().longOpt("fed-chemical"));
            add(Option.builder(OPTION_ION_NAME)
                    .desc(String.format(
                            "An ion of the chemical target for which to compute feeding curves (default is %s)",
                            DEFAULT_ION))
                    .hasArg().longOpt("ion"));
            add(Option.builder(OPTION_PLATE_BARCODE)
                    .desc("The barcode of the plate from which to extract feeding data").hasArg().required()
                    .longOpt("plate-barcode"));
            add(Option.builder(OPTION_SEARCH_MZ).desc(
                    "A m/z value or chemical name to search for in the feeding data; default is the construct's target")
                    .hasArg().longOpt("search-chem"));
        }
    };
    static {
        // Add DB connection options.
        OPTION_BUILDERS.addAll(DB.DB_OPTION_BUILDERS);
    }

    private static void performFeedingAnalysis(DB db, String lcmsDir, String searchIon, String searchMassStr,
            String plateBarcode, String strainOrConstruct, String extract, String feedingCondition,
            String outPrefix, String fmt) throws SQLException, Exception {
        Plate p = Plate.getPlateByBarcode(db, plateBarcode);
        if (p == null) {
            throw new RuntimeException(String.format("Unable to find plate with barcode %s", plateBarcode));
        }
        if (p.getContentType() != Plate.CONTENT_TYPE.FEEDING_LCMS) {
            throw new RuntimeException(String.format("Plate with barcode %s is not a feeding plate (%s)",
                    plateBarcode, p.getContentType()));
        }
        List<FeedingLCMSWell> allPlateWells = FeedingLCMSWell.getInstance().getFeedingLCMSWellByPlateId(db,
                p.getId());
        if (allPlateWells == null || allPlateWells.size() == 0) {
            throw new RuntimeException(
                    String.format("No feeding LCMS wells available for plate %s", p.getBarcode()));
        }

        List<FeedingLCMSWell> relevantWells = new ArrayList<>();
        for (FeedingLCMSWell well : allPlateWells) {
            if (!well.getMsid().equals(strainOrConstruct) && !well.getComposition().equals(strainOrConstruct)) {
                // Ignore wells that don't have the right strain/construct (though we assume the whole plate shares one).
                continue;
            }

            if (!well.getExtract().equals(extract)) {
                // Filter by extract.
                continue;
            }

            if (!well.getChemical().equals(feedingCondition)) {
                // Filter by fed chemical.
                continue;
            }

            relevantWells.add(well);
        }

        Collections.sort(relevantWells, new Comparator<FeedingLCMSWell>() {
            @Override
            public int compare(FeedingLCMSWell o1, FeedingLCMSWell o2) {
                // Assume concentration is never null.
                return o1.getConcentration().compareTo(o2.getConcentration());
            }
        });

        Map<FeedingLCMSWell, ScanFile> wellsToScanFiles = new HashMap<>();
        Set<String> constructs = new HashSet<>(1);
        for (FeedingLCMSWell well : relevantWells) {
            List<ScanFile> scanFiles = ScanFile.getScanFileByPlateIDRowAndColumn(db, well.getPlateId(),
                    well.getPlateRow(), well.getPlateColumn());
            if (scanFiles == null || scanFiles.size() == 0) {
                System.err.format("WARNING: no scan files for well at %s %s\n", p.getBarcode(),
                        well.getCoordinatesString());
                continue;
            }
            if (scanFiles.size() > 1) {
                System.err.format("WARNING: found multiple scan files for %s %s, using first\n", p.getBarcode(),
                        well.getCoordinatesString());
            }
            while (scanFiles.size() > 0 && scanFiles.get(0).getFileType() != ScanFile.SCAN_FILE_TYPE.NC) {
                scanFiles.remove(0);
            }
            if (scanFiles.size() == 0) {
                System.err.format("WARNING: no scan files with valid format for %s %s\n", p.getBarcode(),
                        well.getCoordinatesString());
                continue;
            }
            // All of the extracted wells should be unique, so there should be no collisions here.
            wellsToScanFiles.put(well, scanFiles.get(0));
            constructs.add(well.getComposition());
        }

        Pair<String, Double> searchMass = null;
        if (searchMassStr != null) {
            searchMass = Utils.extractMassFromString(db, searchMassStr);
        }
        if (searchMass == null) {
            if (constructs.size() != 1) {
                throw new RuntimeException(String.format(
                        "Found multiple growth targets for feeding analysis when no mass specified: %s",
                        StringUtils.join(constructs, ", ")));
            }
            String constructName = constructs.iterator().next();
            CuratedChemical cc = Utils.extractTargetForConstruct(db, constructName);
            if (cc == null) {
                throw new RuntimeException(
                        String.format("Unable to find curated chemical for construct %s", constructName));
            }
            System.out.format("Using target %s of construct %s as search mass (%f)\n", cc.getName(), constructName,
                    cc.getMass());
            searchMass = Pair.of(cc.getName(), cc.getMass());
        }

        MS1 c = new MS1();
        // TODO: use configurable or scan-file derived ion mode. Do it the way its done in:
        // https://github.com/20n/act/blob/d997e84f0f44a5c88a94ef935829cb47e0ca8d1a/reachables/src/main/java/com/act/lcms/db/analysis/AnalysisHelper.java#L79
        MS1.IonMode mode = MS1.IonMode.valueOf("POS");
        Map<String, Double> metlinMasses = c.getIonMasses(searchMass.getValue(), mode);

        if (searchIon == null || searchIon.isEmpty()) {
            System.err.format("No search ion defined, defaulting to M+H\n");
            searchIon = DEFAULT_ION;
        }

        List<Pair<Double, MS1ScanForWellAndMassCharge>> rampUp = new ArrayList<>();
        for (FeedingLCMSWell well : relevantWells) {
            ScanFile scanFile = wellsToScanFiles.get(well);
            if (scanFile == null) {
                System.err.format("WARNING: no scan file available for %s %s", p.getBarcode(),
                        well.getCoordinatesString());
                continue;
            }
            File localScanFile = new File(lcmsDir, scanFile.getFilename());
            if (!localScanFile.exists() && localScanFile.isFile()) {
                System.err.format("WARNING: could not find regular file at expected path: %s\n",
                        localScanFile.getAbsolutePath());
                continue;
            }
            System.out.format("Processing scan data at %s\n", localScanFile.getAbsolutePath());

            MS1ScanForWellAndMassCharge ms1ScanCache = new MS1ScanForWellAndMassCharge();
            MS1ScanForWellAndMassCharge ms1ScanResults = ms1ScanCache
                    .getByPlateIdPlateRowPlateColUseSnrScanFileChemical(db, p, well, true, scanFile,
                            searchMass.getKey(), metlinMasses, localScanFile);

            Double concentration = well.getConcentration();
            rampUp.add(Pair.of(concentration, ms1ScanResults));
        }

        WriteAndPlotMS1Results plotFeedingsResults = new WriteAndPlotMS1Results();
        plotFeedingsResults.plotFeedings(rampUp, searchIon, outPrefix, fmt, outPrefix + ".gnuplot");
    }

    public static void main(String[] args) throws Exception {
        Options opts = new Options();
        for (Option.Builder b : OPTION_BUILDERS) {
            opts.addOption(b.build());
        }

        CommandLine cl = null;
        try {
            CommandLineParser parser = new DefaultParser();
            cl = parser.parse(opts, args);
        } catch (ParseException e) {
            System.err.format("Argument parsing failed: %s\n", e.getMessage());
            HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                    true);
            System.exit(1);
        }

        if (cl.hasOption("help")) {
            HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                    true);
            return;
        }

        File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY));
        if (!lcmsDir.isDirectory()) {
            System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath());
            HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                    true);
            System.exit(1);
        }

        try (DB db = DB.openDBFromCLI(cl)) {
            System.out.format("Loading/updating LCMS scan files into DB\n");
            ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir);

            System.out.format("Running feeding analysis\n");
            performFeedingAnalysis(db, cl.getOptionValue(OPTION_DIRECTORY), cl.getOptionValue(OPTION_ION_NAME),
                    cl.getOptionValue(OPTION_SEARCH_MZ), cl.getOptionValue(OPTION_PLATE_BARCODE),
                    cl.getOptionValue(OPTION_FEEDING_STRAIN_OR_CONSTRUCT),
                    cl.getOptionValue(OPTION_FEEDING_EXTRACT), cl.getOptionValue(OPTION_FEEDING_FED_CHEMICAL),
                    cl.getOptionValue(OPTION_OUTPUT_PREFIX), "pdf");
        }
    }
}