com.act.lcms.db.io.ExportStandardIonResultsFromDB.java Source code

Java tutorial

Introduction

Here is the source code for com.act.lcms.db.io.ExportStandardIonResultsFromDB.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.lcms.db.io;

import com.act.lcms.Gnuplotter;
import com.act.lcms.XZ;
import com.act.lcms.db.analysis.AnalysisHelper;
import com.act.lcms.db.analysis.ScanData;
import com.act.lcms.db.analysis.Utils;
import com.act.lcms.db.model.ChemicalAssociatedWithPathway;
import com.act.lcms.db.model.LCMSWell;
import com.act.lcms.db.model.StandardIonResult;
import com.act.lcms.db.model.StandardWell;
import com.act.utils.TSVWriter;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Command-line tool that exports standard ion analysis results from the DB for manual assessment.
 *
 * <p>For every requested chemical (given directly by name and/or derived from a construct's pathway) the tool
 * writes a Gnuplot data file and a PDF of diagnostic plots into the plotting directory, and appends one row to a
 * TSV file containing the chemical name, the best ion picked by the scoring algorithm and the path of the plot.
 */
public class ExportStandardIonResultsFromDB {

    /** Placeholder scan written between plot pages to visually separate them. */
    private static final ScanData<LCMSWell> BLANK_SCAN = new ScanData<>(ScanData.KIND.BLANK, null, null, null, null,
            null, null);
    /** Ion that is always plotted (on its own page) in addition to the algorithmically chosen ions. */
    private static final String DEFAULT_ION = "M+H";
    /** Maximum intensity handed to {@code AnalysisHelper.writeScanData} for real (non-blank) scans. */
    private static final Double MAX_INTENSITY = 500000.0d;

    public static final String OPTION_DIRECTORY = "d";
    public static final String TSV_FORMAT = "tsv";
    public static final String OPTION_CONSTRUCT = "C";
    public static final String OPTION_CHEMICALS = "c";
    public static final String OPTION_OUTPUT_PREFIX = "o";
    public static final String OPTION_PLOTTING_DIR = "p";
    public static final String FONT_SCALE = "f";
    public static final String NULL_VALUE = "NULL";
    public static final String HELP_MESSAGE = StringUtils.join(new String[] {
            "This class is used to export relevant standard ion analysis data to the scientist from the "
                    + "standard_ion_results DB for manual assessment (done in github) through a TSV file. The inputs to this "
                    + "class either be an individual standard chemical name OR a construct pathway." },
            "");
    public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();

    static {
        HELP_FORMATTER.setWidth(100);
    }

    /** Builders for every command-line option understood by this tool, including the DB connection options. */
    public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<>();

    static {
        OPTION_BUILDERS.add(Option.builder(OPTION_DIRECTORY).argName("directory")
                .desc("The directory where LCMS analysis results live").hasArg().required()
                .longOpt("data-dir"));
        OPTION_BUILDERS.add(Option.builder(OPTION_CONSTRUCT).argName("construct")
                .desc("The construct to get results from").hasArg());
        OPTION_BUILDERS.add(Option.builder(OPTION_CHEMICALS).argName("a comma separated list of chemical names")
                .desc("A list of chemicals to get standard ion data from").hasArgs().valueSeparator(','));
        OPTION_BUILDERS.add(Option.builder(OPTION_OUTPUT_PREFIX).argName("The prefix name")
                .desc("The prefix of the output file").hasArg());
        OPTION_BUILDERS.add(Option.builder("h").argName("help").desc("Prints this help message").longOpt("help"));
        OPTION_BUILDERS.add(Option.builder(OPTION_PLOTTING_DIR).argName("plotting directory")
                .desc("The absolute path of the plotting directory").hasArg().required()
                .longOpt("plotting-dir"));
        OPTION_BUILDERS.add(Option.builder(FONT_SCALE).argName("font scale").desc(
                "A Gnuplot fontscale value, should be between 0.1 and 0.5 (0.4 works if the graph text is large)")
                .hasArg().longOpt("font-scale"));

        // Add DB connection options.
        OPTION_BUILDERS.addAll(DB.DB_OPTION_BUILDERS);
    }

    /** Column names available for the exported TSV. */
    public enum STANDARD_ION_HEADER_FIELDS {
        CHEMICAL, BEST_ION_FROM_ALGO, MANUAL_PICK, AUTHOR, NOTE, DIAGNOSTIC_PLOTS,
        // TODO: Deprecate STANDARD_ION_RESULT_ID in the subsequent port of LoadStandardIonAnalysisTableIntoDB.java class.
        STANDARD_ION_RESULT_ID
    }

    /**
     * Shortens the one long yeast media description this tool knows about so that plot labels stay readable.
     *
     * @param name the media description of a well
     * @return {@code "SC Minimal Broth"} if {@code name} contains the long Teknova description, otherwise
     *         {@code name} unchanged
     */
    private static String sanitizeYeastMediaString(String name) {
        if (name.contains("Teknova SC Minimal Broth with Raffinose minus Uracil plus Gal")) {
            return "SC Minimal Broth";
        }
        return name;
    }

    /**
     * Builds the extra text appended to a graph label: the well's media and concentration, followed by either the
     * intensity/time recorded for {@code ion}, a "No peaks found" note, or a "Negative Control" marker.
     *
     * @param well   the well being plotted
     * @param result the standard ion result of the well, or {@code null} when the well is a negative control
     * @param ion    the ion whose analysis result should be reported; ignored when {@code result} is {@code null}
     * @return the label suffix, starting with a newline
     */
    private static String generateAdditionalLabelInformation(StandardWell well, StandardIonResult result,
            String ion) {
        String plateMetadata = sanitizeYeastMediaString(well.getMedia()) + " "
                + (well.getConcentration() == null ? "" : well.getConcentration());

        if (result == null) {
            return String.format("\n%s %s", plateMetadata, "Negative Control");
        }

        XZ intensityAndTimeOfBestIon = result.getAnalysisResults().get(ion);

        // A missing entry means the standard ion result for this well has no good detectable peak for the ion,
        // e.g. within a restricted time region (in the case of yeast).
        if (intensityAndTimeOfBestIon == null) {
            return String.format("\n%s %s", plateMetadata, "\nNo peaks found");
        }

        String snrAndTime = String.format("\n%.2fSNR at %.2fs", intensityAndTimeOfBestIon.getIntensity(),
                intensityAndTimeOfBestIon.getTime());
        return String.format("\n%s %s", plateMetadata, snrAndTime);
    }

    /** Prints the usage message for this tool. */
    private static void printHelp(Options opts) {
        HELP_FORMATTER.printHelp(ExportStandardIonResultsFromDB.class.getCanonicalName(), HELP_MESSAGE, opts,
                null, true);
    }

    /**
     * Gathers the chemical names requested on the command line: first the chemicals associated with the construct
     * (if one was given), then the explicitly listed chemicals (if any).
     *
     * @param db an open DB connection, used to resolve the construct's chemicals
     * @param cl the parsed command line
     * @return the requested chemical names, possibly empty
     */
    private static List<String> collectChemicalNames(DB db, CommandLine cl) throws Exception {
        List<String> chemicalNames = new ArrayList<>();

        if (cl.hasOption(OPTION_CONSTRUCT)) {
            // Extract the chemicals in the pathway and their product masses, then look up info on those chemicals
            List<Pair<ChemicalAssociatedWithPathway, Double>> productMasses = Utils
                    .extractMassesForChemicalsAssociatedWithConstruct(db, cl.getOptionValue(OPTION_CONSTRUCT));
            for (Pair<ChemicalAssociatedWithPathway, Double> pair : productMasses) {
                chemicalNames.add(pair.getLeft().getChemical());
            }
        }

        if (cl.hasOption(OPTION_CHEMICALS)) {
            chemicalNames.addAll(Arrays.asList(cl.getOptionValues(OPTION_CHEMICALS)));
        }

        return chemicalNames;
    }

    /**
     * Returns the page priority of an ion: the global best ion comes first, then {@link #DEFAULT_ION}, then
     * every other (locally best) ion.
     */
    private static int ionPagePriority(String ion, String bestGlobalIon) {
        if (ion.equals(bestGlobalIon)) {
            return 0;
        }
        if (ion.equals(DEFAULT_ION)) {
            return 1;
        }
        return 2;
    }

    /**
     * Orders the ions as 1) global best ion, 2) M+H, 3) local best ions.
     *
     * <p>The sort is stable and the key is a total order, so ions of equal priority keep the iteration order of
     * {@code ions} and the comparator contract is honoured.
     *
     * @param ions          the distinct, non-null ions to plot
     * @param bestGlobalIon the best ion across all results of the chemical; may be {@code null}
     * @return a new list holding {@code ions} in plotting order
     */
    private static List<String> orderIonsForPlotting(Set<String> ions, String bestGlobalIon) {
        List<String> ordered = new ArrayList<>(ions);
        ordered.sort(Comparator.comparingInt(ion -> ionPagePriority(ion, bestGlobalIon)));
        return ordered;
    }

    /**
     * Writes the plot data and PDF for one chemical and appends its row to the TSV.
     *
     * <p>The data file is created even when the chemical has no standard ion results; in that case nothing is
     * plotted and no TSV row is written.
     *
     * <p>The plots are structured as follows, each page being demarcated by a blank graph:
     * <ul>
     * <li>Page 1: all graphs (one per standard ion result, grouped by media) for the global ion, i.e. the best ion
     * among ALL standard ion runs for the chemical.</li>
     * <li>Page 2: all graphs for M+H.</li>
     * <li>Page 3: graphs for local ions (best ion within a SINGLE standard ion run), plus a negative control for
     * results in yeast media.</li>
     * </ul>
     *
     * @param db                an open DB connection
     * @param lcmsDir           directory holding the LCMS analysis results
     * @param plottingDirectory directory the {@code .data} and {@code .pdf} files are written to
     * @param fontScale         Gnuplot font scale, or {@code null} to use the plotter's default
     * @param chemicalName      the chemical to export
     * @param resultsWriter     an open TSV writer that receives the chemical's row
     */
    private static void exportChemical(DB db, File lcmsDir, String plottingDirectory, Double fontScale,
            String chemicalName, TSVWriter<String, String> resultsWriter) throws Exception {
        List<String> graphLabels = new ArrayList<>();
        List<Double> yMaxList = new ArrayList<>();

        String outData = plottingDirectory + "/" + chemicalName + ".data";
        String outImg = plottingDirectory + "/" + chemicalName + ".pdf";

        // For each diagnostic plot, open a new file stream.
        try (FileOutputStream fos = new FileOutputStream(outData)) {
            List<StandardIonResult> resultsForChemical = StandardIonResult.getByChemicalName(db, chemicalName);
            if (resultsForChemical == null || resultsForChemical.isEmpty()) {
                return;
            }

            // PART 1: Get the best metlin ion across all standard ion results for a given chemical
            String bestGlobalMetlinIon = AnalysisHelper.scoreAndReturnBestMetlinIonFromStandardIonResults(
                    resultsForChemical, new HashMap<>(), true, true);

            // PART 2: Plot all the graphs related to the chemical.

            // Arrange results based on media
            Map<String, List<StandardIonResult>> categories = StandardIonResult
                    .categorizeListOfStandardWellsByMedia(db, resultsForChemical);

            // All ions that get plotted: the global best, M+H and the best ion of every single run. Null ions are
            // skipped defensively; they cannot be plotted and would break the ordering below.
            Set<String> ionsToPlot = new HashSet<>();
            if (bestGlobalMetlinIon != null) {
                ionsToPlot.add(bestGlobalMetlinIon);
            }
            ionsToPlot.add(DEFAULT_ION);
            for (StandardIonResult result : resultsForChemical) {
                String bestLocalIon = result.getBestMetlinIon();
                if (bestLocalIon != null) {
                    ionsToPlot.add(bestLocalIon);
                }
            }

            List<String> orderedIons = orderIonsForPlotting(ionsToPlot, bestGlobalMetlinIon);

            // Index from which all remaining spectra are combined into one page (page 3). This is right after the
            // M+H ion spectra; M+H is always present in the list.
            int firstMiscellaneousIndex = orderedIons.indexOf(DEFAULT_ION) + 1;
            String yeastMediaName = StandardWell.MEDIA_TYPE.YEAST.name();

            for (int i = 0; i < orderedIons.size(); i++) {
                String ion = orderedIons.get(i);
                boolean onMiscellaneousPage = i >= firstMiscellaneousIndex;
                Set<String> singletonSet = Collections.singleton(ion);

                for (Map.Entry<String, List<StandardIonResult>> mediaToListOfIonResults : categories.entrySet()) {
                    boolean isYeastMedia = yeastMediaName.equals(mediaToListOfIonResults.getKey());

                    for (StandardIonResult result : mediaToListOfIonResults.getValue()) {
                        // For every standard ion result, we plot the best global metlin ion and M+H (pages 1 and 2).
                        // On page 3 (miscellaneous spectra) a result is only plotted for its own best local ion,
                        // so results whose best ion differs from the current one are skipped there.
                        if (onMiscellaneousPage && !ion.equals(result.getBestMetlinIon())) {
                            continue;
                        }

                        StandardWell positiveWell = StandardWell.getInstance().getById(db,
                                result.getStandardWellId());
                        String positiveControlChemical = positiveWell.getChemical();

                        ScanData<StandardWell> positiveScanData = AnalysisHelper.getScanDataForWell(db, lcmsDir,
                                positiveWell, positiveControlChemical, positiveControlChemical);

                        String additionalInfo = generateAdditionalLabelInformation(positiveWell, result, ion);

                        graphLabels.addAll(AnalysisHelper
                                .writeScanData(fos, lcmsDir, MAX_INTENSITY, positiveScanData, false, false,
                                        singletonSet)
                                .stream().map(label -> label + additionalInfo).collect(Collectors.toList()));
                        yMaxList.add(positiveScanData.getMs1ScanResults().getMaxIntensityForIon(ion));

                        // Only do the negative control in the miscellaneous page (page 3) and if the well is in
                        // yeast media. Reaching this point on page 3 already implies the result's best ion is
                        // the current ion (see the guard above).
                        if (isYeastMedia && onMiscellaneousPage) {
                            //TODO: Change the representative negative well to one that displays the highest noise in the future.
                            // For now, we just use the first index among the negative wells.
                            int representativeIndex = 0;
                            StandardWell representativeNegativeControlWell = StandardWell.getInstance()
                                    .getById(db, result.getNegativeWellIds().get(representativeIndex));

                            ScanData<StandardWell> negativeScanData = AnalysisHelper.getScanDataForWell(db,
                                    lcmsDir, representativeNegativeControlWell, positiveWell.getChemical(),
                                    representativeNegativeControlWell.getChemical());

                            String negativePlateAdditionalInfo = generateAdditionalLabelInformation(
                                    representativeNegativeControlWell, null, null);

                            graphLabels.addAll(AnalysisHelper
                                    .writeScanData(fos, lcmsDir, MAX_INTENSITY, negativeScanData, false, false,
                                            singletonSet)
                                    .stream().map(label -> label + negativePlateAdditionalInfo)
                                    .collect(Collectors.toList()));
                            yMaxList.add(negativeScanData.getMs1ScanResults().getMaxIntensityForIon(ion));
                        }
                    }
                }

                // Add a blank graph to demarcate pages.
                if (!onMiscellaneousPage) {
                    graphLabels.addAll(AnalysisHelper.writeScanData(fos, lcmsDir, 0.0, BLANK_SCAN, false, false,
                            new HashSet<>()));
                    yMaxList.add(0.0d);
                }
            }

            // We need to pass the yMax values as an array to the Gnuplotter.
            Double[] yMaxes = yMaxList.toArray(new Double[yMaxList.size()]);
            Gnuplotter plotter = fontScale == null ? new Gnuplotter() : new Gnuplotter(fontScale);
            plotter.plot2D(outData, outImg, graphLabels.toArray(new String[graphLabels.size()]), "time", null,
                    "intensity", "pdf", null, null, yMaxes, outImg + ".gnuplot");

            Map<String, String> row = new HashMap<>();
            row.put(STANDARD_ION_HEADER_FIELDS.CHEMICAL.name(), chemicalName);
            row.put(STANDARD_ION_HEADER_FIELDS.BEST_ION_FROM_ALGO.name(), bestGlobalMetlinIon);
            row.put(STANDARD_ION_HEADER_FIELDS.DIAGNOSTIC_PLOTS.name(), outImg);

            resultsWriter.append(row);
            resultsWriter.flush();
        }
    }

    /**
     * Entry point: parses the command line, validates the inputs, then exports a TSV row and a diagnostic plot for
     * every requested chemical.
     *
     * <p>Exits with status 1 on invalid arguments (unparseable command line, non-numeric font scale, data
     * directory that is not a directory) and with status -1 when no chemical could be derived from the query.
     *
     * @param args the command-line arguments, see {@link #OPTION_BUILDERS}
     * @throws Exception if the DB, the LCMS data or the output files cannot be accessed
     */
    public static void main(String[] args) throws Exception {
        Options opts = new Options();
        for (Option.Builder b : OPTION_BUILDERS) {
            opts.addOption(b.build());
        }

        CommandLine cl = null;
        try {
            CommandLineParser parser = new DefaultParser();
            cl = parser.parse(opts, args);
        } catch (ParseException e) {
            System.err.format("Argument parsing failed: %s\n", e.getMessage());
            printHelp(opts);
            System.exit(1);
        }

        if (cl.hasOption("help")) {
            printHelp(opts);
            return;
        }

        // Validate cheap inputs before touching the DB so that a bad argument neither leaves a DB connection
        // unclosed on exit nor aborts the run after part of the output has already been produced.
        Double fontScale = null;
        if (cl.hasOption(FONT_SCALE)) {
            try {
                fontScale = Double.parseDouble(cl.getOptionValue(FONT_SCALE));
            } catch (NumberFormatException e) {
                System.err.format("Argument for font-scale must be a floating point number.\n");
                System.exit(1);
            }
        }

        File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY));
        if (!lcmsDir.isDirectory()) {
            System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath());
            printHelp(opts);
            System.exit(1);
        }

        String plottingDirectory = cl.getOptionValue(OPTION_PLOTTING_DIR);

        boolean noChemicalsFound = false;
        try (DB db = DB.openDBFromCLI(cl)) {
            List<String> chemicalNames = collectChemicalNames(db, cl);

            if (chemicalNames.isEmpty()) {
                // Exit only after the DB has been closed (see below).
                noChemicalsFound = true;
            } else {
                List<String> standardIonHeaderFields = new ArrayList<>(Arrays.asList(
                        STANDARD_ION_HEADER_FIELDS.CHEMICAL.name(),
                        STANDARD_ION_HEADER_FIELDS.BEST_ION_FROM_ALGO.name(),
                        STANDARD_ION_HEADER_FIELDS.MANUAL_PICK.name(),
                        STANDARD_ION_HEADER_FIELDS.AUTHOR.name(),
                        STANDARD_ION_HEADER_FIELDS.DIAGNOSTIC_PLOTS.name(),
                        STANDARD_ION_HEADER_FIELDS.NOTE.name()));

                String outputPrefix = cl.hasOption(OPTION_OUTPUT_PREFIX)
                        ? cl.getOptionValue(OPTION_OUTPUT_PREFIX)
                        : String.join("-", chemicalNames);
                String outAnalysis = outputPrefix + "." + TSV_FORMAT;

                TSVWriter<String, String> resultsWriter = new TSVWriter<>(standardIonHeaderFields);
                resultsWriter.open(new File(outAnalysis));
                try {
                    // For each chemical, create a TSV row and a corresponding diagnostic plot
                    for (String chemicalName : chemicalNames) {
                        exportChemical(db, lcmsDir, plottingDirectory, fontScale, chemicalName, resultsWriter);
                    }
                    resultsWriter.flush();
                } finally {
                    // Close the TSV even when a chemical fails so rows written so far are not lost.
                    resultsWriter.close();
                }
            }
        }

        if (noChemicalsFound) {
            System.err.format("No chemicals can be found from the input query.\n");
            System.exit(-1);
        }
    }
}