Java tutorial
/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.lcms.db.analysis; import com.act.lcms.Gnuplotter; import com.act.lcms.MS1; import com.act.lcms.db.io.DB; import com.act.lcms.db.io.LoadPlateCompositionIntoDB; import com.act.lcms.db.model.ChemicalAssociatedWithPathway; import com.act.lcms.db.model.CuratedChemical; import com.act.lcms.db.model.LCMSWell; import com.act.lcms.db.model.Plate; import com.act.lcms.db.model.ScanFile; import com.act.lcms.db.model.StandardWell; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import java.io.File; import java.io.FileOutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; public class IonSearchAnalysis { public static final String OPTION_DIRECTORY = "d"; public static final String OPTION_OUTPUT_PREFIX = "o"; public static final String OPTION_STRAINS = "s"; public static final String OPTION_CONSTRUCTS = "c"; public static final String OPTION_NEGATIVE_STRAINS = "S"; public static final String OPTION_NEGATIVE_CONSTRUCTS = "C"; public static final String OPTION_STANDARD_NAME = "sn"; public static final String OPTION_STANDARD_PLATE_BARCODE = "sp"; public static final String OPTION_STANDARD_WELLS = "sw"; public static final String OPTION_SEARCH_MZ = "m"; public static final String OPTION_NO_STANDARD = "ns"; public static final String OPTION_FILTER_BY_PLATE_BARCODE = "p"; public static final String OPTION_USE_HEATMAP = "e"; public static final String OPTION_USE_SNR = "r"; public static final String HELP_MESSAGE = StringUtils .join(new String[] { "This class applies the MS1 LCMS analysis to a combination of ", "standards and samples. Specify positive constructs/strains and negative ", "controls to be analyzed and graphed together.\nStandards will be determined by ", "the positive samples' targets if a standard is not explicitly specified.\n", "An m/z value or chemical for which to search in the LCMS trace data can be ", "explicitly specified; if no search chemical is specified and all positive samples ", "share a single target, that target's mass will be used." }, ""); public static final HelpFormatter HELP_FORMATTER = new HelpFormatter(); static { HELP_FORMATTER.setWidth(100); } public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() { { add(Option.builder(OPTION_DIRECTORY).argName("directory") .desc("The directory where LCMS analysis results live").hasArg().required() .longOpt("data-dir")); add(Option.builder(OPTION_OUTPUT_PREFIX).argName("output prefix") .desc("A prefix for the output data/pdf files").hasArg().required().longOpt("output-prefix")); add(Option.builder(OPTION_STRAINS).argName("strains") .desc("The msids of the strains to be analyzed (specify only one of 'strain' or 'construct')") .hasArgs().valueSeparator(',').longOpt("msids")); add(Option.builder(OPTION_CONSTRUCTS).argName("constructs").desc( "The construct ids (composition) to be analyzed (specify only one of 'strain' or 'construct')") .hasArgs().valueSeparator(',').longOpt("construct-ids")); add(Option.builder(OPTION_NEGATIVE_STRAINS).argName("negative-strains") .desc("A strains to use as a negative control (the first novel LCMS sample will be used)") .hasArgs().valueSeparator(',').longOpt("negative-msids")); add(Option.builder(OPTION_NEGATIVE_CONSTRUCTS).argName("constructs") .desc("A constructs to use as a negative control (the first novel LCMS sample will be used)") .hasArgs().valueSeparator(',').longOpt("negative-construct-ids")); add(Option.builder(OPTION_STANDARD_NAME).argName("standard's chemical name") .desc("The name of the chemical to use as a standard (default will be LCMS wells' target)") .hasArg().longOpt("standard-name")); add(Option.builder(OPTION_STANDARD_PLATE_BARCODE).argName("standard plate barcode") .desc("The plate barcode to use when searching for a compatible standard").hasArg().required() .longOpt("standard-plate")); add(Option.builder(OPTION_SEARCH_MZ).argName("search chem") .desc("The m/z or chemical name to search for (default will use target of LCMS wells)").hasArg() .longOpt("search-chem")); add(Option.builder(OPTION_NO_STANDARD).argName("no standard") .desc("Specifies that the analysis should be completed without a standard") .longOpt("no-standard")); add(Option.builder(OPTION_STANDARD_WELLS).argName("well coordinates") .desc("Specifies a specific well or wells in the standard plate to use as the standard sample") .hasArgs().valueSeparator(',').longOpt("standard-wells")); add(Option.builder().argName("ion list").desc( "A comma-separated list of ions to include in the search (ions not in this list will be ignored)") .hasArgs().valueSeparator(',').longOpt("include-ions")); add(Option.builder().argName("ion list").desc( "A comma-separated list of ions to exclude from the search, takes precedence over include-ions") .hasArgs().valueSeparator(',').longOpt("exclude-ions")); add(Option.builder(OPTION_FILTER_BY_PLATE_BARCODE).argName("plate barcode list") .desc("A list of plate barcodes to consider, all other plates will be ignored").hasArgs() .valueSeparator(',').longOpt("include-plates")); add(Option.builder(OPTION_USE_HEATMAP).desc("Produce a heat map rather than a 2d line plot") .longOpt("heat-map")); // TODO: add filter on SCAN_MODE. add(Option.builder(OPTION_USE_SNR) .desc("Use signal-to-noise ratio instead of max intensity for peak identification") .longOpt("use-snr")); add(Option.builder().argName("font scale").desc( "A Gnuplot fontscale value, should be between 0.1 and 0.5 (0.4 works if the graph text is large") .hasArg().longOpt("font-scale")); add(Option.builder() .desc(String.format( "Use fine-grained M/Z tolerance (%.3f) when conducting the MS1 analysis " + "instead of default M/Z tolerance %.3f", MS1.MS1_MZ_TOLERANCE_FINE, MS1.MS1_MZ_TOLERANCE_DEFAULT)) .longOpt("fine-grained-mz")); // Everybody needs a little help from their friends. add(Option.builder("h").argName("help").desc("Prints this help message").longOpt("help")); } }; static { // Add DB connection options. OPTION_BUILDERS.addAll(DB.DB_OPTION_BUILDERS); } public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build()); } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY)); if (!lcmsDir.isDirectory()) { System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } Double fontScale = null; if (cl.hasOption("font-scale")) { try { fontScale = Double.parseDouble(cl.getOptionValue("font-scale")); } catch (IllegalArgumentException e) { System.err.format("Argument for font-scale must be a floating point number.\n"); System.exit(1); } } try (DB db = DB.openDBFromCLI(cl)) { Set<String> includeIons = null; if (cl.hasOption("include-ions")) { String[] ionNames = cl.getOptionValues("include-ions"); includeIons = new HashSet<>(Arrays.asList(ionNames)); System.out.format("Including ions in search: %s\n", StringUtils.join(includeIons, ", ")); } Set<String> excludeIons = null; if (cl.hasOption("exclude-ions")) { String[] ionNames = cl.getOptionValues("exclude-ions"); excludeIons = new HashSet<>(Arrays.asList(ionNames)); System.out.format("Excluding ions from search: %s\n", StringUtils.join(excludeIons, ", ")); } Set<Integer> takeSamplesFromPlateIds = null; if (cl.hasOption(OPTION_FILTER_BY_PLATE_BARCODE)) { String[] plateBarcodes = cl.getOptionValues(OPTION_FILTER_BY_PLATE_BARCODE); System.out.format("Considering only sample wells in plates: %s\n", StringUtils.join(plateBarcodes, ", ")); takeSamplesFromPlateIds = new HashSet<>(plateBarcodes.length); for (String plateBarcode : plateBarcodes) { Plate p = Plate.getPlateByBarcode(db, plateBarcode); if (p == null) { System.err.format("WARNING: unable to find plate in DB with barcode %s\n", plateBarcode); } else { takeSamplesFromPlateIds.add(p.getId()); } } // Allow filtering on barcode even if we couldn't find any in the DB. } System.out.format("Loading/updating LCMS scan files into DB\n"); ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir); System.out.format("Processing LCMS scans\n"); Pair<List<LCMSWell>, Set<Integer>> positiveWellsAndPlateIds = Utils.extractWellsAndPlateIds(db, cl.getOptionValues(OPTION_STRAINS), cl.getOptionValues(OPTION_CONSTRUCTS), takeSamplesFromPlateIds, false); List<LCMSWell> positiveWells = positiveWellsAndPlateIds.getLeft(); if (positiveWells.size() == 0) { throw new RuntimeException("Found no LCMS wells for specified strains/constructs"); } // Only take negative samples from the plates where we found the positive samples. Pair<List<LCMSWell>, Set<Integer>> negativeWellsAndPlateIds = Utils.extractWellsAndPlateIds(db, cl.getOptionValues(OPTION_NEGATIVE_STRAINS), cl.getOptionValues(OPTION_NEGATIVE_CONSTRUCTS), positiveWellsAndPlateIds.getRight(), true); List<LCMSWell> negativeWells = negativeWellsAndPlateIds.getLeft(); if (negativeWells == null || negativeWells.size() == 0) { System.err.format("WARNING: no valid negative samples found in same plates as positive samples\n"); } // Extract the reference MZ that will be used in the LCMS trace processing. List<Pair<String, Double>> searchMZs = null; Set<CuratedChemical> standardChemicals = null; List<ChemicalAssociatedWithPathway> pathwayChems = null; if (cl.hasOption(OPTION_SEARCH_MZ)) { // Assume mz can be an FP number of a chemical name. String massStr = cl.getOptionValue(OPTION_SEARCH_MZ); Pair<String, Double> searchMZ = Utils.extractMassFromString(db, massStr); if (searchMZ != null) { searchMZs = Collections.singletonList(searchMZ); } standardChemicals = Utils.extractTargetsForWells(db, positiveWells); } else { CuratedChemical targetChemical = Utils.requireOneTarget(db, positiveWells); if (targetChemical == null) { throw new RuntimeException( "Unable to find a curated chemical entry for specified strains'/constructs' targets. " + "Please specify a chemical name or m/z explicitly or update the curated chemicals list in the DB."); } System.out.format("Using reference M/Z for positive target %s (%f)\n", targetChemical.getName(), targetChemical.getMass()); searchMZs = Collections.singletonList(Pair.of(targetChemical.getName(), targetChemical.getMass())); standardChemicals = Collections.singleton(targetChemical); } // Look up the standard by name, or use the target if none is specified. List<StandardWell> standardWells = null; if (cl.hasOption(OPTION_NO_STANDARD)) { System.err.format("WARNING: skipping standard comparison (no-standard option specified)\n"); standardWells = new ArrayList<>(0); } else if (cl.hasOption(OPTION_STANDARD_WELLS)) { String[] standardCoordinates = cl.getOptionValues(OPTION_STANDARD_WELLS); standardWells = new ArrayList<>(standardCoordinates.length); Plate standardPlate = Plate.getPlateByBarcode(db, cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE)); List<String> foundCoordinates = new ArrayList<>(standardCoordinates.length); for (String stringCoords : standardCoordinates) { Pair<Integer, Integer> coords = Utils.parsePlateCoordinates(stringCoords); StandardWell well = StandardWell.getInstance().getStandardWellsByPlateIdAndCoordinates(db, standardPlate.getId(), coords.getLeft(), coords.getRight()); if (well == null) { System.err.format("Unable to find standard well at %s [%s]\n", standardPlate.getBarcode(), stringCoords); continue; } standardWells.add(well); foundCoordinates.add(stringCoords); } System.out.format("Using explicitly specified standard wells %s [%s]\n", standardPlate.getBarcode(), StringUtils.join(foundCoordinates, ", ")); } else if (cl.hasOption(OPTION_STANDARD_NAME)) { String standardName = cl.getOptionValue(OPTION_STANDARD_NAME); System.out.format("Using explicitly specified standard %s\n", standardName); standardWells = Collections.singletonList(Utils.extractStandardWellFromPlate(db, cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE), standardName)); } else if (standardChemicals != null && standardChemicals.size() > 0) { // Default to using the target chemical(s) as a standard if none is specified. standardWells = new ArrayList<>(standardChemicals.size()); for (CuratedChemical c : standardChemicals) { String standardName = c.getName(); System.out.format("Searching for well containing standard %s\n", standardName); standardWells.add(Utils.extractStandardWellFromPlate(db, cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE), standardName)); } } boolean useFineGrainedMZ = cl.hasOption("fine-grained-mz"); boolean useSNR = cl.hasOption(OPTION_USE_SNR); /* Process the standard, positive, and negative wells, producing ScanData containers that will allow them to be * iterated over for graph writing. */ HashMap<Integer, Plate> plateCache = new HashMap<>(); Pair<List<ScanData<StandardWell>>, Double> allStandardScans = AnalysisHelper.processScans(db, lcmsDir, searchMZs, ScanData.KIND.STANDARD, plateCache, standardWells, useFineGrainedMZ, includeIons, excludeIons, useSNR); Pair<List<ScanData<LCMSWell>>, Double> allPositiveScans = AnalysisHelper.processScans(db, lcmsDir, searchMZs, ScanData.KIND.POS_SAMPLE, plateCache, positiveWells, useFineGrainedMZ, includeIons, excludeIons, useSNR); Pair<List<ScanData<LCMSWell>>, Double> allNegativeScans = AnalysisHelper.processScans(db, lcmsDir, searchMZs, ScanData.KIND.NEG_CONTROL, plateCache, negativeWells, useFineGrainedMZ, includeIons, excludeIons, useSNR); String fmt = "pdf"; String outImg = cl.getOptionValue(OPTION_OUTPUT_PREFIX) + "." + fmt; String outData = cl.getOptionValue(OPTION_OUTPUT_PREFIX) + ".data"; System.err.format("Writing combined scan data to %s and graphs to %s\n", outData, outImg); produceLCMSSearchPlots(lcmsDir, outData, outImg, allStandardScans, allPositiveScans, allNegativeScans, fontScale, useFineGrainedMZ, cl.hasOption(OPTION_USE_HEATMAP), useSNR); } } public static void produceLCMSSearchPlots(File lcmsDir, String outData, String outImg, Pair<List<ScanData<StandardWell>>, Double> allStandardScans, Pair<List<ScanData<LCMSWell>>, Double> allPositiveScans, Pair<List<ScanData<LCMSWell>>, Double> allNegativeScans, Double fontScale, boolean useFineGrainedMZ, boolean makeHeatmaps, boolean useSNR) throws Exception { List<ScanData> allScanData = new ArrayList<ScanData>() { { addAll(allStandardScans.getLeft()); addAll(allPositiveScans.getLeft()); addAll(allNegativeScans.getLeft()); } }; // Get the global maximum intensity across all scans. Double maxIntensity = Math.max(allStandardScans.getRight(), Math.max(allPositiveScans.getRight(), allNegativeScans.getRight())); System.out.format("Processing LCMS scans for graphing:\n"); for (ScanData scanData : allScanData) { System.out.format(" %s\n", scanData.toString()); } String fmt = "pdf"; System.err.format("Writing combined scan data to %s and graphs to %s\n", outData, outImg); // Generate the data file and graphs. try (FileOutputStream fos = new FileOutputStream(outData)) { // Write all the scan data out to a single data file. List<String> graphLabels = new ArrayList<>(); for (ScanData scanData : allScanData) { graphLabels.addAll( AnalysisHelper.writeScanData(fos, lcmsDir, maxIntensity, scanData, makeHeatmaps, true)); } Gnuplotter plotter = fontScale == null ? new Gnuplotter() : new Gnuplotter(fontScale); if (makeHeatmaps) { plotter.plotHeatmap(outData, outImg, graphLabels.toArray(new String[graphLabels.size()]), maxIntensity, fmt); } else { plotter.plot2D(outData, outImg, graphLabels.toArray(new String[graphLabels.size()]), "time", maxIntensity, "intensity", fmt); } } } }