Java tutorial
/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.lcms.db.analysis; import com.act.lcms.MS1; import com.act.lcms.db.io.DB; import com.act.lcms.db.io.LoadPlateCompositionIntoDB; import com.act.lcms.db.model.CuratedChemical; import com.act.lcms.db.model.FeedingLCMSWell; import com.act.lcms.db.model.MS1ScanForWellAndMassCharge; import com.act.lcms.db.model.Plate; import com.act.lcms.db.model.ScanFile; import com.act.lcms.plotter.WriteAndPlotMS1Results; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import java.io.File; import java.sql.SQLException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; public class FeedingAnalysis { public static final String DEFAULT_ION = "M+H"; public static final String OPTION_DIRECTORY = "d"; public static final String OPTION_OUTPUT_PREFIX = "o"; public static final String OPTION_FEEDING_STRAIN_OR_CONSTRUCT = "c"; public static final String OPTION_FEEDING_EXTRACT = "e"; public static final String OPTION_FEEDING_FED_CHEMICAL = "f"; public static final String OPTION_ION_NAME = "i"; public static final String OPTION_PLATE_BARCODE = "b"; public static final String OPTION_SEARCH_MZ = "m"; public static final String HELP_MESSAGE = StringUtils .join(new String[] { "This class applies the MS1 LCMS analysis to a strain/construct fed ", "different concentrations of a precursors. It takes in a feeding specification ", "plate, strain or construct (MSID or internal name), extract, ion name, mz, ", "and fed chemical and generates a comparative graph of ", "intensity vs. concentration (using max peak inferred from traces) ", "and an overlaid graph of the spectra in the various concentrations." }, ""); public static final HelpFormatter HELP_FORMATTER = new HelpFormatter(); static { HELP_FORMATTER.setWidth(100); } public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() { { add(Option.builder(OPTION_DIRECTORY).argName("directory") .desc("The directory where LCMS analysis results live").hasArg().required() .longOpt("data-dir")); add(Option.builder(OPTION_OUTPUT_PREFIX).argName("output prefix") .desc("A prefix for the output data/pdf files").hasArg().required().longOpt("output-prefix")); add(Option.builder(OPTION_FEEDING_STRAIN_OR_CONSTRUCT) .desc("Perform a feeding analysis for this strain/construct").hasArg().required() .longOpt("construct-or-strain")); add(Option.builder(OPTION_FEEDING_EXTRACT) .desc("Specify the extract for which to perform a feeding analysis").hasArg().required() .longOpt("extract")); add(Option.builder(OPTION_FEEDING_FED_CHEMICAL) .desc("Specify the fed chemical group for which to perform the feeding analysis").hasArg() .required().longOpt("fed-chemical")); add(Option.builder(OPTION_ION_NAME) .desc(String.format( "An ion of the chemical target for which to compute feeding curves (default is %s)", DEFAULT_ION)) .hasArg().longOpt("ion")); add(Option.builder(OPTION_PLATE_BARCODE) .desc("The barcode of the plate from which to extract feeding data").hasArg().required() .longOpt("plate-barcode")); add(Option.builder(OPTION_SEARCH_MZ).desc( "A m/z value or chemical name to search for in the feeding data; default is the construct's target") .hasArg().longOpt("search-chem")); } }; static { // Add DB connection options. OPTION_BUILDERS.addAll(DB.DB_OPTION_BUILDERS); } private static void performFeedingAnalysis(DB db, String lcmsDir, String searchIon, String searchMassStr, String plateBarcode, String strainOrConstruct, String extract, String feedingCondition, String outPrefix, String fmt) throws SQLException, Exception { Plate p = Plate.getPlateByBarcode(db, plateBarcode); if (p == null) { throw new RuntimeException(String.format("Unable to find plate with barcode %s", plateBarcode)); } if (p.getContentType() != Plate.CONTENT_TYPE.FEEDING_LCMS) { throw new RuntimeException(String.format("Plate with barcode %s is not a feeding plate (%s)", plateBarcode, p.getContentType())); } List<FeedingLCMSWell> allPlateWells = FeedingLCMSWell.getInstance().getFeedingLCMSWellByPlateId(db, p.getId()); if (allPlateWells == null || allPlateWells.size() == 0) { throw new RuntimeException( String.format("No feeding LCMS wells available for plate %s", p.getBarcode())); } List<FeedingLCMSWell> relevantWells = new ArrayList<>(); for (FeedingLCMSWell well : allPlateWells) { if (!well.getMsid().equals(strainOrConstruct) && !well.getComposition().equals(strainOrConstruct)) { // Ignore wells that don't have the right strain/construct (though we assume the whole plate shares one). continue; } if (!well.getExtract().equals(extract)) { // Filter by extract. continue; } if (!well.getChemical().equals(feedingCondition)) { // Filter by fed chemical. continue; } relevantWells.add(well); } Collections.sort(relevantWells, new Comparator<FeedingLCMSWell>() { @Override public int compare(FeedingLCMSWell o1, FeedingLCMSWell o2) { // Assume concentration is never null. return o1.getConcentration().compareTo(o2.getConcentration()); } }); Map<FeedingLCMSWell, ScanFile> wellsToScanFiles = new HashMap<>(); Set<String> constructs = new HashSet<>(1); for (FeedingLCMSWell well : relevantWells) { List<ScanFile> scanFiles = ScanFile.getScanFileByPlateIDRowAndColumn(db, well.getPlateId(), well.getPlateRow(), well.getPlateColumn()); if (scanFiles == null || scanFiles.size() == 0) { System.err.format("WARNING: no scan files for well at %s %s\n", p.getBarcode(), well.getCoordinatesString()); continue; } if (scanFiles.size() > 1) { System.err.format("WARNING: found multiple scan files for %s %s, using first\n", p.getBarcode(), well.getCoordinatesString()); } while (scanFiles.size() > 0 && scanFiles.get(0).getFileType() != ScanFile.SCAN_FILE_TYPE.NC) { scanFiles.remove(0); } if (scanFiles.size() == 0) { System.err.format("WARNING: no scan files with valid format for %s %s\n", p.getBarcode(), well.getCoordinatesString()); continue; } // All of the extracted wells should be unique, so there should be no collisions here. wellsToScanFiles.put(well, scanFiles.get(0)); constructs.add(well.getComposition()); } Pair<String, Double> searchMass = null; if (searchMassStr != null) { searchMass = Utils.extractMassFromString(db, searchMassStr); } if (searchMass == null) { if (constructs.size() != 1) { throw new RuntimeException(String.format( "Found multiple growth targets for feeding analysis when no mass specified: %s", StringUtils.join(constructs, ", "))); } String constructName = constructs.iterator().next(); CuratedChemical cc = Utils.extractTargetForConstruct(db, constructName); if (cc == null) { throw new RuntimeException( String.format("Unable to find curated chemical for construct %s", constructName)); } System.out.format("Using target %s of construct %s as search mass (%f)\n", cc.getName(), constructName, cc.getMass()); searchMass = Pair.of(cc.getName(), cc.getMass()); } MS1 c = new MS1(); // TODO: use configurable or scan-file derived ion mode. Do it the way its done in: // https://github.com/20n/act/blob/d997e84f0f44a5c88a94ef935829cb47e0ca8d1a/reachables/src/main/java/com/act/lcms/db/analysis/AnalysisHelper.java#L79 MS1.IonMode mode = MS1.IonMode.valueOf("POS"); Map<String, Double> metlinMasses = c.getIonMasses(searchMass.getValue(), mode); if (searchIon == null || searchIon.isEmpty()) { System.err.format("No search ion defined, defaulting to M+H\n"); searchIon = DEFAULT_ION; } List<Pair<Double, MS1ScanForWellAndMassCharge>> rampUp = new ArrayList<>(); for (FeedingLCMSWell well : relevantWells) { ScanFile scanFile = wellsToScanFiles.get(well); if (scanFile == null) { System.err.format("WARNING: no scan file available for %s %s", p.getBarcode(), well.getCoordinatesString()); continue; } File localScanFile = new File(lcmsDir, scanFile.getFilename()); if (!localScanFile.exists() && localScanFile.isFile()) { System.err.format("WARNING: could not find regular file at expected path: %s\n", localScanFile.getAbsolutePath()); continue; } System.out.format("Processing scan data at %s\n", localScanFile.getAbsolutePath()); MS1ScanForWellAndMassCharge ms1ScanCache = new MS1ScanForWellAndMassCharge(); MS1ScanForWellAndMassCharge ms1ScanResults = ms1ScanCache .getByPlateIdPlateRowPlateColUseSnrScanFileChemical(db, p, well, true, scanFile, searchMass.getKey(), metlinMasses, localScanFile); Double concentration = well.getConcentration(); rampUp.add(Pair.of(concentration, ms1ScanResults)); } WriteAndPlotMS1Results plotFeedingsResults = new WriteAndPlotMS1Results(); plotFeedingsResults.plotFeedings(rampUp, searchIon, outPrefix, fmt, outPrefix + ".gnuplot"); } public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build()); } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY)); if (!lcmsDir.isDirectory()) { System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } try (DB db = DB.openDBFromCLI(cl)) { System.out.format("Loading/updating LCMS scan files into DB\n"); ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir); System.out.format("Running feeding analysis\n"); performFeedingAnalysis(db, cl.getOptionValue(OPTION_DIRECTORY), cl.getOptionValue(OPTION_ION_NAME), cl.getOptionValue(OPTION_SEARCH_MZ), cl.getOptionValue(OPTION_PLATE_BARCODE), cl.getOptionValue(OPTION_FEEDING_STRAIN_OR_CONSTRUCT), cl.getOptionValue(OPTION_FEEDING_EXTRACT), cl.getOptionValue(OPTION_FEEDING_FED_CHEMICAL), cl.getOptionValue(OPTION_OUTPUT_PREFIX), "pdf"); } } }