Java tutorial
/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.lcms.db.io; import com.act.lcms.Gnuplotter; import com.act.lcms.XZ; import com.act.lcms.db.analysis.AnalysisHelper; import com.act.lcms.db.analysis.ScanData; import com.act.lcms.db.analysis.Utils; import com.act.lcms.db.model.ChemicalAssociatedWithPathway; import com.act.lcms.db.model.LCMSWell; import com.act.lcms.db.model.StandardIonResult; import com.act.lcms.db.model.StandardWell; import com.act.utils.TSVWriter; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import java.io.File; import java.io.FileOutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; public class ExportStandardIonResultsFromDB { private static final ScanData<LCMSWell> BLANK_SCAN = new ScanData<>(ScanData.KIND.BLANK, null, null, null, null, null, null); private static final String DEFAULT_ION = "M+H"; private static final Double MAX_INTENSITY = 500000.0d; public static final String OPTION_DIRECTORY = "d"; public static final String TSV_FORMAT = "tsv"; public static final String OPTION_CONSTRUCT = "C"; public static final String OPTION_CHEMICALS = "c"; public static final String OPTION_OUTPUT_PREFIX = "o"; public static final String OPTION_PLOTTING_DIR = "p"; public static final String FONT_SCALE = "f"; public static final String NULL_VALUE = "NULL"; public static final String HELP_MESSAGE = StringUtils.join(new String[] { "This class is used to export relevant standard ion analysis data to the scientist from the " + "standard_ion_results DB for manual assessment (done in github) through a TSV file. The inputs to this " + "class either be an individual standard chemical name OR a construct pathway." }, ""); public static final HelpFormatter HELP_FORMATTER = new HelpFormatter(); private static String sanitizeYeastMediaString(String name) { if (name.contains("Teknova SC Minimal Broth with Raffinose minus Uracil plus Gal")) { return "SC Minimal Broth"; } else { return name; } } private static String generateAdditionalLabelInformation(StandardWell well, StandardIonResult result, String ion) { String plateMetadata = sanitizeYeastMediaString(well.getMedia()) + " " + (well.getConcentration() == null ? "" : well.getConcentration()); if (result != null) { XZ intensityAndTimeOfBestIon = result.getAnalysisResults().get(ion); String additionalInfo = ""; // This case happens when the standard ion result for this well does not have any good detectable // peaks for maybe a given restricted time region (in the case of yeast). if (intensityAndTimeOfBestIon != null) { String snrAndTime = String.format("\n%.2fSNR at %.2fs", intensityAndTimeOfBestIon.getIntensity(), intensityAndTimeOfBestIon.getTime()); additionalInfo = String.format("\n%s %s", plateMetadata, snrAndTime); } else { additionalInfo = String.format("\n%s %s", plateMetadata, "\nNo peaks found"); } return additionalInfo; } else { return String.format("\n%s %s", plateMetadata, "Negative Control"); } } static { HELP_FORMATTER.setWidth(100); } public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() { { add(Option.builder(OPTION_DIRECTORY).argName("directory") .desc("The directory where LCMS analysis results live").hasArg().required() .longOpt("data-dir")); add(Option.builder(OPTION_CONSTRUCT).argName("construct").desc("The construct to get results from") .hasArg()); add(Option.builder(OPTION_CHEMICALS).argName("a comma separated list of chemical names") .desc("A list of chemicals to get standard ion data from").hasArgs().valueSeparator(',')); add(Option.builder(OPTION_OUTPUT_PREFIX).argName("The prefix name") .desc("The prefix of the output file").hasArg()); add(Option.builder("h").argName("help").desc("Prints this help message").longOpt("help")); add(Option.builder(OPTION_PLOTTING_DIR).argName("plotting directory") .desc("The absolute path of the plotting directory").hasArg().required() .longOpt("plotting-dir")); add(Option.builder(FONT_SCALE).argName("font scale").desc( "A Gnuplot fontscale value, should be between 0.1 and 0.5 (0.4 works if the graph text is large") .hasArg().longOpt("font-scale")); } }; static { // Add DB connection options. OPTION_BUILDERS.addAll(DB.DB_OPTION_BUILDERS); } public enum STANDARD_ION_HEADER_FIELDS { CHEMICAL, BEST_ION_FROM_ALGO, MANUAL_PICK, AUTHOR, NOTE, DIAGNOSTIC_PLOTS, // TODO: Deprecate STANDARD_ION_RESULT_ID in the subsequent port of LoadStandardIonAnalysisTableIntoDB.java class. STANDARD_ION_RESULT_ID }; public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build()); } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(ExportStandardIonResultsFromDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(ExportStandardIonResultsFromDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } try (DB db = DB.openDBFromCLI(cl)) { List<String> chemicalNames = new ArrayList<>(); if (cl.hasOption(OPTION_CONSTRUCT)) { // Extract the chemicals in the pathway and their product masses, then look up info on those chemicals List<Pair<ChemicalAssociatedWithPathway, Double>> productMasses = Utils .extractMassesForChemicalsAssociatedWithConstruct(db, cl.getOptionValue(OPTION_CONSTRUCT)); for (Pair<ChemicalAssociatedWithPathway, Double> pair : productMasses) { chemicalNames.add(pair.getLeft().getChemical()); } } if (cl.hasOption(OPTION_CHEMICALS)) { chemicalNames.addAll(Arrays.asList(cl.getOptionValues(OPTION_CHEMICALS))); } if (chemicalNames.size() == 0) { System.err.format("No chemicals can be found from the input query.\n"); System.exit(-1); } List<String> standardIonHeaderFields = new ArrayList<String>() { { add(STANDARD_ION_HEADER_FIELDS.CHEMICAL.name()); add(STANDARD_ION_HEADER_FIELDS.BEST_ION_FROM_ALGO.name()); add(STANDARD_ION_HEADER_FIELDS.MANUAL_PICK.name()); add(STANDARD_ION_HEADER_FIELDS.AUTHOR.name()); add(STANDARD_ION_HEADER_FIELDS.DIAGNOSTIC_PLOTS.name()); add(STANDARD_ION_HEADER_FIELDS.NOTE.name()); } }; String outAnalysis; if (cl.hasOption(OPTION_OUTPUT_PREFIX)) { outAnalysis = cl.getOptionValue(OPTION_OUTPUT_PREFIX) + "." + TSV_FORMAT; } else { outAnalysis = String.join("-", chemicalNames) + "." + TSV_FORMAT; } File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY)); if (!lcmsDir.isDirectory()) { System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } String plottingDirectory = cl.getOptionValue(OPTION_PLOTTING_DIR); TSVWriter<String, String> resultsWriter = new TSVWriter<>(standardIonHeaderFields); resultsWriter.open(new File(outAnalysis)); // For each chemical, create a TSV row and a corresponding diagnostic plot for (String chemicalName : chemicalNames) { List<String> graphLabels = new ArrayList<>(); List<Double> yMaxList = new ArrayList<>(); String outData = plottingDirectory + "/" + chemicalName + ".data"; String outImg = plottingDirectory + "/" + chemicalName + ".pdf"; // For each diagnostic plot, open a new file stream. try (FileOutputStream fos = new FileOutputStream(outData)) { List<StandardIonResult> getResultByChemicalName = StandardIonResult.getByChemicalName(db, chemicalName); if (getResultByChemicalName != null && getResultByChemicalName.size() > 0) { // PART 1: Get the best metlin ion across all standard ion results for a given chemical String bestGlobalMetlinIon = AnalysisHelper .scoreAndReturnBestMetlinIonFromStandardIonResults(getResultByChemicalName, new HashMap<>(), true, true); // PART 2: Plot all the graphs related to the chemical. The plots are structured as follows: // // Page 1: All graphs (water, MeOH, Yeast) for Global ion picked (best ion among ALL standard ion runs for // the given chemical) by the algorithm // Page 2: All graphs for M+H // Page 3: All graphs for Local ions picked (best ion within a SINGLE standard ion run) + negative controls // for Yeast. // // Each page is demarcated by a blank graph. // Arrange results based on media Map<String, List<StandardIonResult>> categories = StandardIonResult .categorizeListOfStandardWellsByMedia(db, getResultByChemicalName); // This set contains all the best metlin ions corresponding to all the standard ion runs. Set<String> bestLocalIons = new HashSet<>(); bestLocalIons.add(bestGlobalMetlinIon); bestLocalIons.add(DEFAULT_ION); for (StandardIonResult result : getResultByChemicalName) { bestLocalIons.add(result.getBestMetlinIon()); } // We sort the best local ions are follows: // 1) Global best ion spectra 2) M+H spectra 3) Local best ion spectra List<String> bestLocalIonsArray = new ArrayList<>(bestLocalIons); Collections.sort(bestLocalIonsArray, new Comparator<String>() { @Override public int compare(String o1, String o2) { if (o1.equals(bestGlobalMetlinIon) && !o2.equals(bestGlobalMetlinIon)) { return -1; } else if (o1.equals(DEFAULT_ION) && !o2.equals(bestGlobalMetlinIon)) { return -1; } else { return 1; } } }); // This variable stores the index of the array at which all the remaining spectra are contained in one // page. This happens right after the M+H ion spectra. Integer combineAllSpectraIntoPageThreeFromIndex = 0; for (int i = 0; i < bestLocalIonsArray.size(); i++) { if (bestLocalIonsArray.get(i).equals(DEFAULT_ION)) { combineAllSpectraIntoPageThreeFromIndex = i + 1; } } for (int i = 0; i < bestLocalIonsArray.size(); i++) { String ion = bestLocalIonsArray.get(i); for (Map.Entry<String, List<StandardIonResult>> mediaToListOfIonResults : categories .entrySet()) { for (StandardIonResult result : mediaToListOfIonResults.getValue()) { // For every standard ion result, we plot the best global metlin ion and M+H. These plots are in the // pages 1 and 2. For all page 3 (aka miscellaneous spectra), we only plot the best local ion // corresponding to it's spectra and not some other graph's spectra. In the below condition, // we reach the page 3 case with not the same best ion as the spectra, in which case we just continue // and not draw anything on the page. if (i >= combineAllSpectraIntoPageThreeFromIndex && !(result.getBestMetlinIon().equals(ion))) { continue; } StandardWell positiveWell = StandardWell.getInstance().getById(db, result.getStandardWellId()); String positiveControlChemical = positiveWell.getChemical(); ScanData<StandardWell> encapsulatedDataForPositiveControl = AnalysisHelper .getScanDataForWell(db, lcmsDir, positiveWell, positiveControlChemical, positiveControlChemical); Set<String> singletonSet = Collections.singleton(ion); String additionalInfo = generateAdditionalLabelInformation(positiveWell, result, ion); List<String> labels = AnalysisHelper .writeScanData(fos, lcmsDir, MAX_INTENSITY, encapsulatedDataForPositiveControl, false, false, singletonSet) .stream().map(label -> label + additionalInfo) .collect(Collectors.toList()); yMaxList.add(encapsulatedDataForPositiveControl.getMs1ScanResults() .getMaxIntensityForIon(ion)); List<String> negativeLabels = null; // Only do the negative control in the miscellaneous page (page 3) and if the well is in yeast media. if (mediaToListOfIonResults.getKey() .equals(StandardWell.MEDIA_TYPE.YEAST.name()) && (i >= combineAllSpectraIntoPageThreeFromIndex && (result.getBestMetlinIon().equals(ion)))) { //TODO: Change the representative negative well to one that displays the highest noise in the future. // For now, we just use the first index among the negative wells. int representativeIndex = 0; StandardWell representativeNegativeControlWell = StandardWell.getInstance() .getById(db, result.getNegativeWellIds().get(representativeIndex)); ScanData encapsulatedDataForNegativeControl = AnalysisHelper .getScanDataForWell(db, lcmsDir, representativeNegativeControlWell, positiveWell.getChemical(), representativeNegativeControlWell.getChemical()); String negativePlateAdditionalInfo = generateAdditionalLabelInformation( representativeNegativeControlWell, null, null); negativeLabels = AnalysisHelper.writeScanData(fos, lcmsDir, MAX_INTENSITY, encapsulatedDataForNegativeControl, false, false, singletonSet) .stream().map(label -> label + negativePlateAdditionalInfo) .collect(Collectors.toList()); yMaxList.add(encapsulatedDataForNegativeControl.getMs1ScanResults() .getMaxIntensityForIon(ion)); } graphLabels.addAll(labels); if (negativeLabels != null) { graphLabels.addAll(negativeLabels); } } } // Add a blank graph to demarcate pages. if (i < combineAllSpectraIntoPageThreeFromIndex) { graphLabels.addAll(AnalysisHelper.writeScanData(fos, lcmsDir, 0.0, BLANK_SCAN, false, false, new HashSet<>())); yMaxList.add(0.0d); } } // We need to pass the yMax values as an array to the Gnuplotter. Double fontScale = null; if (cl.hasOption(FONT_SCALE)) { try { fontScale = Double.parseDouble(cl.getOptionValue(FONT_SCALE)); } catch (IllegalArgumentException e) { System.err.format("Argument for font-scale must be a floating point number.\n"); System.exit(1); } } Double[] yMaxes = yMaxList.toArray(new Double[yMaxList.size()]); Gnuplotter plotter = fontScale == null ? new Gnuplotter() : new Gnuplotter(fontScale); plotter.plot2D(outData, outImg, graphLabels.toArray(new String[graphLabels.size()]), "time", null, "intensity", "pdf", null, null, yMaxes, outImg + ".gnuplot"); Map<String, String> row = new HashMap<>(); row.put(STANDARD_ION_HEADER_FIELDS.CHEMICAL.name(), chemicalName); row.put(STANDARD_ION_HEADER_FIELDS.BEST_ION_FROM_ALGO.name(), bestGlobalMetlinIon); row.put(STANDARD_ION_HEADER_FIELDS.DIAGNOSTIC_PLOTS.name(), outImg); resultsWriter.append(row); resultsWriter.flush(); } } } resultsWriter.flush(); resultsWriter.close(); } } }