List of usage examples for java.lang Float parseFloat
public static float parseFloat(String s) throws NumberFormatException
From source file:apps.LuceneIndexer.java
public static void main(String[] args) { Options options = new Options(); options.addOption("i", null, true, "input file"); options.addOption("o", null, true, "output directory"); options.addOption("r", null, true, "optional output TREC-format QREL file"); options.addOption("bm25_b", null, true, "BM25 parameter: b"); options.addOption("bm25_k1", null, true, "BM25 parameter: k1"); options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity"); Joiner commaJoin = Joiner.on(','); Joiner spaceJoin = Joiner.on(' '); options.addOption("source_type", null, true, "document source type: " + commaJoin.join(SourceFactory.getDocSourceList())); // If you increase this value, you may need to modify the following line in *.sh file // export MAVEN_OPTS="-Xms8192m -server" double ramBufferSizeMB = 1024 * 8; // 8 GB CommandLineParser parser = new org.apache.commons.cli.GnuParser(); IndexWriter indexWriter = null;/*w ww . j ava2s. c om*/ BufferedWriter qrelWriter = null; int docNum = 0; try { CommandLine cmd = parser.parse(options, args); String inputFileName = null, outputDirName = null, qrelFileName = null; if (cmd.hasOption("i")) { inputFileName = cmd.getOptionValue("i"); } else { Usage("Specify 'input file'", options); } if (cmd.hasOption("o")) { outputDirName = cmd.getOptionValue("o"); } else { Usage("Specify 'index directory'", options); } if (cmd.hasOption("r")) { qrelFileName = cmd.getOptionValue("r"); } String sourceName = cmd.getOptionValue("source_type"); if (sourceName == null) Usage("Specify document source type", options); if (qrelFileName != null) qrelWriter = new BufferedWriter(new FileWriter(qrelFileName)); File outputDir = new File(outputDirName); if (!outputDir.exists()) { if (!outputDir.mkdirs()) { System.out.println("couldn't create " + outputDir.getAbsolutePath()); System.exit(1); } } if (!outputDir.isDirectory()) { System.out.println(outputDir.getAbsolutePath() + " is not a directory!"); System.exit(1); } if (!outputDir.canWrite()) { System.out.println("Can't write to " + outputDir.getAbsolutePath()); System.exit(1); } boolean useFixedBM25 = cmd.hasOption("bm25fixed"); float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT; if (cmd.hasOption("bm25_k1")) { try { bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1")); } catch (NumberFormatException e) { Usage("Wrong format for 'bm25_k1'", options); } } if (cmd.hasOption("bm25_b")) { try { bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b")); } catch (NumberFormatException e) { Usage("Wrong format for 'bm25_b'", options); } } EnglishAnalyzer analyzer = new EnglishAnalyzer(); FSDirectory indexDir = FSDirectory.open(Paths.get(outputDirName)); IndexWriterConfig indexConf = new IndexWriterConfig(analyzer); /* OpenMode.CREATE creates a new index or overwrites an existing one. https://lucene.apache.org/core/6_0_0/core/org/apache/lucene/index/IndexWriterConfig.OpenMode.html#CREATE */ indexConf.setOpenMode(OpenMode.CREATE); indexConf.setRAMBufferSizeMB(ramBufferSizeMB); System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b)); if (useFixedBM25) { System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b)); indexConf.setSimilarity(new BM25SimilarityFix(bm25_k1, bm25_b)); } else { System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b)); indexConf.setSimilarity(new BM25Similarity(bm25_k1, bm25_b)); } indexWriter = new IndexWriter(indexDir, indexConf); DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName); DocumentEntry inpDoc = null; TextCleaner textCleaner = new TextCleaner(null); while ((inpDoc = inpDocSource.next()) != null) { ++docNum; Document luceneDoc = new Document(); ArrayList<String> cleanedToks = textCleaner.cleanUp(inpDoc.mDocText); String cleanText = spaceJoin.join(cleanedToks); // System.out.println(inpDoc.mDocId); // System.out.println(cleanText); // System.out.println("=============================="); luceneDoc.add(new StringField(UtilConst.FIELD_ID, inpDoc.mDocId, Field.Store.YES)); luceneDoc.add(new TextField(UtilConst.FIELD_TEXT, cleanText, Field.Store.YES)); indexWriter.addDocument(luceneDoc); if (inpDoc.mIsRel != null && qrelWriter != null) { saveQrelOneEntry(qrelWriter, inpDoc.mQueryId, inpDoc.mDocId, inpDoc.mIsRel ? MAX_GRADE : 0); } if (docNum % 1000 == 0) System.out.println(String.format("Indexed %d documents", docNum)); } } catch (ParseException e) { e.printStackTrace(); Usage("Cannot parse arguments" + e, options); } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); System.exit(1); } finally { System.out.println(String.format("Indexed %d documents", docNum)); try { if (null != indexWriter) indexWriter.close(); if (null != qrelWriter) qrelWriter.close(); } catch (IOException e) { System.err.println("IO exception: " + e); e.printStackTrace(); } } }
From source file:CommandLineInterpreter.java
/** * Main method, command line input will get parsed here. * * @param args// www . j a v a 2s . c o m */ public static void main(String[] args) { // // test-arguments: // args = new String[] { "1.9", "-gui" }; boolean success = false; if (args.length == 1) { String arg = args[0].trim().replaceAll("[-]+", ""); if (arg.equals("help") || arg.equals("h")) printHelp(null); } if (args.length == 0) { printHelp("ONE ARGUMENT NEEDED"); } else { try { boolean guiAlert = false; Float minVersion = null; File resourcesFile = null; // ------------------------------------------------ // // -- // ------------------------------------------------ // final String minJavaVersionArgument = args[0]; if (!minJavaVersionArgument.trim().isEmpty()) { try { minVersion = Float.parseFloat(minJavaVersionArgument); } catch (Exception e) { // do nothing } } if (minVersion == null || minVersion > 2 || minVersion < 1.6) { printHelp("VERSION STRING IS NOT VALID"); } // ------------------------------------------------ // // -- // ------------------------------------------------ // for (int i = 1; i < (args.length <= 3 ? args.length : 3); i++) { final String argument = args[i].trim(); if (argument.equals("-gui")) { guiAlert = true; } else { String resourcesFilePath = argument; if (!resourcesFilePath.isEmpty()) { resourcesFile = new File(resourcesFilePath); if (!resourcesFile.exists() || !resourcesFile.isFile() || !resourcesFile.canRead()) { printHelp("RESOURCES FILE IS NOT VALID\n[" + resourcesFile.getAbsolutePath() + "]"); } } } } // ------------------------------------------------ // // -- // ------------------------------------------------ // success = checkJREVersion(minVersion, guiAlert); if (success && resourcesFile != null) { success = checkResources(resourcesFile, guiAlert); } } catch (Exception e) { success = false; e.printStackTrace(); } } if (!success) { // set error exit code System.exit(1); } }
From source file:DIA_Umpire_Quant.DIA_Umpire_LCMSIDGen.java
/** * @param args the command line arguments *//* w ww .j a va 2 s.c o m*/ public static void main(String[] args) throws FileNotFoundException, IOException, Exception { System.out.println( "================================================================================================="); System.out.println("DIA-Umpire LCMSID geneartor (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length != 1) { System.out.println( "command format error, the correct format should be: java -jar -Xmx10G DIA_Umpire_LCMSIDGen.jar diaumpire_module.params"); return; } try { ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_lcmsidgen.log"); } catch (Exception e) { } Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + args[0]); BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line = ""; String WorkFolder = ""; int NoCPUs = 2; TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); HashMap<String, File> AssignFiles = new HashMap<>(); //<editor-fold defaultstate="collapsed" desc="Reading parameter file"> while ((line = reader.readLine()) != null) { line = line.trim(); Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==File list begin")) { do { line = reader.readLine(); line = line.trim(); if (line.equals("==File list end")) { continue; } else if (!"".equals(line)) { File newfile = new File(line); if (newfile.exists()) { AssignFiles.put(newfile.getAbsolutePath(), newfile); } else { Logger.getRootLogger().info("File: " + newfile + " does not exist."); } } } while (!line.equals("==File list end")); } if (line.split("=").length < 2) { continue; } String type = line.split("=")[0].trim(); String value = line.split("=")[1].trim(); switch (type) { case "Path": { WorkFolder = value; break; } case "path": { WorkFolder = value; break; } case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "DecoyPrefix": { if (!"".equals(value)) { tandemPara.DecoyPrefix = value; } break; } case "PeptideFDR": { tandemPara.PepFDR = Float.parseFloat(value); break; } } } } //</editor-fold> //Initialize PTM manager using compomics library PTMManager.GetInstance(); //Generate DIA file list ArrayList<DIAPack> FileList = new ArrayList<>(); File folder = new File(WorkFolder); if (!folder.exists()) { Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found."); System.exit(1); } for (final File fileEntry : folder.listFiles()) { if (fileEntry.isFile() && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry); } if (fileEntry.isDirectory()) { for (final File fileEntry2 : fileEntry.listFiles()) { if (fileEntry2.isFile() && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2); } } } } Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size()); for (File fileEntry : AssignFiles.values()) { Logger.getRootLogger().info(fileEntry.getAbsolutePath()); } //process each DIA file to genearate untargeted identifications for (File fileEntry : AssignFiles.values()) { String mzXMLFile = fileEntry.getAbsolutePath(); if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) { long time = System.currentTimeMillis(); DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs); FileList.add(DiaFile); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + mzXMLFile); if (!DiaFile.LoadDIASetting()) { Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete"); System.exit(1); } if (!DiaFile.LoadParams()) { Logger.getRootLogger().info("Loading parameters failed, job is incomplete"); System.exit(1); } Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "...."); DiaFile.ParsePepXML(tandemPara, null); DiaFile.BuildStructure(); if (!DiaFile.MS1FeatureMap.ReadPeakCluster()) { Logger.getRootLogger().info("Loading peak and structure failed, job is incomplete"); System.exit(1); } DiaFile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster(); //Generate mapping between index of precursor feature and pseudo MS/MS scan index DiaFile.GenerateClusterScanNomapping(); //Doing quantification DiaFile.AssignQuant(); DiaFile.ClearStructure(); DiaFile.IDsummary.ReduceMemoryUsage(); time = System.currentTimeMillis() - time; Logger.getRootLogger().info(mzXMLFile + " processed time:" + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time), TimeUnit.MILLISECONDS.toMinutes(time) - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)), TimeUnit.MILLISECONDS.toSeconds(time) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time)))); } Logger.getRootLogger().info("Job done"); Logger.getRootLogger().info( "================================================================================================="); } }
From source file:DIA_Umpire_SE.DIA_Umpire_SE.java
/** * @param args the command line arguments DIA_Umpire parameterfile *//*from w w w . ja v a2 s. c o m*/ public static void main(String[] args) throws InterruptedException, FileNotFoundException, ExecutionException, IOException, ParserConfigurationException, DataFormatException, SAXException, Exception { System.out.println( "================================================================================================="); System.out.println( "DIA-Umpire singal extraction analysis (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length < 2 || args.length > 3) { System.out.println( "command format error, the correct format is: java -jar -Xmx8G DIA_Umpire_SE.jar mzMXL_file diaumpire_se.params"); System.out.println( "To fix DIA setting, use : java -jar -Xmx8G DIA_Umpire_SE.jar mzMXL_file diaumpire_se.params -f"); return; } try { //Define logger level for console ConsoleLogger.SetConsoleLogger(Level.INFO); //Define logger level and file path for text log file ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_se.log"); } catch (Exception e) { } boolean Fix = false; boolean Resume = false; if (args.length == 3 && args[2].equals("-f")) { Fix = true; } String parameterfile = args[1]; String MSFilePath = args[0]; Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + parameterfile); Logger.getRootLogger().info("Spectra file:" + MSFilePath); BufferedReader reader = new BufferedReader(new FileReader(parameterfile)); String line = ""; InstrumentParameter param = new InstrumentParameter(InstrumentParameter.InstrumentType.TOF5600); param.DetermineBGByID = false; param.EstimateBG = true; int NoCPUs = 2; SpectralDataType.DataType dataType = SpectralDataType.DataType.DIA_F_Window; String WindowType = ""; int WindowSize = 25; ArrayList<XYData> WindowList = new ArrayList<>(); boolean ExportPrecursorPeak = false; boolean ExportFragmentPeak = false; //<editor-fold defaultstate="collapsed" desc="Read parameter file"> while ((line = reader.readLine()) != null) { Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==window setting begin")) { while (!(line = reader.readLine()).equals("==window setting end")) { if (!"".equals(line)) { WindowList.add(new XYData(Float.parseFloat(line.split("\t")[0]), Float.parseFloat(line.split("\t")[1]))); } } continue; } if (line.split("=").length < 2) { continue; } String type = line.split("=")[0].trim(); if (type.startsWith("para.")) { type = type.replace("para.", "SE."); } String value = line.split("=")[1].trim(); switch (type) { case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "ExportPrecursorPeak": { ExportPrecursorPeak = Boolean.parseBoolean(value); break; } case "ExportFragmentPeak": { ExportFragmentPeak = Boolean.parseBoolean(value); break; } //<editor-fold defaultstate="collapsed" desc="instrument parameters"> case "RPmax": { param.PrecursorRank = Integer.parseInt(value); break; } case "RFmax": { param.FragmentRank = Integer.parseInt(value); break; } case "CorrThreshold": { param.CorrThreshold = Float.parseFloat(value); break; } case "DeltaApex": { param.ApexDelta = Float.parseFloat(value); break; } case "RTOverlap": { param.RTOverlapThreshold = Float.parseFloat(value); break; } case "BoostComplementaryIon": { param.BoostComplementaryIon = Boolean.parseBoolean(value); break; } case "AdjustFragIntensity": { param.AdjustFragIntensity = Boolean.parseBoolean(value); break; } case "SE.MS1PPM": { param.MS1PPM = Float.parseFloat(value); break; } case "SE.MS2PPM": { param.MS2PPM = Float.parseFloat(value); break; } case "SE.SN": { param.SNThreshold = Float.parseFloat(value); break; } case "SE.MS2SN": { param.MS2SNThreshold = Float.parseFloat(value); break; } case "SE.MinMSIntensity": { param.MinMSIntensity = Float.parseFloat(value); break; } case "SE.MinMSMSIntensity": { param.MinMSMSIntensity = Float.parseFloat(value); break; } case "SE.MinRTRange": { param.MinRTRange = Float.parseFloat(value); break; } case "SE.MaxNoPeakCluster": { param.MaxNoPeakCluster = Integer.parseInt(value); param.MaxMS2NoPeakCluster = Integer.parseInt(value); break; } case "SE.MinNoPeakCluster": { param.MinNoPeakCluster = Integer.parseInt(value); param.MinMS2NoPeakCluster = Integer.parseInt(value); break; } case "SE.MinMS2NoPeakCluster": { param.MinMS2NoPeakCluster = Integer.parseInt(value); break; } case "SE.MaxCurveRTRange": { param.MaxCurveRTRange = Float.parseFloat(value); break; } case "SE.Resolution": { param.Resolution = Integer.parseInt(value); break; } case "SE.RTtol": { param.RTtol = Float.parseFloat(value); break; } case "SE.NoPeakPerMin": { param.NoPeakPerMin = Integer.parseInt(value); break; } case "SE.StartCharge": { param.StartCharge = Integer.parseInt(value); break; } case "SE.EndCharge": { param.EndCharge = Integer.parseInt(value); break; } case "SE.MS2StartCharge": { param.MS2StartCharge = Integer.parseInt(value); break; } case "SE.MS2EndCharge": { param.MS2EndCharge = Integer.parseInt(value); break; } case "SE.NoMissedScan": { param.NoMissedScan = Integer.parseInt(value); break; } case "SE.Denoise": { param.Denoise = Boolean.valueOf(value); break; } case "SE.EstimateBG": { param.EstimateBG = Boolean.valueOf(value); break; } case "SE.RemoveGroupedPeaks": { param.RemoveGroupedPeaks = Boolean.valueOf(value); break; } case "SE.MinFrag": { param.MinFrag = Integer.parseInt(value); break; } case "SE.IsoPattern": { param.IsoPattern = Float.valueOf(value); break; } case "SE.StartRT": { param.startRT = Float.valueOf(value); break; } case "SE.EndRT": { param.endRT = Float.valueOf(value); break; } case "SE.RemoveGroupedPeaksRTOverlap": { param.RemoveGroupedPeaksRTOverlap = Float.valueOf(value); break; } case "SE.RemoveGroupedPeaksCorr": { param.RemoveGroupedPeaksCorr = Float.valueOf(value); break; } case "SE.MinMZ": { param.MinMZ = Float.valueOf(value); break; } case "SE.MinPrecursorMass": { param.MinPrecursorMass = Float.valueOf(value); break; } case "SE.MaxPrecursorMass": { param.MaxPrecursorMass = Float.valueOf(value); break; } case "SE.IsoCorrThreshold": { param.IsoCorrThreshold = Float.valueOf(value); break; } case "SE.MassDefectFilter": { param.MassDefectFilter = Boolean.parseBoolean(value); break; } case "SE.MassDefectOffset": { param.MassDefectOffset = Float.valueOf(value); break; } //</editor-fold>//</editor-fold> case "WindowType": { WindowType = value; switch (WindowType) { case "SWATH": { dataType = SpectralDataType.DataType.DIA_F_Window; break; } case "V_SWATH": { dataType = SpectralDataType.DataType.DIA_V_Window; break; } case "MSX": { dataType = SpectralDataType.DataType.MSX; break; } case "MSE": { dataType = SpectralDataType.DataType.MSe; break; } } break; } case "WindowSize": { WindowSize = Integer.parseInt(value); break; } } } } //</editor-fold> try { File MSFile = new File(MSFilePath); if (MSFile.exists()) { long time = System.currentTimeMillis(); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + MSFilePath + "...."); //Initialize a DIA file data structure DIAPack DiaFile = new DIAPack(MSFile.getAbsolutePath(), NoCPUs); DiaFile.Resume = Resume; DiaFile.SetDataType(dataType); DiaFile.SetParameter(param); //Set DIA isolation window setting if (dataType == SpectralDataType.DataType.DIA_F_Window) { DiaFile.SetWindowSize(WindowSize); } else if (dataType == SpectralDataType.DataType.DIA_V_Window) { for (XYData window : WindowList) { DiaFile.AddVariableWindow(window); } } DiaFile.SaveDIASetting(); DiaFile.SaveParams(); if (Fix) { DiaFile.FixScanidx(); return; } DiaFile.ExportPrecursorPeak = ExportPrecursorPeak; DiaFile.ExportFragmentPeak = ExportFragmentPeak; Logger.getRootLogger().info("Module A: Signal extraction"); //Start DIA signal extraction process to generate pseudo MS/MS files DiaFile.process(); time = System.currentTimeMillis() - time; Logger.getRootLogger().info(MSFilePath + " processed time:" + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time), TimeUnit.MILLISECONDS.toMinutes(time) - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)), TimeUnit.MILLISECONDS.toSeconds(time) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time)))); } else { throw new RuntimeException("file: " + MSFile + "? does not exist!"); } Logger.getRootLogger().info("Job complete"); Logger.getRootLogger().info( "================================================================================================="); } catch (Exception e) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e)); throw e; } }
From source file:edu.msu.cme.rdp.multicompare.Reprocess.java
/** * This class reprocesses the classification results (allrank output) and print out hierarchy output file, based on the confidence cutoff; * and print out only the detail classification results with assignment at certain rank with confidence above the cutoff or/and matching a given taxon. * @param args// ww w. ja v a 2 s . c o m * @throws Exception */ public static void main(String[] args) throws Exception { PrintWriter assign_out = new PrintWriter(new NullWriter()); float conf = 0.8f; PrintStream heir_out = null; String hier_out_filename = null; ClassificationResultFormatter.FORMAT format = ClassificationResultFormatter.FORMAT.allRank; String rank = null; String taxonFilterFile = null; String train_propfile = null; String gene = null; List<MCSample> samples = new ArrayList(); try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption(CmdOptions.HIER_OUTFILE_SHORT_OPT)) { hier_out_filename = line.getOptionValue(CmdOptions.HIER_OUTFILE_SHORT_OPT); heir_out = new PrintStream(hier_out_filename); } else { throw new Exception( "It make sense to provide output filename for " + CmdOptions.HIER_OUTFILE_LONG_OPT); } if (line.hasOption(CmdOptions.OUTFILE_SHORT_OPT)) { assign_out = new PrintWriter(line.getOptionValue(CmdOptions.OUTFILE_SHORT_OPT)); } if (line.hasOption(CmdOptions.RANK_SHORT_OPT)) { rank = line.getOptionValue(CmdOptions.RANK_SHORT_OPT); } if (line.hasOption(CmdOptions.TAXON_SHORT_OPT)) { taxonFilterFile = line.getOptionValue(CmdOptions.TAXON_SHORT_OPT); } if (line.hasOption(CmdOptions.BOOTSTRAP_SHORT_OPT)) { conf = Float.parseFloat(line.getOptionValue(CmdOptions.BOOTSTRAP_SHORT_OPT)); if (conf < 0 || conf > 1) { throw new IllegalArgumentException("Confidence must be in the range [0,1]"); } } if (line.hasOption(CmdOptions.FORMAT_SHORT_OPT)) { String f = line.getOptionValue(CmdOptions.FORMAT_SHORT_OPT); if (f.equalsIgnoreCase("allrank")) { format = ClassificationResultFormatter.FORMAT.allRank; } else if (f.equalsIgnoreCase("fixrank")) { format = ClassificationResultFormatter.FORMAT.fixRank; } else if (f.equalsIgnoreCase("db")) { format = ClassificationResultFormatter.FORMAT.dbformat; } else if (f.equalsIgnoreCase("filterbyconf")) { format = ClassificationResultFormatter.FORMAT.filterbyconf; } else { throw new IllegalArgumentException( "Not valid output format, only allrank, fixrank, filterbyconf and db allowed"); } } if (line.hasOption(CmdOptions.TRAINPROPFILE_SHORT_OPT)) { if (gene != null) { throw new IllegalArgumentException( "Already specified the gene from the default location. Can not specify train_propfile"); } else { train_propfile = line.getOptionValue(CmdOptions.TRAINPROPFILE_SHORT_OPT); } } if (line.hasOption(CmdOptions.GENE_SHORT_OPT)) { if (train_propfile != null) { throw new IllegalArgumentException( "Already specified train_propfile. Can not specify gene any more"); } gene = line.getOptionValue(CmdOptions.GENE_SHORT_OPT).toLowerCase(); if (!gene.equals(ClassifierFactory.RRNA_16S_GENE) && !gene.equals(ClassifierFactory.FUNGALLSU_GENE) && !gene.equals(ClassifierFactory.FUNGALITS_warcup_GENE) && !gene.equals(ClassifierFactory.FUNGALITS_unite_GENE)) { throw new IllegalArgumentException(gene + " is NOT valid, only allows " + ClassifierFactory.RRNA_16S_GENE + ", " + ClassifierFactory.FUNGALLSU_GENE + ", " + ClassifierFactory.FUNGALITS_warcup_GENE + " and " + ClassifierFactory.FUNGALITS_unite_GENE); } } args = line.getArgs(); if (args.length < 1) { throw new Exception("Incorrect number of command line arguments"); } for (String arg : args) { String[] inFileNames = arg.split(","); String inputFile = inFileNames[0]; File idmappingFile = null; if (inFileNames.length == 2) { idmappingFile = new File(inFileNames[1]); if (!idmappingFile.exists()) { System.err.println("Failed to find input file \"" + inFileNames[1] + "\""); return; } } MCSample nextSample = new MCSampleResult(inputFile, idmappingFile); samples.add(nextSample); } } catch (Exception e) { System.out.println("Command Error: " + e.getMessage()); new HelpFormatter().printHelp(120, "Reprocess [options] <Classification_allrank_result>[,idmappingfile] ...", "", options, ""); return; } if (train_propfile == null && gene == null) { gene = ClassifierFactory.RRNA_16S_GENE; } HashSet<String> taxonFilter = null; if (taxonFilterFile != null) { taxonFilter = readTaxonFilterFile(taxonFilterFile); } MultiClassifier multiClassifier = new MultiClassifier(train_propfile, gene); DefaultPrintVisitor printVisitor = new DefaultPrintVisitor(heir_out, samples); MultiClassifierResult result = multiClassifier.multiClassificationParser(samples, conf, assign_out, format, rank, taxonFilter); result.getRoot().topDownVisit(printVisitor); assign_out.close(); heir_out.close(); if (multiClassifier.hasCopyNumber()) { // print copy number corrected counts File cn_corrected_s = new File(new File(hier_out_filename).getParentFile(), "cncorrected_" + hier_out_filename); PrintStream cn_corrected_hier_out = new PrintStream(cn_corrected_s); printVisitor = new DefaultPrintVisitor(cn_corrected_hier_out, samples, true); result.getRoot().topDownVisit(printVisitor); cn_corrected_hier_out.close(); } }
From source file:apps.quantification.LearnQuantificationSVMLight.java
public static void main(String[] args) throws IOException { String cmdLineSyntax = LearnQuantificationSVMLight.class.getName() + " [OPTIONS] <path to svm_light_learn> <path to svm_light_classify> <trainingIndexDirectory> <outputDirectory>"; Options options = new Options(); OptionBuilder.withArgName("f"); OptionBuilder.withDescription("Number of folds"); OptionBuilder.withLongOpt("f"); OptionBuilder.isRequired(true);/*ww w . j a va 2 s. c om*/ OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("c"); OptionBuilder.withDescription("The c value for svm_light (default 1)"); OptionBuilder.withLongOpt("c"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("k"); OptionBuilder.withDescription("Kernel type (default 0: linear, 1: polynomial, 2: RBF, 3: sigmoid)"); OptionBuilder.withLongOpt("k"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("t"); OptionBuilder.withDescription("Path for temporary files"); OptionBuilder.withLongOpt("t"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("v"); OptionBuilder.withDescription("Verbose output"); OptionBuilder.withLongOpt("v"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("s"); OptionBuilder.withDescription("Don't delete temporary training file in svm_light format (default: delete)"); OptionBuilder.withLongOpt("s"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); SvmLightLearnerCustomizer classificationLearnerCustomizer = null; SvmLightClassifierCustomizer classificationCustomizer = null; int folds = -1; GnuParser parser = new GnuParser(); String[] remainingArgs = null; try { CommandLine line = parser.parse(options, args); remainingArgs = line.getArgs(); classificationLearnerCustomizer = new SvmLightLearnerCustomizer(remainingArgs[0]); classificationCustomizer = new SvmLightClassifierCustomizer(remainingArgs[1]); folds = Integer.parseInt(line.getOptionValue("f")); if (line.hasOption("c")) classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c"))); if (line.hasOption("k")) { System.out.println("Kernel type: " + line.getOptionValue("k")); classificationLearnerCustomizer.setKernelType(Integer.parseInt(line.getOptionValue("k"))); } if (line.hasOption("v")) classificationLearnerCustomizer.printSvmLightOutput(true); if (line.hasOption("s")) classificationLearnerCustomizer.setDeleteTrainingFiles(false); if (line.hasOption("t")) { classificationLearnerCustomizer.setTempPath(line.getOptionValue("t")); classificationCustomizer.setTempPath(line.getOptionValue("t")); } } catch (Exception exp) { System.err.println("Parsing failed. Reason: " + exp.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } assert (classificationLearnerCustomizer != null); if (remainingArgs.length != 4) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } String indexFile = remainingArgs[2]; File file = new File(indexFile); String indexName = file.getName(); String indexPath = file.getParent(); String outputPath = remainingArgs[3]; SvmLightLearner classificationLearner = new SvmLightLearner(); classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer); FileSystemStorageManager fssm = new FileSystemStorageManager(indexPath, false); fssm.open(); IIndex training = TroveReadWriteHelper.readIndex(fssm, indexName, TroveContentDBType.Full, TroveClassificationDBType.Full); final TextualProgressBar progressBar = new TextualProgressBar("Learning the quantifiers"); IOperationStatusListener status = new IOperationStatusListener() { @Override public void operationStatus(double percentage) { progressBar.signal((int) percentage); } }; QuantificationLearner quantificationLearner = new QuantificationLearner(folds, classificationLearner, classificationLearnerCustomizer, classificationCustomizer, ClassificationMode.PER_CATEGORY, new LogisticFunction(), status); IQuantifier[] quantifiers = quantificationLearner.learn(training); File executableFile = new File(classificationLearnerCustomizer.getSvmLightLearnPath()); IDataManager classifierDataManager = new SvmLightDataManager(new SvmLightClassifierCustomizer( executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_light_classify")); String description = "_SVMLight_C-" + classificationLearnerCustomizer.getC() + "_K-" + classificationLearnerCustomizer.getKernelType(); if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0) description += "_" + classificationLearnerCustomizer.getAdditionalParameters(); String quantifierPrefix = indexName + "_Quantifier-" + folds + description; FileSystemStorageManager fssmo = new FileSystemStorageManager( outputPath + File.separatorChar + quantifierPrefix, true); fssmo.open(); QuantificationLearner.write(quantifiers, fssmo, classifierDataManager); fssmo.close(); BufferedWriter bfs = new BufferedWriter( new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt")); TShortDoubleHashMap simpleTPRs = quantificationLearner.getSimpleTPRs(); TShortDoubleHashMap simpleFPRs = quantificationLearner.getSimpleFPRs(); TShortDoubleHashMap scaledTPRs = quantificationLearner.getScaledTPRs(); TShortDoubleHashMap scaledFPRs = quantificationLearner.getScaledFPRs(); ContingencyTableSet contingencyTableSet = quantificationLearner.getContingencyTableSet(); short[] cats = simpleTPRs.keys(); for (int i = 0; i < cats.length; ++i) { short cat = cats[i]; String catName = training.getCategoryDB().getCategoryName(cat); ContingencyTable contingencyTable = contingencyTableSet.getCategoryContingencyTable(cat); double simpleTPR = simpleTPRs.get(cat); double simpleFPR = simpleFPRs.get(cat); double scaledTPR = scaledTPRs.get(cat); double scaledFPR = scaledFPRs.get(cat); String line = quantifierPrefix + "\ttrain\tsimple\t" + catName + "\t" + cat + "\t" + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t" + contingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n"; bfs.write(line); line = quantifierPrefix + "\ttrain\tscaled\t" + catName + "\t" + cat + "\t" + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t" + contingencyTable.tn() + "\t" + scaledTPR + "\t" + scaledFPR + "\n"; bfs.write(line); } bfs.close(); }
From source file:ms1quant.MS1Quant.java
/** * @param args the command line arguments MS1Quant parameterfile *//*w w w . j a v a 2 s. c o m*/ public static void main(String[] args) throws Exception { BufferedReader reader = null; try { System.out.println( "================================================================================================="); System.out.println("Umpire MS1 quantification and feature detection analysis (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length < 3 || !args[1].startsWith("-mode")) { System.out .println("command : java -jar -Xmx10G MS1Quant.jar ms1quant.params -mode[1 or 2] [Option]"); System.out.println("\n-mode"); System.out.println("\t1:Single file mode--> mzXML_file PepXML_file"); System.out.println("\t\tEx: -mode1 file1.mzXML file1.pep.xml"); System.out.println( "\t2:Folder mode--> mzXML_Folder PepXML_Folder, all generated csv tables will be merged into a single csv file"); System.out.println("\t\tEx: -mode2 /data/mzxml/ /data/pepxml/"); System.out.println("\nOptions"); System.out.println( "\t-C\tNo of concurrent files to be processed (only for folder mode), Ex. -C5, default:1"); System.out.println("\t-p\tMinimum probability, Ex. -p0.9, default:0.9"); System.out.println("\t-ID\tDetect identified feature only"); System.out.println("\t-O\toutput folder, Ex. -O/data/"); return; } ConsoleLogger consoleLogger = new ConsoleLogger(); consoleLogger.SetConsoleLogger(Level.DEBUG); consoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "ms1quant_debug.log"); Logger logger = Logger.getRootLogger(); logger.debug("Command: " + Arrays.toString(args)); logger.info("MS1Quant version: " + UmpireInfo.GetInstance().Version); String parameterfile = args[0]; logger.info("Parameter file: " + parameterfile); File paramfile = new File(parameterfile); if (!paramfile.exists()) { logger.error("Parameter file " + paramfile.getAbsolutePath() + " cannot be found. The program will exit."); } reader = new BufferedReader(new FileReader(paramfile.getAbsolutePath())); String line = ""; InstrumentParameter param = new InstrumentParameter(InstrumentParameter.InstrumentType.TOF5600); int NoCPUs = 2; int NoFile = 1; param.DetermineBGByID = false; param.EstimateBG = true; //<editor-fold defaultstate="collapsed" desc="Read parameter file"> while ((line = reader.readLine()) != null) { if (!"".equals(line) && !line.startsWith("#")) { logger.info(line); //System.out.println(line); if (line.split("=").length < 2) { continue; } if (line.split("=").length < 2) { continue; } String type = line.split("=")[0].trim(); if (type.startsWith("para.")) { type = type.replace("para.", "SE."); } String value = line.split("=")[1].trim(); switch (type) { case "Thread": { NoCPUs = Integer.parseInt(value); break; } //<editor-fold defaultstate="collapsed" desc="instrument parameters"> case "SE.MS1PPM": { param.MS1PPM = Float.parseFloat(value); break; } case "SE.MS2PPM": { param.MS2PPM = Float.parseFloat(value); break; } case "SE.SN": { param.SNThreshold = Float.parseFloat(value); break; } case "SE.MS2SN": { param.MS2SNThreshold = Float.parseFloat(value); break; } case "SE.MinMSIntensity": { param.MinMSIntensity = Float.parseFloat(value); break; } case "SE.MinMSMSIntensity": { param.MinMSMSIntensity = Float.parseFloat(value); break; } case "SE.MinRTRange": { param.MinRTRange = Float.parseFloat(value); break; } case "SE.MaxNoPeakCluster": { param.MaxNoPeakCluster = Integer.parseInt(value); param.MaxMS2NoPeakCluster = Integer.parseInt(value); break; } case "SE.MinNoPeakCluster": { param.MinNoPeakCluster = Integer.parseInt(value); param.MinMS2NoPeakCluster = Integer.parseInt(value); break; } case "SE.MinMS2NoPeakCluster": { param.MinMS2NoPeakCluster = Integer.parseInt(value); break; } case "SE.MaxCurveRTRange": { param.MaxCurveRTRange = Float.parseFloat(value); break; } case "SE.Resolution": { param.Resolution = Integer.parseInt(value); break; } case "SE.RTtol": { param.RTtol = Float.parseFloat(value); break; } case "SE.NoPeakPerMin": { param.NoPeakPerMin = Integer.parseInt(value); break; } case "SE.StartCharge": { param.StartCharge = Integer.parseInt(value); break; } case "SE.EndCharge": { param.EndCharge = Integer.parseInt(value); break; } case "SE.MS2StartCharge": { param.MS2StartCharge = Integer.parseInt(value); break; } case "SE.MS2EndCharge": { param.MS2EndCharge = Integer.parseInt(value); break; } case "SE.NoMissedScan": { param.NoMissedScan = Integer.parseInt(value); break; } case "SE.Denoise": { param.Denoise = Boolean.valueOf(value); break; } case "SE.EstimateBG": { param.EstimateBG = Boolean.valueOf(value); break; } case "SE.RemoveGroupedPeaks": { param.RemoveGroupedPeaks = Boolean.valueOf(value); break; } case "SE.MinFrag": { param.MinFrag = Integer.parseInt(value); break; } case "SE.IsoPattern": { param.IsoPattern = Float.valueOf(value); break; } case "SE.StartRT": { param.startRT = Float.valueOf(value); } case "SE.EndRT": { param.endRT = Float.valueOf(value); } //</editor-fold> } } } //</editor-fold> int mode = 1; if (args[1].equals("-mode2")) { mode = 2; } else if (args[1].equals("-mode1")) { mode = 1; } else { logger.error("-mode number not recongized. The program will exit."); } String mzXML = ""; String pepXML = ""; String mzXMLPath = ""; String pepXMLPath = ""; File mzXMLfile = null; File pepXMLfile = null; File mzXMLfolder = null; File pepXMLfolder = null; int idx = 0; if (mode == 1) { mzXML = args[2]; logger.info("Mode1 mzXML file: " + mzXML); mzXMLfile = new File(mzXML); if (!mzXMLfile.exists()) { logger.error("Mode1 mzXML file " + mzXMLfile.getAbsolutePath() + " cannot be found. The program will exit."); return; } pepXML = args[3]; logger.info("Mode1 pepXML file: " + pepXML); pepXMLfile = new File(pepXML); if (!pepXMLfile.exists()) { logger.error("Mode1 pepXML file " + pepXMLfile.getAbsolutePath() + " cannot be found. The program will exit."); return; } idx = 4; } else if (mode == 2) { mzXMLPath = args[2]; logger.info("Mode2 mzXML folder: " + mzXMLPath); mzXMLfolder = new File(mzXMLPath); if (!mzXMLfolder.exists()) { logger.error("Mode2 mzXML folder " + mzXMLfolder.getAbsolutePath() + " does not exist. The program will exit."); return; } pepXMLPath = args[3]; logger.info("Mode2 pepXML folder: " + pepXMLPath); pepXMLfolder = new File(pepXMLPath); if (!pepXMLfolder.exists()) { logger.error("Mode2 pepXML folder " + pepXMLfolder.getAbsolutePath() + " does not exist. The program will exit."); return; } idx = 4; } String outputfolder = ""; float MinProb = 0f; for (int i = idx; i < args.length; i++) { if (args[i].startsWith("-")) { if (args[i].equals("-ID")) { param.TargetIDOnly = true; logger.info("Detect ID feature only: true"); } if (args[i].startsWith("-O")) { outputfolder = args[i].substring(2); logger.info("Output folder: " + outputfolder); File outputfile = new File(outputfolder); if (!outputfolder.endsWith("\\") | outputfolder.endsWith("/")) { outputfolder += "/"; } if (!outputfile.exists()) { outputfile.mkdir(); } } if (args[i].startsWith("-C")) { try { NoFile = Integer.parseInt(args[i].substring(2)); logger.info("No of concurrent files: " + NoFile); } catch (Exception ex) { logger.error(args[i] + " is not a correct integer format, will process only one file at a time."); } } if (args[i].startsWith("-p")) { try { MinProb = Float.parseFloat(args[i].substring(2)); logger.info("probability threshold: " + MinProb); } catch (Exception ex) { logger.error(args[i] + " is not a correct format, will use 0 as threshold instead."); } } } } reader.close(); TandemParam tandemparam = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); PTMManager.GetInstance(); if (param.TargetIDOnly) { param.EstimateBG = false; param.ApexDelta = 1.5f; param.NoMissedScan = 10; param.MiniOverlapP = 0.2f; param.RemoveGroupedPeaks = false; param.CheckMonoIsotopicApex = false; param.DetectByCWT = false; param.FillGapByBK = false; param.IsoCorrThreshold = -1f; param.SmoothFactor = 3; } if (mode == 1) { logger.info("Processing " + mzXMLfile.getAbsolutePath() + "...."); long time = System.currentTimeMillis(); LCMSPeakMS1 LCMS1 = new LCMSPeakMS1(mzXMLfile.getAbsolutePath(), NoCPUs); LCMS1.SetParameter(param); LCMS1.Resume = false; if (!param.TargetIDOnly) { LCMS1.CreatePeakFolder(); } LCMS1.ExportPeakClusterTable = true; if (pepXMLfile.exists()) { tandemparam.InteractPepXMLPath = pepXMLfile.getAbsolutePath(); LCMS1.ParsePepXML(tandemparam, MinProb); logger.info("No. of PSMs included: " + LCMS1.IDsummary.PSMList.size()); logger.info("No. of Peptide ions included: " + LCMS1.IDsummary.GetPepIonList().size()); } if (param.TargetIDOnly) { LCMS1.SaveSerializationFile = false; } if (param.TargetIDOnly || !LCMS1.ReadPeakCluster()) { LCMS1.PeakClusterDetection(); } if (pepXMLfile.exists()) { LCMS1.AssignQuant(false); LCMS1.IDsummary.ExportPepID(outputfolder); } time = System.currentTimeMillis() - time; logger.info(LCMS1.ParentmzXMLName + " processed time:" + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time), TimeUnit.MILLISECONDS.toMinutes(time) - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)), TimeUnit.MILLISECONDS.toSeconds(time) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time)))); LCMS1.BaseClearAllPeaks(); LCMS1.SetSpectrumParser(null); LCMS1.IDsummary = null; LCMS1 = null; System.gc(); } else if (mode == 2) { LCMSID IDsummary = new LCMSID("", "", ""); logger.info("Parsing all pepXML files in " + pepXMLPath + "...."); for (File file : pepXMLfolder.listFiles()) { if (file.getName().toLowerCase().endsWith("pep.xml") || file.getName().toLowerCase().endsWith("pepxml")) { PepXMLParser pepXMLParser = new PepXMLParser(IDsummary, file.getAbsolutePath(), MinProb); } } HashMap<String, LCMSID> LCMSIDMap = IDsummary.GetLCMSIDFileMap(); ExecutorService executorPool = null; executorPool = Executors.newFixedThreadPool(NoFile); logger.info("Processing all mzXML files in " + mzXMLPath + "...."); for (File file : mzXMLfolder.listFiles()) { if (file.getName().toLowerCase().endsWith("mzxml")) { LCMSID id = LCMSIDMap.get(FilenameUtils.getBaseName(file.getName())); if (id == null || id.PSMList == null) { logger.warn("No IDs found in :" + FilenameUtils.getBaseName(file.getName()) + ". Quantification for this file is skipped"); continue; } if (!id.PSMList.isEmpty()) { MS1TargetQuantThread thread = new MS1TargetQuantThread(file, id, NoCPUs, outputfolder, param); executorPool.execute(thread); } } } LCMSIDMap.clear(); LCMSIDMap = null; IDsummary = null; executorPool.shutdown(); try { executorPool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); } catch (InterruptedException e) { logger.info("interrupted.."); } if (outputfolder == null | outputfolder.equals("")) { outputfolder = mzXMLPath; } logger.info("Merging PSM files.."); File output = new File(outputfolder); FileWriter writer = new FileWriter(output.getAbsolutePath() + "/PSM_merge.csv"); boolean header = false; for (File csvfile : output.listFiles()) { if (csvfile.getName().toLowerCase().endsWith("_psms.csv")) { BufferedReader outreader = new BufferedReader(new FileReader(csvfile)); String outline = outreader.readLine(); if (!header) { writer.write(outline + "\n"); header = true; } while ((outline = outreader.readLine()) != null) { writer.write(outline + "\n"); } outreader.close(); csvfile.delete(); } } writer.close(); } logger.info("MS1 quant module is complete."); } catch (Exception e) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e)); throw e; } }
From source file:edu.msu.cme.rdp.classifier.train.validation.crossvalidate.CrossValidateMain.java
/** * This is the main method for cross validation test. * @param args/*from w w w . j ava 2 s . c om*/ * @throws IOException */ public static void main(String[] args) throws IOException { String tax_file = null; String source_file = null; String out_file = null; Integer partialLength = null; // default is full length float fraction = 0.1f; String rdmSelectedRank = null; int min_bootstrap_words = NBClassifier.MIN_BOOTSTRSP_WORDS; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption(TRAIN_TAXONFILE_SHORT_OPT)) { tax_file = line.getOptionValue(TRAIN_TAXONFILE_SHORT_OPT); } else { throw new ParseException("Taxonomy file must be specified"); } if (line.hasOption(TRAIN_SEQFILE_SHORT_OPT)) { source_file = line.getOptionValue(TRAIN_SEQFILE_SHORT_OPT); } else { throw new ParseException("Source training fasta file must be specified"); } if (line.hasOption(OUTFILE_SHORT_OPT)) { out_file = line.getOptionValue(OUTFILE_SHORT_OPT); } else { throw new ParseException("Output file must be specified"); } if (line.hasOption(LENGTH_SHORT_OPT)) { ; partialLength = new Integer(line.getOptionValue(LENGTH_SHORT_OPT)); } if (line.hasOption("fraction")) { fraction = Float.parseFloat(line.getOptionValue("fraction")); } if (line.hasOption("rdmRank")) { rdmSelectedRank = line.getOptionValue("rdmRank"); } if (line.hasOption(CmdOptions.MIN_BOOTSTRAP_WORDS_SHORT_OPT)) { min_bootstrap_words = Integer .parseInt(line.getOptionValue(CmdOptions.MIN_BOOTSTRAP_WORDS_SHORT_OPT)); if (min_bootstrap_words < NBClassifier.MIN_BOOTSTRSP_WORDS) { throw new IllegalArgumentException( min_bootstrap_words + " must be at least " + NBClassifier.MIN_BOOTSTRSP_WORDS); } } } catch (ParseException ex) { new HelpFormatter().printHelp(120, "CrossValidateMain", "", options, "", true); return; } boolean useSeed = true; // use seed for random number generator CrossValidate theObj = new CrossValidate(); theObj.runTest(new File(tax_file), new File(source_file), new File(out_file), rdmSelectedRank, fraction, partialLength, useSeed, min_bootstrap_words); }
From source file:DIA_Umpire_Quant.DIA_Umpire_IntLibSearch.java
/** * @param args the command line arguments *//*from w ww .j a v a 2s .c o m*/ public static void main(String[] args) throws FileNotFoundException, IOException, Exception { System.out.println( "================================================================================================="); System.out.println("DIA-Umpire targeted re-extraction analysis using internal library (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length != 1) { System.out.println( "command format error, the correct format should be : java -jar -Xmx10G DIA_Umpire_IntLibSearch.jar diaumpire_module.params"); return; } try { ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_intlibsearch.log"); } catch (Exception e) { } Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + args[0]); BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line = ""; String WorkFolder = ""; int NoCPUs = 2; String InternalLibID = ""; float ProbThreshold = 0.99f; float RTWindow_Int = -1f; float Freq = 0f; int TopNFrag = 6; TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); HashMap<String, File> AssignFiles = new HashMap<>(); //<editor-fold defaultstate="collapsed" desc="Reading parameter file"> while ((line = reader.readLine()) != null) { line = line.trim(); Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==File list begin")) { do { line = reader.readLine(); line = line.trim(); if (line.equals("==File list end")) { continue; } else if (!"".equals(line)) { File newfile = new File(line); if (newfile.exists()) { AssignFiles.put(newfile.getAbsolutePath(), newfile); } else { Logger.getRootLogger().info("File: " + newfile + " does not exist."); } } } while (!line.equals("==File list end")); } if (line.split("=").length < 2) { continue; } String type = line.split("=")[0].trim(); String value = line.split("=")[1].trim(); switch (type) { case "Path": { WorkFolder = value; break; } case "path": { WorkFolder = value; break; } case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "InternalLibID": { InternalLibID = value; break; } case "RTWindow_Int": { RTWindow_Int = Float.parseFloat(value); break; } case "ProbThreshold": { ProbThreshold = Float.parseFloat(value); break; } case "TopNFrag": { TopNFrag = Integer.parseInt(value); break; } case "Freq": { Freq = Float.parseFloat(value); break; } case "Fasta": { tandemPara.FastaPath = value; break; } } } } //</editor-fold> //Initialize PTM manager using compomics library PTMManager.GetInstance(); //Check if the fasta file can be found if (!new File(tandemPara.FastaPath).exists()) { Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath + " cannot be found, the process will be terminated, please check."); System.exit(1); } //Generate DIA file list ArrayList<DIAPack> FileList = new ArrayList<>(); try { File folder = new File(WorkFolder); if (!folder.exists()) { Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found."); System.exit(1); } for (final File fileEntry : folder.listFiles()) { if (fileEntry.isFile() && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry); } if (fileEntry.isDirectory()) { for (final File fileEntry2 : fileEntry.listFiles()) { if (fileEntry2.isFile() && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2); } } } } Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size()); for (File fileEntry : AssignFiles.values()) { Logger.getRootLogger().info(fileEntry.getAbsolutePath()); } for (File fileEntry : AssignFiles.values()) { String mzXMLFile = fileEntry.getAbsolutePath(); if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) { DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + mzXMLFile); if (!DiaFile.LoadDIASetting()) { Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete"); System.exit(1); } if (!DiaFile.LoadParams()) { Logger.getRootLogger().info("Loading parameters failed, job is incomplete"); System.exit(1); } Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "...."); //If the serialization file for ID file existed if (DiaFile.ReadSerializedLCMSID()) { DiaFile.IDsummary.ReduceMemoryUsage(); DiaFile.IDsummary.FastaPath = tandemPara.FastaPath; FileList.add(DiaFile); } } } //<editor-fold defaultstate="collapsed" desc="Targete re-extraction using internal library"> Logger.getRootLogger().info( "================================================================================================="); if (FileList.size() > 1) { Logger.getRootLogger().info("Targeted re-extraction using internal library"); FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder, InternalLibID); if (libManager == null) { Logger.getRootLogger().info("Building internal spectral library"); libManager = new FragmentLibManager(InternalLibID); ArrayList<LCMSID> LCMSIDList = new ArrayList<>(); for (DIAPack dia : FileList) { LCMSIDList.add(dia.IDsummary); } libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag); libManager.WriteFragmentLibSerialization(WorkFolder); } libManager.ReduceMemoryUsage(); Logger.getRootLogger() .info("Building retention time prediction model and generate candidate peptide list"); for (int i = 0; i < FileList.size(); i++) { FileList.get(i).IDsummary.ClearMappedPep(); } for (int i = 0; i < FileList.size(); i++) { for (int j = i + 1; j < FileList.size(); j++) { RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder, FileList.get(i).GetParameter(), FileList.get(i).IDsummary, FileList.get(j).IDsummary); alignment.GenerateModel(); alignment.GenerateMappedPepIon(); } FileList.get(i).ExportID(); FileList.get(i).IDsummary = null; } Logger.getRootLogger().info("Targeted matching........"); for (DIAPack diafile : FileList) { if (diafile.IDsummary == null) { diafile.ReadSerializedLCMSID(); } if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) { diafile.UseMappedIon = true; diafile.FilterMappedIonByProb = false; diafile.BuildStructure(); diafile.MS1FeatureMap.ReadPeakCluster(); diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster(); diafile.GenerateMassCalibrationRTMap(); diafile.TargetedExtractionQuant(false, libManager, ProbThreshold, RTWindow_Int); diafile.MS1FeatureMap.ClearAllPeaks(); diafile.IDsummary.ReduceMemoryUsage(); diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold); diafile.ExportID(); Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size() + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size()); diafile.ClearStructure(); } diafile.IDsummary = null; System.gc(); } Logger.getRootLogger().info( "================================================================================================="); } //</editor-fold> Logger.getRootLogger().info("Job done"); Logger.getRootLogger().info( "================================================================================================="); } catch (Exception e) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e)); throw e; } }
From source file:apps.classification.LearnSVMPerf.java
public static void main(String[] args) throws IOException { String cmdLineSyntax = LearnSVMPerf.class.getName() + " [OPTIONS] <path to svm_perf> <trainingIndexDirectory>"; Options options = new Options(); OptionBuilder.withArgName("c"); OptionBuilder.withDescription("The c value for svm_perf (default 0.01)"); OptionBuilder.withLongOpt("c"); OptionBuilder.isRequired(false);// w w w .j a v a 2 s. co m OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("t"); OptionBuilder.withDescription("Path for temporary files"); OptionBuilder.withLongOpt("t"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("l"); OptionBuilder.withDescription("The loss function to optimize (default 2):\n" + " 0 Zero/one loss: 1 if vector of predictions contains error, 0 otherwise.\n" + " 1 F1: 100 minus the F1-score in percent.\n" + " 2 Errorrate: Percentage of errors in prediction vector.\n" + " 3 Prec/Rec Breakeven: 100 minus PRBEP in percent.\n" + " 4 Prec@p: 100 minus precision at p in percent.\n" + " 5 Rec@p: 100 minus recall at p in percent.\n" + " 10 ROCArea: Percentage of swapped pos/neg pairs (i.e. 100 - ROCArea)."); OptionBuilder.withLongOpt("l"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("w"); OptionBuilder.withDescription("Choice of structural learning algorithm (default 9):\n" + " 0: n-slack algorithm described in [2]\n" + " 1: n-slack algorithm with shrinking heuristic\n" + " 2: 1-slack algorithm (primal) described in [5]\n" + " 3: 1-slack algorithm (dual) described in [5]\n" + " 4: 1-slack algorithm (dual) with constraint cache [5]\n" + " 9: custom algorithm in svm_struct_learn_custom.c"); OptionBuilder.withLongOpt("w"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("p"); OptionBuilder.withDescription("The value of p used by the prec@p and rec@p loss functions (default 0)"); OptionBuilder.withLongOpt("p"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("v"); OptionBuilder.withDescription("Verbose output"); OptionBuilder.withLongOpt("v"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("s"); OptionBuilder.withDescription("Don't delete temporary training file in svm_perf format (default: delete)"); OptionBuilder.withLongOpt("s"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); SvmPerfLearnerCustomizer classificationLearnerCustomizer = null; GnuParser parser = new GnuParser(); String[] remainingArgs = null; try { CommandLine line = parser.parse(options, args); remainingArgs = line.getArgs(); classificationLearnerCustomizer = new SvmPerfLearnerCustomizer(remainingArgs[0]); if (line.hasOption("c")) classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c"))); if (line.hasOption("w")) classificationLearnerCustomizer.setW(Integer.parseInt(line.getOptionValue("w"))); if (line.hasOption("p")) classificationLearnerCustomizer.setP(Integer.parseInt(line.getOptionValue("p"))); if (line.hasOption("l")) classificationLearnerCustomizer.setL(Integer.parseInt(line.getOptionValue("l"))); if (line.hasOption("v")) classificationLearnerCustomizer.printSvmPerfOutput(true); if (line.hasOption("s")) classificationLearnerCustomizer.setDeleteTrainingFiles(false); if (line.hasOption("t")) classificationLearnerCustomizer.setTempPath(line.getOptionValue("t")); } catch (Exception exp) { System.err.println("Parsing failed. Reason: " + exp.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } if (remainingArgs.length != 2) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } String indexFile = remainingArgs[1]; File file = new File(indexFile); String indexName = file.getName(); String indexPath = file.getParent(); // LEARNING SvmPerfLearner classificationLearner = new SvmPerfLearner(); classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer); FileSystemStorageManager storageManager = new FileSystemStorageManager(indexPath, false); storageManager.open(); IIndex training = TroveReadWriteHelper.readIndex(storageManager, indexName, TroveContentDBType.Full, TroveClassificationDBType.Full); storageManager.close(); IClassifier classifier = classificationLearner.build(training); File executableFile = new File(classificationLearnerCustomizer.getSvmPerfLearnPath()); SvmPerfDataManager dataManager = new SvmPerfDataManager(new SvmPerfClassifierCustomizer( executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_perf_classify")); String description = "_SVMPerf_C-" + classificationLearnerCustomizer.getC() + "_W-" + classificationLearnerCustomizer.getW() + "_L-" + classificationLearnerCustomizer.getL(); if (classificationLearnerCustomizer.getL() == 4 || classificationLearnerCustomizer.getL() == 5) description += "_P-" + classificationLearnerCustomizer.getP(); if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0) description += "_" + classificationLearnerCustomizer.getAdditionalParameters(); storageManager = new FileSystemStorageManager(indexPath, false); storageManager.open(); dataManager.write(storageManager, indexName + description, classifier); storageManager.close(); }