List of usage examples for java.lang Float parseFloat
public static float parseFloat(String s) throws NumberFormatException
From source file:edu.cmu.lti.oaqa.knn4qa.apps.FilterTranTable.java
public static void main(String[] args) { Options options = new Options(); options.addOption(INPUT_PARAM, null, true, INPUT_DESC); options.addOption(OUTPUT_PARAM, null, true, OUTPUT_DESC); options.addOption(CommonParams.MEM_FWD_INDEX_PARAM, null, true, CommonParams.MEM_FWD_INDEX_DESC); options.addOption(CommonParams.GIZA_ITER_QTY_PARAM, null, true, CommonParams.GIZA_ITER_QTY_PARAM); options.addOption(CommonParams.GIZA_ROOT_DIR_PARAM, null, true, CommonParams.GIZA_ROOT_DIR_PARAM); options.addOption(CommonParams.MIN_PROB_PARAM, null, true, CommonParams.MIN_PROB_DESC); options.addOption(CommonParams.MAX_WORD_QTY_PARAM, null, true, CommonParams.MAX_WORD_QTY_PARAM); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); try {/*ww w . j av a2 s . co m*/ CommandLine cmd = parser.parse(options, args); String outputFile = null; outputFile = cmd.getOptionValue(OUTPUT_PARAM); if (null == outputFile) { Usage("Specify 'A name of the output file'", options); } String gizaRootDir = cmd.getOptionValue(CommonParams.GIZA_ROOT_DIR_PARAM); if (null == gizaRootDir) { Usage("Specify '" + CommonParams.GIZA_ROOT_DIR_DESC + "'", options); } String gizaIterQty = cmd.getOptionValue(CommonParams.GIZA_ITER_QTY_PARAM); if (null == gizaIterQty) { Usage("Specify '" + CommonParams.GIZA_ITER_QTY_DESC + "'", options); } float minProb = 0; String tmpf = cmd.getOptionValue(CommonParams.MIN_PROB_PARAM); if (tmpf != null) { minProb = Float.parseFloat(tmpf); } int maxWordQty = Integer.MAX_VALUE; String tmpi = cmd.getOptionValue(CommonParams.MAX_WORD_QTY_PARAM); if (null != tmpi) { maxWordQty = Integer.parseInt(tmpi); } String memFwdIndxName = cmd.getOptionValue(CommonParams.MEM_FWD_INDEX_PARAM); if (null == memFwdIndxName) { Usage("Specify '" + CommonParams.MEM_FWD_INDEX_DESC + "'", options); } System.out.println("Filtering index: " + memFwdIndxName + " max # of frequent words: " + maxWordQty + " min. probability:" + minProb); VocabularyFilterAndRecoder filter = new FrequentIndexWordFilterAndRecoder(memFwdIndxName, maxWordQty); String srcVocFile = CompressUtils.findFileVariant(gizaRootDir + "/source.vcb"); System.out.println("Source vocabulary file: " + srcVocFile); GizaVocabularyReader srcVoc = new GizaVocabularyReader(srcVocFile, filter); String dstVocFile = CompressUtils.findFileVariant(gizaRootDir + "/target.vcb"); System.out.println("Target vocabulary file: " + dstVocFile); GizaVocabularyReader dstVoc = new GizaVocabularyReader(CompressUtils.findFileVariant(dstVocFile), filter); String inputFile = CompressUtils.findFileVariant(gizaRootDir + "/output.t1." + gizaIterQty); BufferedReader finp = new BufferedReader( new InputStreamReader(CompressUtils.createInputStream(inputFile))); BufferedWriter fout = new BufferedWriter( new OutputStreamWriter(CompressUtils.createOutputStream(outputFile))); try { String line; int prevSrcId = -1; int wordQty = 0; long addedQty = 0; long totalQty = 0; boolean isNotFiltered = false; for (totalQty = 0; (line = finp.readLine()) != null;) { ++totalQty; // Skip empty lines line = line.trim(); if (line.isEmpty()) continue; GizaTranRec rec = new GizaTranRec(line); if (rec.mSrcId != prevSrcId) { ++wordQty; } if (totalQty % REPORT_INTERVAL_QTY == 0) { System.out.println(String.format( "Processed %d lines (%d source word entries) from '%s', added %d lines", totalQty, wordQty, inputFile, addedQty)); } // isNotFiltered should be set after procOneWord if (rec.mSrcId != prevSrcId) { if (rec.mSrcId == 0) isNotFiltered = true; else { String wordSrc = srcVoc.getWord(rec.mSrcId); isNotFiltered = filter == null || (wordSrc != null && filter.checkWord(wordSrc)); } } prevSrcId = rec.mSrcId; if (rec.mProb >= minProb && isNotFiltered) { String wordDst = dstVoc.getWord(rec.mDstId); if (filter == null || (wordDst != null && filter.checkWord(wordDst))) { fout.write(String.format(rec.mSrcId + " " + rec.mDstId + " " + rec.mProb)); fout.newLine(); addedQty++; } } } System.out.println( String.format("Processed %d lines (%d source word entries) from '%s', added %d lines", totalQty, wordQty, inputFile, addedQty)); } finally { finp.close(); fout.close(); } } catch (ParseException e) { Usage("Cannot parse arguments", options); } catch (Exception e) { e.printStackTrace(); System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:SequentialPersonalizedPageRank.java
@SuppressWarnings({ "static-access" }) public static void main(String[] args) throws IOException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(/*from w w w . j a v a 2 s . c o m*/ OptionBuilder.withArgName("val").hasArg().withDescription("random jump factor").create(JUMP)); options.addOption(OptionBuilder.withArgName("node").hasArg() .withDescription("source node (i.e., destination of the random jump)").create(SOURCE)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(SOURCE)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(SequentialPersonalizedPageRank.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String infile = cmdline.getOptionValue(INPUT); final String source = cmdline.getOptionValue(SOURCE); float alpha = cmdline.hasOption(JUMP) ? Float.parseFloat(cmdline.getOptionValue(JUMP)) : 0.15f; int edgeCnt = 0; DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>(); BufferedReader data = new BufferedReader(new InputStreamReader(new FileInputStream(infile))); String line; while ((line = data.readLine()) != null) { line.trim(); String[] arr = line.split("\\t"); for (int i = 1; i < arr.length; i++) { graph.addEdge(new Integer(edgeCnt++), arr[0], arr[i]); } } data.close(); if (!graph.containsVertex(source)) { System.err.println("Error: source node not found in the graph!"); System.exit(-1); } WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<String, Integer>(); Set<Set<String>> components = clusterer.transform(graph); int numComponents = components.size(); System.out.println("Number of components: " + numComponents); System.out.println("Number of edges: " + graph.getEdgeCount()); System.out.println("Number of nodes: " + graph.getVertexCount()); System.out.println("Random jump factor: " + alpha); // Compute personalized PageRank. PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<String, Integer>(graph, new Transformer<String, Double>() { @Override public Double transform(String vertex) { return vertex.equals(source) ? 1.0 : 0; } }, alpha); ranker.evaluate(); // Use priority queue to sort vertices by PageRank values. PriorityQueue<Ranking<String>> q = new PriorityQueue<Ranking<String>>(); int i = 0; for (String pmid : graph.getVertices()) { q.add(new Ranking<String>(i++, ranker.getVertexScore(pmid), pmid)); } // Print PageRank values. System.out.println("\nPageRank of nodes, in descending order:"); Ranking<String> r = null; while ((r = q.poll()) != null) { System.out.println(r.rankScore + "\t" + r.getRanked()); } }
From source file:edu.umd.shrawanraina.SequentialPersonalizedPageRank.java
@SuppressWarnings({ "static-access" }) public static void main(String[] args) throws IOException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(// www .ja v a 2 s . c o m OptionBuilder.withArgName("val").hasArg().withDescription("random jump factor").create(JUMP)); options.addOption(OptionBuilder.withArgName("node").hasArg() .withDescription("source node (i.e., destination of the random jump)").create(SOURCE)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(SOURCE)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(SequentialPersonalizedPageRank.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String infile = cmdline.getOptionValue(INPUT); final String source = cmdline.getOptionValue(SOURCE); float alpha = cmdline.hasOption(JUMP) ? Float.parseFloat(cmdline.getOptionValue(JUMP)) : 0.15f; int edgeCnt = 0; DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>(); BufferedReader data = new BufferedReader(new InputStreamReader(new FileInputStream(infile))); String line; while ((line = data.readLine()) != null) { line.trim(); String[] arr = line.split("\\t"); for (int i = 1; i < arr.length; i++) { graph.addEdge(new Integer(edgeCnt++), arr[0], arr[i]); } } data.close(); if (!graph.containsVertex(source)) { System.err.println("Error: source node not found in the graph!"); System.exit(-1); } WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<String, Integer>(); Set<Set<String>> components = clusterer.transform(graph); int numComponents = components.size(); System.out.println("Number of components: " + numComponents); System.out.println("Number of edges: " + graph.getEdgeCount()); System.out.println("Number of nodes: " + graph.getVertexCount()); System.out.println("Random jump factor: " + alpha); // Compute personalized PageRank. PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<String, Integer>(graph, new Transformer<String, Double>() { public Double transform(String vertex) { return vertex.equals(source) ? 1.0 : 0; } }, alpha); ranker.evaluate(); // Use priority queue to sort vertices by PageRank values. PriorityQueue<Ranking<String>> q = new PriorityQueue<Ranking<String>>(); int i = 0; for (String pmid : graph.getVertices()) { q.add(new Ranking<String>(i++, ranker.getVertexScore(pmid), pmid)); } // Print PageRank values. System.out.println("\nPageRank of nodes, in descending order:"); Ranking<String> r = null; while ((r = q.poll()) != null) { System.out.println(r.rankScore + "\t" + r.getRanked()); } }
From source file:edu.cmu.lti.oaqa.knn4qa.apps.QueryGenNMSLIB.java
public static void main(String[] args) { Options options = new Options(); options.addOption(CommonParams.QUERY_FILE_PARAM, null, true, CommonParams.QUERY_FILE_DESC); options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC); options.addOption(CommonParams.KNN_QUERIES_PARAM, null, true, CommonParams.KNN_QUERIES_DESC); options.addOption(CommonParams.NMSLIB_FIELDS_PARAM, null, true, CommonParams.NMSLIB_FIELDS_DESC); options.addOption(CommonParams.MAX_NUM_QUERY_PARAM, null, true, CommonParams.MAX_NUM_QUERY_DESC); options.addOption(CommonParams.SEL_PROB_PARAM, null, true, CommonParams.SEL_PROB_DESC); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); BufferedWriter knnQueries = null; int maxNumQuery = Integer.MAX_VALUE; Float selProb = null;// ww w . j av a 2s.com try { CommandLine cmd = parser.parse(options, args); String queryFile = null; if (cmd.hasOption(CommonParams.QUERY_FILE_PARAM)) { queryFile = cmd.getOptionValue(CommonParams.QUERY_FILE_PARAM); } else { Usage("Specify 'query file'", options); } String knnQueriesFile = cmd.getOptionValue(CommonParams.KNN_QUERIES_PARAM); if (null == knnQueriesFile) Usage("Specify '" + CommonParams.KNN_QUERIES_DESC + "'", options); String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM); if (tmpn != null) { try { maxNumQuery = Integer.parseInt(tmpn); } catch (NumberFormatException e) { Usage("Maximum number of queries isn't integer: '" + tmpn + "'", options); } } String tmps = cmd.getOptionValue(CommonParams.NMSLIB_FIELDS_PARAM); if (null == tmps) Usage("Specify '" + CommonParams.NMSLIB_FIELDS_DESC + "'", options); String nmslibFieldList[] = tmps.split(","); knnQueries = new BufferedWriter(new FileWriter(knnQueriesFile)); knnQueries.write("isQueryFile=1"); knnQueries.newLine(); knnQueries.newLine(); String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM); if (null == memIndexPref) { Usage("Specify '" + CommonParams.MEMINDEX_DESC + "'", options); } String tmpf = cmd.getOptionValue(CommonParams.SEL_PROB_PARAM); if (tmpf != null) { try { selProb = Float.parseFloat(tmpf); } catch (NumberFormatException e) { Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options); } if (selProb < Float.MIN_NORMAL || selProb + Float.MIN_NORMAL >= 1) Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options); } BufferedReader inpText = new BufferedReader( new InputStreamReader(CompressUtils.createInputStream(queryFile))); String docText = XmlHelper.readNextXMLIndexEntry(inpText); NmslibQueryGenerator queryGen = new NmslibQueryGenerator(nmslibFieldList, memIndexPref); Random rnd = new Random(); for (int docNum = 1; docNum <= maxNumQuery && docText != null; ++docNum, docText = XmlHelper.readNextXMLIndexEntry(inpText)) { if (selProb != null) { if (rnd.nextFloat() > selProb) continue; } Map<String, String> docFields = null; try { docFields = XmlHelper.parseXMLIndexEntry(docText); String queryObjStr = queryGen.getStrObjForKNNService(docFields); knnQueries.append(queryObjStr); knnQueries.newLine(); } catch (SAXException e) { System.err.println("Parsing error, offending DOC:" + NL + docText + " doc # " + docNum); throw new Exception("Parsing error."); } } knnQueries.close(); } catch (ParseException e) { Usage("Cannot parse arguments", options); if (null != knnQueries) try { knnQueries.close(); } catch (IOException e1) { e1.printStackTrace(); } } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); try { if (knnQueries != null) knnQueries.close(); } catch (IOException e1) { e1.printStackTrace(); } System.exit(1); } System.out.println("Terminated successfully!"); }
From source file:edu.cmu.lti.oaqa.knn4qa.apps.BuildRetrofitLexicons.java
public static void main(String[] args) { Options options = new Options(); options.addOption(CommonParams.GIZA_ROOT_DIR_PARAM, null, true, CommonParams.GIZA_ROOT_DIR_DESC); options.addOption(CommonParams.GIZA_ITER_QTY_PARAM, null, true, CommonParams.GIZA_ITER_QTY_DESC); options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC); options.addOption(OUT_FILE_PARAM, null, true, OUT_FILE_DESC); options.addOption(MIN_PROB_PARAM, null, true, MIN_PROB_DESC); options.addOption(FORMAT_PARAM, null, true, FORMAT_DESC); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); try {//from w w w. ja v a2 s . com CommandLine cmd = parser.parse(options, args); String gizaRootDir = cmd.getOptionValue(CommonParams.GIZA_ROOT_DIR_PARAM); int gizaIterQty = -1; if (cmd.hasOption(CommonParams.GIZA_ITER_QTY_PARAM)) { gizaIterQty = Integer.parseInt(cmd.getOptionValue(CommonParams.GIZA_ITER_QTY_PARAM)); } else { Usage("Specify: " + CommonParams.GIZA_ITER_QTY_PARAM, options); } String outFileName = cmd.getOptionValue(OUT_FILE_PARAM); if (null == outFileName) { Usage("Specify: " + OUT_FILE_PARAM, options); } String indexDir = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM); if (null == indexDir) { Usage("Specify: " + CommonParams.MEMINDEX_DESC, options); } FormatType outType = FormatType.kOrig; String outTypeStr = cmd.getOptionValue(FORMAT_PARAM); if (null != outTypeStr) { if (outTypeStr.equals(ORIG_TYPE)) { outType = FormatType.kOrig; } else if (outTypeStr.equals(WEIGHTED_TYPE)) { outType = FormatType.kWeighted; } else if (outTypeStr.equals(UNWEIGHTED_TYPE)) { outType = FormatType.kUnweighted; } else { Usage("Unknown format type: " + outTypeStr, options); } } float minProb = 0; if (cmd.hasOption(MIN_PROB_PARAM)) { minProb = Float.parseFloat(cmd.getOptionValue(MIN_PROB_PARAM)); } else { Usage("Specify: " + MIN_PROB_PARAM, options); } System.out.println(String.format( "Saving lexicon to '%s' (output format '%s'), keep only entries with translation probability >= %f", outFileName, outType.toString(), minProb)); // We use unlemmatized text here, because lemmatized dictionary is going to be mostly subset of the unlemmatized one. InMemForwardIndex textIndex = new InMemForwardIndex(FeatureExtractor.indexFileName(indexDir, FeatureExtractor.mFieldNames[FeatureExtractor.TEXT_UNLEMM_FIELD_ID])); InMemForwardIndexFilterAndRecoder filterAndRecoder = new InMemForwardIndexFilterAndRecoder(textIndex); String prefix = gizaRootDir + "/" + FeatureExtractor.mFieldNames[FeatureExtractor.TEXT_UNLEMM_FIELD_ID] + "/"; GizaVocabularyReader answVoc = new GizaVocabularyReader(prefix + "source.vcb", filterAndRecoder); GizaVocabularyReader questVoc = new GizaVocabularyReader(prefix + "target.vcb", filterAndRecoder); GizaTranTableReaderAndRecoder gizaTable = new GizaTranTableReaderAndRecoder(false, // we don't need to flip the table for the purpose prefix + "/output.t1." + gizaIterQty, filterAndRecoder, answVoc, questVoc, (float) FeatureExtractor.DEFAULT_PROB_SELF_TRAN, minProb); BufferedWriter outFile = new BufferedWriter(new FileWriter(outFileName)); for (int srcWordId = 0; srcWordId <= textIndex.getMaxWordId(); ++srcWordId) { GizaOneWordTranRecs tranRecs = gizaTable.getTranProbs(srcWordId); if (null != tranRecs) { String wordSrc = textIndex.getWord(srcWordId); StringBuffer sb = new StringBuffer(); sb.append(wordSrc); for (int k = 0; k < tranRecs.mDstIds.length; ++k) { float prob = tranRecs.mProbs[k]; if (prob >= minProb) { int dstWordId = tranRecs.mDstIds[k]; if (dstWordId == srcWordId && outType != FormatType.kWeighted) continue; // Don't duplicate the word, unless it's probability weighted sb.append(' '); String dstWord = textIndex.getWord(dstWordId); if (null == dstWord) { throw new Exception( "Bug or inconsistent data: Couldn't retriev a word for wordId = " + dstWordId); } if (dstWord.indexOf(':') >= 0) throw new Exception( "Illegal dictionary word '" + dstWord + "' b/c it contains ':'"); sb.append(dstWord); if (outType != FormatType.kOrig) { sb.append(':'); sb.append(outType == FormatType.kWeighted ? prob : 1); } } } outFile.write(sb.toString()); outFile.newLine(); } } outFile.close(); } catch (ParseException e) { e.printStackTrace(); Usage("Cannot parse arguments", options); } catch (Exception e) { e.printStackTrace(); System.err.println("Terminating due to an exception: " + e); System.exit(1); } System.out.println("Terminated successfully!"); }
From source file:FDREstimator.ExportWithEstimatedFDR.java
/** * @param args the command line arguments *///from w w w. j a v a 2s. com public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException, ClassNotFoundException, XmlPullParserException, InterruptedException, SQLException { System.out.println( "================================================================================================="); System.out.println("Umpire search result parser(version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length == 0) { System.out.println( "command : java -jar -Xmx1G Umpire-SearchResultParser.jar [Options] [Combined ProtXML file] [PepXML files...]"); System.out.println(""); System.out.println("ProtXML extension: *.prot.xml or *.ProtXML"); System.out.println("PepXML extension: *.pep.xml or *.PepXML"); System.out.println("\nOptions"); System.out.println("\t-MP\tMin protein parsing probability\tex: -MP0.1f (default: -1, no filtering)"); System.out.println("\t-Mp\tMin PSM parsing probability\tex: -Mp0.1f (default: -1, no filtering)"); System.out.println("\t-fP\tProtein FDR\tex: -fP0.01 (default: 0.01, no filtering: -1)"); System.out.println("\t-fp\tPeptide FDR\tex: -fp0.05 (default: 0.01, no filtering: -1)"); System.out.println("\t-d\tDecoy tag prefix\tex: -dDECOY (default: rev_)"); System.out.println( "\t-C\t(0 or 1) Correct mass diff derived from isotope error\tex:-C0 (default:0, no correction)"); System.out.println("\t-fa\tFasta file"); System.out.println("\t-N\tOutput filename"); System.out.println( "\t-pt\tInitial protein probability filtering threshold\tex: -pt0.5 (default: 0.5, no filtering : -1)"); System.out.println( "\t-rf\tR factor threshold, proteins with protein probablity less than the threshold will be used to estimate the R factor \n\t\tex: -rf0.2 (default: 0.2, do not use R factor: -1)"); return; } ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "parser_debug.log"); float protFDR = 0.01f; float pepFDR = 0.01f; float MinpepProb = -1f; float MinprotProb = -1f; boolean CorrectMassDiff = false; String DecoyTag = "rev_"; String Fasta = ""; String Outputname = ""; float protprob = 0.5f; float rfthreshold = 0.2f; String ProtXML = ""; ArrayList<String> PepXML = new ArrayList<>(); for (int i = 0; i < args.length; i++) { if (args[i].startsWith("-")) { if (args[i].startsWith("-fP")) { protFDR = Float.parseFloat(args[i].substring(3)); Logger.getRootLogger().info("Protein FDR: " + protFDR); } if (args[i].startsWith("-fp")) { pepFDR = Float.parseFloat(args[i].substring(3)); Logger.getRootLogger().info("Peptide FDR: " + pepFDR); } if (args[i].startsWith("-MP")) { MinprotProb = Float.parseFloat(args[i].substring(3)); Logger.getRootLogger().info("Min protein parsing probability: " + MinprotProb); } if (args[i].startsWith("-Mp")) { MinpepProb = Float.parseFloat(args[i].substring(3)); Logger.getRootLogger().info("Min PSM parsing probability: " + MinpepProb); } if (args[i].startsWith("-d")) { DecoyTag = args[i].substring(2); Logger.getRootLogger().info("Decoy tag: " + DecoyTag); } if (args[i].startsWith("-fa")) { Fasta = args[i].substring(3); Logger.getRootLogger().info("Fasta file: " + Fasta); } if (args[i].startsWith("-N")) { Outputname = args[i].substring(2); Logger.getRootLogger().info("Output filename: " + Outputname); } if (args[i].startsWith("-C")) { if (args[i].substring(2).equals("1")) { CorrectMassDiff = true; } Logger.getRootLogger().info("Correct mass diff: " + CorrectMassDiff); } if (args[i].startsWith("-pt")) { protprob = Float.parseFloat(args[i].substring(3)); Logger.getRootLogger().info("Initial protein probablity filtering threshold: " + protprob); } if (args[i].startsWith("-rf")) { rfthreshold = Float.parseFloat(args[i].substring(3)); Logger.getRootLogger().info("R factor threshold: " + rfthreshold); } } if (args[i].endsWith(".pep.xml") || args[i].endsWith(".PepXML")) { PepXML.add(args[i]); } if (args[i].endsWith(".prot.xml") || args[i].endsWith(".ProtXML")) { ProtXML = args[i]; } } if (!Outputname.equals("")) { Outputname = Outputname + "_"; } Outputname = Outputname + MSUmpire.Utility.DateTimeTag.GetTag(); LCMSID lcmsid = new LCMSID(Outputname, DecoyTag, Fasta); for (String pepxml : PepXML) { LCMSID pepxmlid = new LCMSID(pepxml, DecoyTag, Fasta); PepXMLParser pepxmlparser = new PepXMLParser(pepxmlid, pepxml, MinpepProb, CorrectMassDiff); if (pepFDR != -1f) { pepxmlid.FilterByPepDecoyFDR(DecoyTag, pepFDR); } Logger.getRootLogger().info("peptide No.:" + pepxmlid.GetPepIonList().size() + "; Peptide level threshold: " + pepxmlid.PepProbThreshold); for (PepIonID pepID : pepxmlid.GetPepIonList().values()) { lcmsid.AddPeptideID(pepID); } } if (!"".equals(ProtXML)) { ProtXMLParser protxmlparser = new ProtXMLParser(lcmsid, ProtXML, MinprotProb); lcmsid.DecoyTag = DecoyTag; if (protprob != -1f) { lcmsid.RemoveLowLocalPWProtein(protprob); } float rf = 1f; if (rfthreshold != -1f) { rf = lcmsid.GetRFactor(rfthreshold); } if (protFDR != -1f) { lcmsid.FilterByProteinDecoyFDRUsingMaxIniProb(lcmsid.DecoyTag, protFDR / rf); } if (!"".equals(Fasta)) { lcmsid.LoadSequence(); } lcmsid.ReMapProPep(); lcmsid.ExportProtID(); } lcmsid.CreateInstanceForAllPepIon(); lcmsid.ExportPepID(); Logger.getRootLogger().info("Protein No.:" + lcmsid.ProteinList.size() + "; All peptide ions.:" + lcmsid.GetPepIonList().size()); }
From source file:edu.msu.cme.rdp.seqmatch.cli.SeqMatchMain.java
public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.println("USAGE: SeqMatchMain [train|seqmatch] <args>"); return;//from w ww . j av a2 s . co m } String cmd = args[0]; args = Arrays.copyOfRange(args, 1, args.length); if (cmd.equals("train")) { if (args.length != 2) { System.err.println("USAGE: train <reference sequences> <trainee_out_file_prefix>" + "\nMultiple trainee output files might be created, each containing maximum " + Trainee.MAX_NUM_SEQ + " sequences"); return; } File refSeqs = new File(args[0]); File traineeFileOut = new File(args[1]); //maybe more than 1 trainee files need to be created, depending on the number of seqs CreateMultiMatchFromFile.getMultiTrainee(refSeqs, traineeFileOut); } else if (cmd.equals("seqmatch")) { File refFile = null; File queryFile = null; HashMap<String, String> descMap = new HashMap<String, String>(); PrintStream out = new PrintStream(System.out); int knn = 20; float minSab = .5f; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("knn")) { knn = Integer.parseInt(line.getOptionValue("knn")); } if (line.hasOption("sab")) { minSab = Float.parseFloat(line.getOptionValue("sab")); } if (line.hasOption("desc")) { descMap = readDesc(new File(line.getOptionValue("desc"))); } if (line.hasOption("outFile")) { out = new PrintStream(new File(line.getOptionValue("outFile"))); } args = line.getArgs(); if (args.length != 2) { throw new Exception("Unexpected number of command line arguments"); } refFile = new File(args[0]); queryFile = new File(args[1]); } catch (Exception e) { new HelpFormatter().printHelp("seqmatch <refseqs | trainee_file_or_dir> <query_file>\n" + " trainee_file_or_dir is a single trainee file or a directory containing multiple trainee files", options); System.err.println("Error: " + e.getMessage()); return; } SeqMatch seqmatch = null; if (refFile.isDirectory()) { // a directory of trainee files List<SeqMatch> engineList = new ArrayList<SeqMatch>(); for (File f : refFile.listFiles()) { if (!f.isHidden()) { TwowaySeqMatch match = new TwowaySeqMatch(new SeqMatchEngine(new StorageTrainee(f))); engineList.add(match); } } seqmatch = new MultiTraineeSeqMatch(engineList); } else { // a single fasta file or trainee file if (SeqUtils.guessFileFormat(refFile) == SequenceFormat.UNKNOWN) { seqmatch = CLISeqMatchFactory.trainTwowaySeqMatch(new StorageTrainee(refFile)); } else { seqmatch = CreateMultiMatchFromFile.getMultiMatch(refFile); } } out.println("query name\tmatch seq\torientation\tS_ab score\tunique oligomers\tdescription"); SeqReader reader = new SequenceReader(queryFile); Sequence seq; while ((seq = reader.readNextSequence()) != null) { SeqMatchResultSet resultSet = seqmatch.match(seq, knn); for (SeqMatchResult result : resultSet) { char r = '+'; if (result.isReverse()) { r = '-'; } if (result.getScore() > minSab) { out.println(seq.getSeqName() + "\t" + result.getSeqName() + "\t" + r + "\t" + result.getScore() + "\t" + resultSet.getQueryWordCount() + "\t" + descMap.get(result.getSeqName())); } } } out.close(); } else { throw new IllegalArgumentException("USAGE: SeqMatchMain [train|seqmatch] <args>"); } }
From source file:edu.umd.ujjwalgoel.AnalyzePMI.java
@SuppressWarnings({ "static-access" }) public static void main(String[] args) { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); CommandLine cmdline = null;//from w w w . j a va 2s.c om CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(AnalyzePMI.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String inputPath = cmdline.getOptionValue(INPUT); System.out.println("input path: " + inputPath); BufferedReader br = null; int countPairs = 0; List<PairOfWritables<PairOfStrings, FloatWritable>> pmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>(); List<PairOfWritables<PairOfStrings, FloatWritable>> cloudPmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>(); List<PairOfWritables<PairOfStrings, FloatWritable>> lovePmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>(); PairOfWritables<PairOfStrings, FloatWritable> highestPMI = null; PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI = null; PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI2 = null; PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI3 = null; PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI = null; PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI2 = null; PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI3 = null; try { FileSystem fs = FileSystem.get(new Configuration()); FileStatus[] status = fs.listStatus(new Path(inputPath)); //PairOfStrings pair = new PairOfStrings(); for (int i = 0; i < status.length; i++) { br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath()))); String line = br.readLine(); while (line != null) { String[] words = line.split("\\t"); float value = Float.parseFloat(words[1].trim()); String[] wordPair = words[0].replaceAll("\\(", "").replaceAll("\\)", "").split(","); PairOfStrings pair = new PairOfStrings(); pair.set(wordPair[0].trim(), wordPair[1].trim()); if (wordPair[0].trim().equals("cloud")) { PairOfWritables<PairOfStrings, FloatWritable> cloudPmi = new PairOfWritables<PairOfStrings, FloatWritable>(); cloudPmi.set(pair, new FloatWritable(value)); cloudPmis.add(cloudPmi); if ((highestCloudPMI == null) || (highestCloudPMI.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) { highestCloudPMI = cloudPmi; } else if ((highestCloudPMI2 == null) || (highestCloudPMI2.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) { highestCloudPMI2 = cloudPmi; } else if ((highestCloudPMI3 == null) || (highestCloudPMI3.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) { highestCloudPMI3 = cloudPmi; } } if (wordPair[0].trim().equals("love")) { PairOfWritables<PairOfStrings, FloatWritable> lovePmi = new PairOfWritables<PairOfStrings, FloatWritable>(); lovePmi.set(pair, new FloatWritable(value)); lovePmis.add(lovePmi); if ((highestLovePMI == null) || (highestLovePMI.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) { highestLovePMI = lovePmi; } else if ((highestLovePMI2 == null) || (highestLovePMI2.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) { highestLovePMI2 = lovePmi; } else if ((highestLovePMI3 == null) || (highestLovePMI3.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) { highestLovePMI3 = lovePmi; } } PairOfWritables<PairOfStrings, FloatWritable> pmi = new PairOfWritables<PairOfStrings, FloatWritable>(); pmi.set(pair, new FloatWritable(value)); pmis.add(pmi); if (highestPMI == null) { highestPMI = pmi; } else if (highestPMI.getRightElement().compareTo(pmi.getRightElement()) < 0) { highestPMI = pmi; } countPairs++; line = br.readLine(); } } } catch (Exception ex) { System.out.println("ERROR" + ex.getMessage()); } /*Collections.sort(pmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() { public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1, PairOfWritables<PairOfStrings, FloatWritable> e2) { /*if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) { return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement()); } return e2.getRightElement().compareTo(e1.getRightElement()); } }); Collections.sort(cloudPmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() { public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1, PairOfWritables<PairOfStrings, FloatWritable> e2) { if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) { return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement()); } return e2.getRightElement().compareTo(e1.getRightElement()); } }); Collections.sort(lovePmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() { public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1, PairOfWritables<PairOfStrings, FloatWritable> e2) { if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) { return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement()); } return e2.getRightElement().compareTo(e1.getRightElement()); } }); PairOfWritables<PairOfStrings, FloatWritable> highestPMI = pmis.get(0); PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI = cloudPmis.get(0); PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI2 = cloudPmis.get(1); PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI3 = cloudPmis.get(2); PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI = lovePmis.get(0); PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI2 = lovePmis.get(1); PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI3 = lovePmis.get(2);*/ System.out.println("Total Distinct Pairs : " + countPairs); System.out.println("Pair with highest PMI : (" + highestPMI.getLeftElement().getLeftElement() + ", " + highestPMI.getLeftElement().getRightElement()); System.out .println("Word with highest PMI with Cloud : " + highestCloudPMI.getLeftElement().getRightElement() + " with value : " + highestCloudPMI.getRightElement().get()); System.out.println( "Word with second highest PMI with Cloud : " + highestCloudPMI2.getLeftElement().getRightElement() + " with value : " + highestCloudPMI2.getRightElement().get()); System.out.println( "Word with third highest PMI with Cloud : " + highestCloudPMI3.getLeftElement().getRightElement() + " with value : " + highestCloudPMI3.getRightElement().get()); System.out.println("Word with highest PMI with Love : " + highestLovePMI.getLeftElement().getRightElement() + " with value : " + highestLovePMI.getRightElement().get()); System.out.println( "Word with second highest PMI with Love : " + highestLovePMI2.getLeftElement().getRightElement() + " with value : " + highestLovePMI2.getRightElement().get()); System.out.println( "Word with third highest PMI with Love : " + highestLovePMI3.getLeftElement().getRightElement() + " with value : " + highestLovePMI3.getRightElement().get()); }
From source file:marytts.signalproc.effects.VocalTractLinearScalerEffect.java
/** * Command line interface to the vocal tract linear scaler effect. * /* w ww . j a v a 2 s . c o m*/ * @param args the command line arguments. Exactly two arguments are expected: * (1) the factor by which to scale the vocal tract (between 0.25 = very long and 4.0 = very short vocal tract); * (2) the filename of the wav file to modify. * Will produce a file basename_factor.wav, where basename is the filename without the extension. * @throws Exception if processing fails for some reason. */ public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println( "Usage: java " + VocalTractLinearScalerEffect.class.getName() + " <factor> <filename>"); System.exit(1); } float factor = Float.parseFloat(args[0]); String filename = args[1]; AudioDoubleDataSource input = new AudioDoubleDataSource( AudioSystem.getAudioInputStream(new File(filename))); AudioFormat format = input.getAudioFormat(); VocalTractLinearScalerEffect effect = new VocalTractLinearScalerEffect((int) format.getSampleRate()); DoubleDataSource output = effect.apply(input, "amount:" + factor); DDSAudioInputStream audioOut = new DDSAudioInputStream(output, format); String outFilename = FilenameUtils.removeExtension(filename) + "_" + factor + ".wav"; AudioSystem.write(audioOut, AudioFileFormat.Type.WAVE, new File(outFilename)); System.out.println("Created file " + outFilename); }
From source file:edu.cmu.lti.oaqa.knn4qa.apps.GenTranEmbeddings.java
public static void main(String[] args) { Options options = new Options(); options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC); options.addOption(CommonParams.GIZA_ROOT_DIR_PARAM, null, true, CommonParams.GIZA_ROOT_DIR_DESC); options.addOption(CommonParams.GIZA_ITER_QTY_PARAM, null, true, CommonParams.GIZA_ITER_QTY_DESC); options.addOption(OUT_FILE_PARAM, null, true, OUT_FILE_DESC); options.addOption(MAX_MODEL_ORDER_PARAM, null, true, MAX_MODEL_ORDER_DESC); options.addOption(MIN_PROB_PARAM, null, true, MIN_PROB_DESC); options.addOption(MAX_DIGIT_PARAM, null, true, MAX_DIGIT_DESC); options.addOption(CommonParams.MAX_WORD_QTY_PARAM, null, true, CommonParams.MAX_WORD_QTY_PARAM); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); try {//from www . j a v a 2s . co m CommandLine cmd = parser.parse(options, args); int maxWordQty = Integer.MAX_VALUE; String tmpi = cmd.getOptionValue(CommonParams.MAX_WORD_QTY_PARAM); if (null != tmpi) { maxWordQty = Integer.parseInt(tmpi); } String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM); if (null == memIndexPref) { Usage("Specify '" + CommonParams.MEMINDEX_DESC + "'", options); } String gizaRootDir = cmd.getOptionValue(CommonParams.GIZA_ROOT_DIR_PARAM); if (null == gizaRootDir) { Usage("Specify '" + CommonParams.GIZA_ROOT_DIR_PARAM + "'", options); } int gizaIterQty = -1; if (cmd.hasOption(CommonParams.GIZA_ITER_QTY_PARAM)) { gizaIterQty = Integer.parseInt(cmd.getOptionValue(CommonParams.GIZA_ITER_QTY_PARAM)); } if (gizaIterQty <= 0) { Usage("Specify '" + CommonParams.GIZA_ITER_QTY_DESC + "'", options); } int maxModelOrder = -1; if (cmd.hasOption(MAX_MODEL_ORDER_PARAM)) { maxModelOrder = Integer.parseInt(cmd.getOptionValue(MAX_MODEL_ORDER_PARAM)); } String outFilePrefix = cmd.getOptionValue(OUT_FILE_PARAM); if (null == outFilePrefix) { Usage("Specify '" + OUT_FILE_DESC + "'", options); } float minProb = 0; if (cmd.hasOption(MIN_PROB_PARAM)) { minProb = Float.parseFloat(cmd.getOptionValue(MIN_PROB_PARAM)); } else { Usage("Specify '" + MIN_PROB_DESC + "'", options); } int maxDigit = 5; if (cmd.hasOption(MAX_DIGIT_PARAM)) { maxDigit = Integer.parseInt(cmd.getOptionValue(MAX_DIGIT_PARAM)); } // We use unlemmatized text here, because lemmatized dictionary is going to be mostly subset of the unlemmatized one. int fieldId = FeatureExtractor.TEXT_UNLEMM_FIELD_ID; String memFwdIndxName = FeatureExtractor.indexFileName(memIndexPref, FeatureExtractor.mFieldNames[fieldId]); FrequentIndexWordFilterAndRecoder filterAndRecoder = new FrequentIndexWordFilterAndRecoder( memFwdIndxName, maxWordQty); InMemForwardIndex index = new InMemForwardIndex(memFwdIndxName); BM25SimilarityLucene simil = new BM25SimilarityLucene(FeatureExtractor.BM25_K1, FeatureExtractor.BM25_B, index); String prefix = gizaRootDir + "/" + FeatureExtractor.mFieldNames[fieldId] + "/"; GizaVocabularyReader answVoc = new GizaVocabularyReader(prefix + "source.vcb", filterAndRecoder); GizaVocabularyReader questVoc = new GizaVocabularyReader(prefix + "target.vcb", filterAndRecoder); GizaTranTableReaderAndRecoder answToQuestTran = new GizaTranTableReaderAndRecoder( false /* don't flip a translation table */, prefix + "/output.t1." + gizaIterQty, filterAndRecoder, answVoc, questVoc, (float) FeatureExtractor.DEFAULT_PROB_SELF_TRAN, minProb); int order = 0; System.out.println("Starting to compute the 0-order model"); HashIntObjMap<SparseVector> currModel = SparseEmbeddingReaderAndRecorder.createTranVecDict(index, filterAndRecoder, minProb, answToQuestTran); System.out.println("0-order model is computed"); SparseEmbeddingReaderAndRecorder.saveDict(index, outFilePrefix + ".0", currModel, maxDigit); System.out.println("0-order model is saved"); while (order < maxModelOrder) { ++order; System.out.println("Starting to compute the " + order + "-order model"); currModel = SparseEmbeddingReaderAndRecorder.nextOrderDict(currModel, index, minProb, answToQuestTran); System.out.println(order + "-order model is computed"); SparseEmbeddingReaderAndRecorder.saveDict(index, outFilePrefix + "." + order, currModel, maxDigit); System.out.println(order + "-order model is saved"); } } catch (ParseException e) { Usage("Cannot parse arguments", options); } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); System.exit(1); } System.out.println("Terminated successfully!"); }