Usage examples for java.io.BufferedWriter.close()

@SuppressWarnings("try")
public void close() throws IOException
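close() flushes any buffered characters and then closes the underlying Writer. Closing an already-closed stream has no effect, but any write() or flush() after close() throws an IOException. Because BufferedWriter implements AutoCloseable, the idiomatic way to guarantee the call is try-with-resources; a minimal sketch (the file name here is hypothetical):

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;

public class CloseExample {
    public static void main(String[] args) throws IOException {
        // The writer is flushed and closed automatically when the block exits,
        // even if write() or newLine() throws.
        try (BufferedWriter out = new BufferedWriter(new FileWriter("example.txt"))) {
            out.write("hello");
            out.newLine();
        } // close() runs here
    }
}

Most of the examples below predate (or simply do not use) try-with-resources and call close() explicitly instead.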
From source file:apps.quantification.LearnQuantificationSVMLight.java
public static void main(String[] args) throws IOException {
    String cmdLineSyntax = LearnQuantificationSVMLight.class.getName()
            + " [OPTIONS] <path to svm_light_learn> <path to svm_light_classify> <trainingIndexDirectory> <outputDirectory>";
    Options options = new Options();

    OptionBuilder.withArgName("f");
    OptionBuilder.withDescription("Number of folds");
    OptionBuilder.withLongOpt("f");
    OptionBuilder.isRequired(true);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("c");
    OptionBuilder.withDescription("The c value for svm_light (default 1)");
    OptionBuilder.withLongOpt("c");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("k");
    OptionBuilder.withDescription("Kernel type (default 0: linear, 1: polynomial, 2: RBF, 3: sigmoid)");
    OptionBuilder.withLongOpt("k");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary training file in svm_light format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmLightLearnerCustomizer classificationLearnerCustomizer = null;
    SvmLightClassifierCustomizer classificationCustomizer = null;
    int folds = -1;
    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);
        remainingArgs = line.getArgs();
        classificationLearnerCustomizer = new SvmLightLearnerCustomizer(remainingArgs[0]);
        classificationCustomizer = new SvmLightClassifierCustomizer(remainingArgs[1]);
        folds = Integer.parseInt(line.getOptionValue("f"));
        if (line.hasOption("c"))
            classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c")));
        if (line.hasOption("k")) {
            System.out.println("Kernel type: " + line.getOptionValue("k"));
            classificationLearnerCustomizer.setKernelType(Integer.parseInt(line.getOptionValue("k")));
        }
        if (line.hasOption("v"))
            classificationLearnerCustomizer.printSvmLightOutput(true);
        if (line.hasOption("s"))
            classificationLearnerCustomizer.setDeleteTrainingFiles(false);
        if (line.hasOption("t")) {
            classificationLearnerCustomizer.setTempPath(line.getOptionValue("t"));
            classificationCustomizer.setTempPath(line.getOptionValue("t"));
        }
    } catch (Exception exp) {
        System.err.println("Parsing failed. Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }
    assert (classificationLearnerCustomizer != null);
    if (remainingArgs.length != 4) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }
    String indexFile = remainingArgs[2];
    File file = new File(indexFile);
    String indexName = file.getName();
    String indexPath = file.getParent();
    String outputPath = remainingArgs[3];
    SvmLightLearner classificationLearner = new SvmLightLearner();
    classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer);
    FileSystemStorageManager fssm = new FileSystemStorageManager(indexPath, false);
    fssm.open();
    IIndex training = TroveReadWriteHelper.readIndex(fssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);
    final TextualProgressBar progressBar = new TextualProgressBar("Learning the quantifiers");
    IOperationStatusListener status = new IOperationStatusListener() {
        @Override
        public void operationStatus(double percentage) {
            progressBar.signal((int) percentage);
        }
    };
    QuantificationLearner quantificationLearner = new QuantificationLearner(folds, classificationLearner,
            classificationLearnerCustomizer, classificationCustomizer, ClassificationMode.PER_CATEGORY,
            new LogisticFunction(), status);
    IQuantifier[] quantifiers = quantificationLearner.learn(training);
    File executableFile = new File(classificationLearnerCustomizer.getSvmLightLearnPath());
    IDataManager classifierDataManager = new SvmLightDataManager(new SvmLightClassifierCustomizer(
            executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_light_classify"));
    String description = "_SVMLight_C-" + classificationLearnerCustomizer.getC() + "_K-"
            + classificationLearnerCustomizer.getKernelType();
    if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0)
        description += "_" + classificationLearnerCustomizer.getAdditionalParameters();
    String quantifierPrefix = indexName + "_Quantifier-" + folds + description;
    FileSystemStorageManager fssmo = new FileSystemStorageManager(
            outputPath + File.separatorChar + quantifierPrefix, true);
    fssmo.open();
    QuantificationLearner.write(quantifiers, fssmo, classifierDataManager);
    fssmo.close();
    BufferedWriter bfs = new BufferedWriter(
            new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt"));
    TShortDoubleHashMap simpleTPRs = quantificationLearner.getSimpleTPRs();
    TShortDoubleHashMap simpleFPRs = quantificationLearner.getSimpleFPRs();
    TShortDoubleHashMap scaledTPRs = quantificationLearner.getScaledTPRs();
    TShortDoubleHashMap scaledFPRs = quantificationLearner.getScaledFPRs();
    ContingencyTableSet contingencyTableSet = quantificationLearner.getContingencyTableSet();
    short[] cats = simpleTPRs.keys();
    for (int i = 0; i < cats.length; ++i) {
        short cat = cats[i];
        String catName = training.getCategoryDB().getCategoryName(cat);
        ContingencyTable contingencyTable = contingencyTableSet.getCategoryContingencyTable(cat);
        double simpleTPR = simpleTPRs.get(cat);
        double simpleFPR = simpleFPRs.get(cat);
        double scaledTPR = scaledTPRs.get(cat);
        double scaledFPR = scaledFPRs.get(cat);
        String line = quantifierPrefix + "\ttrain\tsimple\t" + catName + "\t" + cat + "\t"
                + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t"
                + contingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n";
        bfs.write(line);
        line = quantifierPrefix + "\ttrain\tscaled\t" + catName + "\t" + cat + "\t"
                + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t"
                + contingencyTable.tn() + "\t" + scaledTPR + "\t" + scaledFPR + "\n";
        bfs.write(line);
    }
    bfs.close();
}
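In this example and the next, the BufferedWriter is created outside any try/finally, so an exception during one of the write() calls skips bfs.close() and leaks the file handle. A minimal try-with-resources variant of the same rates-file block (a sketch; the loop body is unchanged from above):

try (BufferedWriter bfs = new BufferedWriter(
        new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt"))) {
    // ... compute catName, the rates, and the two tab-separated lines exactly as above ...
    bfs.write(line);
} // bfs.close() happens here automatically, even if write() throws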
From source file:apps.quantification.LearnQuantificationSVMPerf.java
public static void main(String[] args) throws IOException {
    String cmdLineSyntax = LearnQuantificationSVMPerf.class.getName()
            + " [OPTIONS] <path to svm_perf_learn> <path to svm_perf_classify> <trainingIndexDirectory> <outputDirectory>";
    Options options = new Options();

    OptionBuilder.withArgName("f");
    OptionBuilder.withDescription("Number of folds");
    OptionBuilder.withLongOpt("f");
    OptionBuilder.isRequired(true);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("c");
    OptionBuilder.withDescription("The c value for svm_perf (default 0.01)");
    OptionBuilder.withLongOpt("c");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("l");
    OptionBuilder.withDescription("The loss function to optimize (default 2):\n"
            + " 0 Zero/one loss: 1 if vector of predictions contains error, 0 otherwise.\n"
            + " 1 F1: 100 minus the F1-score in percent.\n"
            + " 2 Errorrate: Percentage of errors in prediction vector.\n"
            + " 3 Prec/Rec Breakeven: 100 minus PRBEP in percent.\n"
            + " 4 Prec@p: 100 minus precision at p in percent.\n"
            + " 5 Rec@p: 100 minus recall at p in percent.\n"
            + " 10 ROCArea: Percentage of swapped pos/neg pairs (i.e. 100 - ROCArea).");
    OptionBuilder.withLongOpt("l");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("w");
    OptionBuilder.withDescription("Choice of structural learning algorithm (default 9):\n"
            + " 0: n-slack algorithm described in [2]\n"
            + " 1: n-slack algorithm with shrinking heuristic\n"
            + " 2: 1-slack algorithm (primal) described in [5]\n"
            + " 3: 1-slack algorithm (dual) described in [5]\n"
            + " 4: 1-slack algorithm (dual) with constraint cache [5]\n"
            + " 9: custom algorithm in svm_struct_learn_custom.c");
    OptionBuilder.withLongOpt("w");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("p");
    OptionBuilder.withDescription("The value of p used by the prec@p and rec@p loss functions (default 0)");
    OptionBuilder.withLongOpt("p");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary training file in svm_perf format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmPerfLearnerCustomizer classificationLearnerCustomizer = null;
    SvmPerfClassifierCustomizer classificationCustomizer = null;
    int folds = -1;
    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);
        remainingArgs = line.getArgs();
        classificationLearnerCustomizer = new SvmPerfLearnerCustomizer(remainingArgs[0]);
        classificationCustomizer = new SvmPerfClassifierCustomizer(remainingArgs[1]);
        folds = Integer.parseInt(line.getOptionValue("f"));
        if (line.hasOption("c"))
            classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c")));
        if (line.hasOption("w"))
            classificationLearnerCustomizer.setW(Integer.parseInt(line.getOptionValue("w")));
        if (line.hasOption("p"))
            classificationLearnerCustomizer.setP(Integer.parseInt(line.getOptionValue("p")));
        if (line.hasOption("l"))
            classificationLearnerCustomizer.setL(Integer.parseInt(line.getOptionValue("l")));
        if (line.hasOption("v"))
            classificationLearnerCustomizer.printSvmPerfOutput(true);
        if (line.hasOption("s"))
            classificationLearnerCustomizer.setDeleteTrainingFiles(false);
        if (line.hasOption("t")) {
            classificationLearnerCustomizer.setTempPath(line.getOptionValue("t"));
            classificationCustomizer.setTempPath(line.getOptionValue("t"));
        }
    } catch (Exception exp) {
        System.err.println("Parsing failed. Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }
    assert (classificationLearnerCustomizer != null);
    if (remainingArgs.length != 4) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }
    String indexFile = remainingArgs[2];
    File file = new File(indexFile);
    String indexName = file.getName();
    String indexPath = file.getParent();
    String outputPath = remainingArgs[3];
    SvmPerfLearner classificationLearner = new SvmPerfLearner();
    classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer);
    FileSystemStorageManager fssm = new FileSystemStorageManager(indexPath, false);
    fssm.open();
    IIndex training = TroveReadWriteHelper.readIndex(fssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);
    final TextualProgressBar progressBar = new TextualProgressBar("Learning the quantifiers");
    IOperationStatusListener status = new IOperationStatusListener() {
        @Override
        public void operationStatus(double percentage) {
            progressBar.signal((int) percentage);
        }
    };
    QuantificationLearner quantificationLearner = new QuantificationLearner(folds, classificationLearner,
            classificationLearnerCustomizer, classificationCustomizer, ClassificationMode.PER_CATEGORY,
            new LogisticFunction(), status);
    IQuantifier[] quantifiers = quantificationLearner.learn(training);
    File executableFile = new File(classificationLearnerCustomizer.getSvmPerfLearnPath());
    IDataManager classifierDataManager = new SvmPerfDataManager(new SvmPerfClassifierCustomizer(
            executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_perf_classify"));
    String description = "_SVMPerf_C-" + classificationLearnerCustomizer.getC() + "_W-"
            + classificationLearnerCustomizer.getW() + "_L-" + classificationLearnerCustomizer.getL();
    if (classificationLearnerCustomizer.getL() == 4 || classificationLearnerCustomizer.getL() == 5)
        description += "_P-" + classificationLearnerCustomizer.getP();
    if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0)
        description += "_" + classificationLearnerCustomizer.getAdditionalParameters();
    String quantifierPrefix = indexName + "_Quantifier-" + folds + description;
    FileSystemStorageManager fssmo = new FileSystemStorageManager(
            outputPath + File.separatorChar + quantifierPrefix, true);
    fssmo.open();
    QuantificationLearner.write(quantifiers, fssmo, classifierDataManager);
    fssmo.close();
    BufferedWriter bfs = new BufferedWriter(
            new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt"));
    TShortDoubleHashMap simpleTPRs = quantificationLearner.getSimpleTPRs();
    TShortDoubleHashMap simpleFPRs = quantificationLearner.getSimpleFPRs();
    TShortDoubleHashMap scaledTPRs = quantificationLearner.getScaledTPRs();
    TShortDoubleHashMap scaledFPRs = quantificationLearner.getScaledFPRs();
    ContingencyTableSet contingencyTableSet = quantificationLearner.getContingencyTableSet();
    short[] cats = simpleTPRs.keys();
    for (int i = 0; i < cats.length; ++i) {
        short cat = cats[i];
        String catName = training.getCategoryDB().getCategoryName(cat);
        ContingencyTable contingencyTable = contingencyTableSet.getCategoryContingencyTable(cat);
        double simpleTPR = simpleTPRs.get(cat);
        double simpleFPR = simpleFPRs.get(cat);
        double scaledTPR = scaledTPRs.get(cat);
        double scaledFPR = scaledFPRs.get(cat);
        String line = quantifierPrefix + "\ttrain\tsimple\t" + catName + "\t" + cat + "\t"
                + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t"
                + contingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n";
        bfs.write(line);
        line = quantifierPrefix + "\ttrain\tscaled\t" + catName + "\t" + cat + "\t"
                + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t"
                + contingencyTable.tn() + "\t" + scaledTPR + "\t" + scaledFPR + "\n";
        bfs.write(line);
    }
    bfs.close();
}
From source file:metaTile.Main.java
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    try {
        /* parse the command line arguments */
        // create the command line parser
        CommandLineParser parser = new PosixParser();
        // create the Options
        Options options = new Options();
        options.addOption("i", "input", true, "File to read original tile list from.");
        options.addOption("o", "output", true, "File to write shorter meta-tile list to.");
        options.addOption("m", "metatiles", true,
                "Number of tiles in x and y direction to group into one meta-tile.");
        // parse the command line arguments
        CommandLine commandLine = parser.parse(options, args);
        if (!commandLine.hasOption("input") || !commandLine.hasOption("output")
                || !commandLine.hasOption("metatiles"))
            printUsage(options);
        String inputFileName = commandLine.getOptionValue("input");
        String outputFileName = commandLine.getOptionValue("output");
        int metaTileSize = Integer.parseInt(commandLine.getOptionValue("metatiles"));
        ArrayList<RenderingTile> tiles = new ArrayList<RenderingTile>();
        BufferedReader tileListReader = new BufferedReader(new FileReader(new File(inputFileName)));
        BufferedWriter renderMetatileListWriter = new BufferedWriter(new FileWriter(new File(outputFileName)));
        String line = tileListReader.readLine();
        while (line != null) {
            String[] columns = line.split("/");
            if (columns.length == 3)
                tiles.add(new RenderingTile(Integer.parseInt(columns[0]), Integer.parseInt(columns[1]),
                        Integer.parseInt(columns[2])));
            line = tileListReader.readLine();
        }
        tileListReader.close();
        int hits = 0;
        // tiles which we are already rendering as the top-left corner of a meta-tile
        HashSet<RenderingTile> whitelist = new HashSet<RenderingTile>();
        // for each tile in the list, see if it is already covered by a meta-tile in the whitelist
        for (int i = 0; i < tiles.size(); i++) {
            // by default we aren't already rendering this tile as part of another meta-tile
            boolean hit = false;
            for (int dx = 0; dx < metaTileSize; dx++) {
                for (int dy = 0; dy < metaTileSize; dy++) {
                    RenderingTile candidate = new RenderingTile(tiles.get(i).z, tiles.get(i).x - dx,
                            tiles.get(i).y - dy);
                    if (whitelist.contains(candidate)) {
                        hit = true;
                        // now exit the two for loops iterating over tiles inside a meta-tile
                        dx = metaTileSize;
                        dy = metaTileSize;
                    }
                }
            }
            // if this tile doesn't already have a meta-tile in the whitelist, add it
            if (hit == false) {
                hits++;
                renderMetatileListWriter.write(tiles.get(i).toString() + "/" + metaTileSize + "\n");
                whitelist.add(tiles.get(i));
            }
        }
        renderMetatileListWriter.close();
        System.out.println(
                "Reduced " + tiles.size() + " tiles into " + hits + " metatiles of size " + metaTileSize);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:bookChapter.theoretical.AnalyzeTheoreticalMSMSCalculation.java
/**
 * @param args
 * @throws IOException
 * @throws FileNotFoundException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 * @throws MzMLUnmarshallerException
 */
public static void main(String[] args) throws IOException, FileNotFoundException, ClassNotFoundException,
        InterruptedException, MzMLUnmarshallerException {
    Logger l = Logger.getLogger("AnalyzeTheoreticalMSMSCalculation");
    Date date = Calendar.getInstance().getTime();
    DateFormat formatter = new SimpleDateFormat("EEEE, dd MMMM yyyy, hh:mm:ss.SSS a");
    String now = formatter.format(date);
    l.log(Level.INFO, "Calculation starts at {0}", now);
    double precursorTolerance = ConfigHolder.getInstance().getDouble("precursor.tolerance"),
            fragmentTolerance = ConfigHolder.getInstance().getDouble("fragment.tolerance");
    String databaseName = ConfigHolder.getInstance().getString("database.name"),
            spectraName = ConfigHolder.getInstance().getString("spectra.name"),
            output = ConfigHolder.getInstance().getString("output");
    int correctionFactor = ConfigHolder.getInstance().getInt("correctionFactor");
    boolean theoFromAllCharges = ConfigHolder.getInstance().getBoolean("hasAllPossCharge");
    BufferedWriter bw = new BufferedWriter(new FileWriter(output));
    bw.write("SpectrumTitle" + "\t" + "PrecursorMZ" + "\t" + "PrecursorCharge" + "\t" + "Observed Mass (M+H)"
            + "\t" + "AndromedaLikeScore" + "\t" + "SequestLikeScore" + "\t" + "PeptideByAndromedaLikeScore"
            + "\t" + "PeptideBySequestLikeScore" + "\t" + "LevenshteinDistance" + "\t" + "TotalScoredPeps"
            + "\t" + "isCorrectMatchByAndromedaLike" + "\t" + "isCorrectMatchBySequestLikeScore" + "\n");
    l.info("Getting database entries");
    // first load all sequences into the memory
    HashSet<DBEntry> dbEntries = getDBEntries(databaseName);
    // for every spectrum, calculate both scores
    int num = 0;
    SpectrumFactory fct = SpectrumFactory.getInstance();
    File f = new File(spectraName);
    if (spectraName.endsWith(".mgf")) {
        fct.addSpectra(f, new WaitingHandlerCLIImpl());
        l.log(Level.INFO, "Spectra scoring starts at {0}", now);
        for (String title : fct.getSpectrumTitles(f.getName())) {
            num++;
            MSnSpectrum ms = (MSnSpectrum) fct.getSpectrum(f.getName(), title);
            // calculate everything unless this is an empty spectrum
            if (ms.getPeakList().size() > 2) { // also guards against spectra with negative values
                String text = result(ms, precursorTolerance, dbEntries, fragmentTolerance, correctionFactor,
                        theoFromAllCharges);
                if (!text.isEmpty()) {
                    bw.write(text);
                }
            }
            if (num % 500 == 0) {
                l.info("Running " + num + " spectra." + Calendar.getInstance().getTime());
            }
        }
    }
    l.info("Program finished at " + Calendar.getInstance().getTime());
    bw.close();
}
From source file:edu.cmu.lti.oaqa.knn4qa.apps.BuildRetrofitLexicons.java
public static void main(String[] args) {
    Options options = new Options();
    options.addOption(CommonParams.GIZA_ROOT_DIR_PARAM, null, true, CommonParams.GIZA_ROOT_DIR_DESC);
    options.addOption(CommonParams.GIZA_ITER_QTY_PARAM, null, true, CommonParams.GIZA_ITER_QTY_DESC);
    options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC);
    options.addOption(OUT_FILE_PARAM, null, true, OUT_FILE_DESC);
    options.addOption(MIN_PROB_PARAM, null, true, MIN_PROB_DESC);
    options.addOption(FORMAT_PARAM, null, true, FORMAT_DESC);
    CommandLineParser parser = new org.apache.commons.cli.GnuParser();
    try {
        CommandLine cmd = parser.parse(options, args);
        String gizaRootDir = cmd.getOptionValue(CommonParams.GIZA_ROOT_DIR_PARAM);
        int gizaIterQty = -1;
        if (cmd.hasOption(CommonParams.GIZA_ITER_QTY_PARAM)) {
            gizaIterQty = Integer.parseInt(cmd.getOptionValue(CommonParams.GIZA_ITER_QTY_PARAM));
        } else {
            Usage("Specify: " + CommonParams.GIZA_ITER_QTY_PARAM, options);
        }
        String outFileName = cmd.getOptionValue(OUT_FILE_PARAM);
        if (null == outFileName) {
            Usage("Specify: " + OUT_FILE_PARAM, options);
        }
        String indexDir = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM);
        if (null == indexDir) {
            Usage("Specify: " + CommonParams.MEMINDEX_DESC, options);
        }
        FormatType outType = FormatType.kOrig;
        String outTypeStr = cmd.getOptionValue(FORMAT_PARAM);
        if (null != outTypeStr) {
            if (outTypeStr.equals(ORIG_TYPE)) {
                outType = FormatType.kOrig;
            } else if (outTypeStr.equals(WEIGHTED_TYPE)) {
                outType = FormatType.kWeighted;
            } else if (outTypeStr.equals(UNWEIGHTED_TYPE)) {
                outType = FormatType.kUnweighted;
            } else {
                Usage("Unknown format type: " + outTypeStr, options);
            }
        }
        float minProb = 0;
        if (cmd.hasOption(MIN_PROB_PARAM)) {
            minProb = Float.parseFloat(cmd.getOptionValue(MIN_PROB_PARAM));
        } else {
            Usage("Specify: " + MIN_PROB_PARAM, options);
        }
        System.out.println(String.format(
                "Saving lexicon to '%s' (output format '%s'), keep only entries with translation probability >= %f",
                outFileName, outType.toString(), minProb));
        // We use unlemmatized text here, because the lemmatized dictionary is going to be mostly a subset of the unlemmatized one.
        InMemForwardIndex textIndex = new InMemForwardIndex(FeatureExtractor.indexFileName(indexDir,
                FeatureExtractor.mFieldNames[FeatureExtractor.TEXT_UNLEMM_FIELD_ID]));
        InMemForwardIndexFilterAndRecoder filterAndRecoder = new InMemForwardIndexFilterAndRecoder(textIndex);
        String prefix = gizaRootDir + "/" + FeatureExtractor.mFieldNames[FeatureExtractor.TEXT_UNLEMM_FIELD_ID]
                + "/";
        GizaVocabularyReader answVoc = new GizaVocabularyReader(prefix + "source.vcb", filterAndRecoder);
        GizaVocabularyReader questVoc = new GizaVocabularyReader(prefix + "target.vcb", filterAndRecoder);
        GizaTranTableReaderAndRecoder gizaTable = new GizaTranTableReaderAndRecoder(
                false, // we don't need to flip the table for this purpose
                prefix + "/output.t1." + gizaIterQty, filterAndRecoder, answVoc, questVoc,
                (float) FeatureExtractor.DEFAULT_PROB_SELF_TRAN, minProb);
        BufferedWriter outFile = new BufferedWriter(new FileWriter(outFileName));
        for (int srcWordId = 0; srcWordId <= textIndex.getMaxWordId(); ++srcWordId) {
            GizaOneWordTranRecs tranRecs = gizaTable.getTranProbs(srcWordId);
            if (null != tranRecs) {
                String wordSrc = textIndex.getWord(srcWordId);
                StringBuffer sb = new StringBuffer();
                sb.append(wordSrc);
                for (int k = 0; k < tranRecs.mDstIds.length; ++k) {
                    float prob = tranRecs.mProbs[k];
                    if (prob >= minProb) {
                        int dstWordId = tranRecs.mDstIds[k];
                        if (dstWordId == srcWordId && outType != FormatType.kWeighted)
                            continue; // don't duplicate the word, unless it's probability weighted
                        sb.append(' ');
                        String dstWord = textIndex.getWord(dstWordId);
                        if (null == dstWord) {
                            throw new Exception(
                                    "Bug or inconsistent data: couldn't retrieve a word for wordId = " + dstWordId);
                        }
                        if (dstWord.indexOf(':') >= 0)
                            throw new Exception("Illegal dictionary word '" + dstWord + "' b/c it contains ':'");
                        sb.append(dstWord);
                        if (outType != FormatType.kOrig) {
                            sb.append(':');
                            sb.append(outType == FormatType.kWeighted ? prob : 1);
                        }
                    }
                }
                outFile.write(sb.toString());
                outFile.newLine();
            }
        }
        outFile.close();
    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments", options);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
    System.out.println("Terminated successfully!");
}
From source file:cc.redberry.core.performance.StableSort.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        // warm up the JVM
        BitsStreamGenerator bitsStreamGenerator = new Well19937c();
        for (int i = 0; i < 1000; ++i)
            nextArray(1000, bitsStreamGenerator);
        System.out.println("!");
        BufferedWriter timMeanOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/timMean.dat"));
        BufferedWriter insertionMeanOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/insertionMean.dat"));
        BufferedWriter timMaxOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/timMax.dat"));
        BufferedWriter insertionMaxOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/insertionMax.dat"));
        BufferedWriter timSigOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/timSig.dat"));
        BufferedWriter insertionSigOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/insertionSig.dat"));
        DescriptiveStatistics timSort;
        DescriptiveStatistics insertionSort;
        int tries = 200;
        int arrayLength = 0;
        for (; arrayLength < 1000; ++arrayLength) {
            int[] coSort = nextArray(arrayLength, bitsStreamGenerator);
            timSort = new DescriptiveStatistics();
            insertionSort = new DescriptiveStatistics();
            for (int i = 0; i < tries; ++i) {
                int[] t1 = nextArray(arrayLength, bitsStreamGenerator);
                int[] t2 = t1.clone();
                long start = System.currentTimeMillis();
                ArraysUtils.timSort(t1, coSort);
                long stop = System.currentTimeMillis();
                timSort.addValue(stop - start);
                start = System.currentTimeMillis();
                ArraysUtils.insertionSort(t2, coSort);
                stop = System.currentTimeMillis();
                insertionSort.addValue(stop - start);
            }
            timMeanOut.write(arrayLength + "\t" + timSort.getMean() + "\n");
            insertionMeanOut.write(arrayLength + "\t" + insertionSort.getMean() + "\n");
            timMaxOut.write(arrayLength + "\t" + timSort.getMax() + "\n");
            insertionMaxOut.write(arrayLength + "\t" + insertionSort.getMax() + "\n");
            timSigOut.write(arrayLength + "\t" + timSort.getStandardDeviation() + "\n");
            insertionSigOut.write(arrayLength + "\t" + insertionSort.getStandardDeviation() + "\n");
        }
        timMeanOut.close();
        insertionMeanOut.close();
        timMaxOut.close();
        insertionMaxOut.close();
        timSigOut.close();
        insertionSigOut.close();
    } catch (IOException ex) {
        Logger.getLogger(StableSort.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:edu.cmu.lti.oaqa.knn4qa.apps.FilterTranTable.java
public static void main(String[] args) {
    Options options = new Options();
    options.addOption(INPUT_PARAM, null, true, INPUT_DESC);
    options.addOption(OUTPUT_PARAM, null, true, OUTPUT_DESC);
    options.addOption(CommonParams.MEM_FWD_INDEX_PARAM, null, true, CommonParams.MEM_FWD_INDEX_DESC);
    options.addOption(CommonParams.GIZA_ITER_QTY_PARAM, null, true, CommonParams.GIZA_ITER_QTY_DESC);
    options.addOption(CommonParams.GIZA_ROOT_DIR_PARAM, null, true, CommonParams.GIZA_ROOT_DIR_DESC);
    options.addOption(CommonParams.MIN_PROB_PARAM, null, true, CommonParams.MIN_PROB_DESC);
    options.addOption(CommonParams.MAX_WORD_QTY_PARAM, null, true, CommonParams.MAX_WORD_QTY_PARAM);
    CommandLineParser parser = new org.apache.commons.cli.GnuParser();
    try {
        CommandLine cmd = parser.parse(options, args);
        String outputFile = cmd.getOptionValue(OUTPUT_PARAM);
        if (null == outputFile) {
            Usage("Specify 'A name of the output file'", options);
        }
        String gizaRootDir = cmd.getOptionValue(CommonParams.GIZA_ROOT_DIR_PARAM);
        if (null == gizaRootDir) {
            Usage("Specify '" + CommonParams.GIZA_ROOT_DIR_DESC + "'", options);
        }
        String gizaIterQty = cmd.getOptionValue(CommonParams.GIZA_ITER_QTY_PARAM);
        if (null == gizaIterQty) {
            Usage("Specify '" + CommonParams.GIZA_ITER_QTY_DESC + "'", options);
        }
        float minProb = 0;
        String tmpf = cmd.getOptionValue(CommonParams.MIN_PROB_PARAM);
        if (tmpf != null) {
            minProb = Float.parseFloat(tmpf);
        }
        int maxWordQty = Integer.MAX_VALUE;
        String tmpi = cmd.getOptionValue(CommonParams.MAX_WORD_QTY_PARAM);
        if (null != tmpi) {
            maxWordQty = Integer.parseInt(tmpi);
        }
        String memFwdIndxName = cmd.getOptionValue(CommonParams.MEM_FWD_INDEX_PARAM);
        if (null == memFwdIndxName) {
            Usage("Specify '" + CommonParams.MEM_FWD_INDEX_DESC + "'", options);
        }
        System.out.println("Filtering index: " + memFwdIndxName + " max # of frequent words: " + maxWordQty
                + " min. probability:" + minProb);
        VocabularyFilterAndRecoder filter = new FrequentIndexWordFilterAndRecoder(memFwdIndxName, maxWordQty);
        String srcVocFile = CompressUtils.findFileVariant(gizaRootDir + "/source.vcb");
        System.out.println("Source vocabulary file: " + srcVocFile);
        GizaVocabularyReader srcVoc = new GizaVocabularyReader(srcVocFile, filter);
        String dstVocFile = CompressUtils.findFileVariant(gizaRootDir + "/target.vcb");
        System.out.println("Target vocabulary file: " + dstVocFile);
        GizaVocabularyReader dstVoc = new GizaVocabularyReader(CompressUtils.findFileVariant(dstVocFile), filter);
        String inputFile = CompressUtils.findFileVariant(gizaRootDir + "/output.t1." + gizaIterQty);
        BufferedReader finp = new BufferedReader(
                new InputStreamReader(CompressUtils.createInputStream(inputFile)));
        BufferedWriter fout = new BufferedWriter(
                new OutputStreamWriter(CompressUtils.createOutputStream(outputFile)));
        try {
            String line;
            int prevSrcId = -1;
            int wordQty = 0;
            long addedQty = 0;
            long totalQty = 0;
            boolean isNotFiltered = false;
            for (totalQty = 0; (line = finp.readLine()) != null;) {
                ++totalQty;
                // skip empty lines
                line = line.trim();
                if (line.isEmpty())
                    continue;
                GizaTranRec rec = new GizaTranRec(line);
                if (rec.mSrcId != prevSrcId) {
                    ++wordQty;
                }
                if (totalQty % REPORT_INTERVAL_QTY == 0) {
                    System.out.println(String.format(
                            "Processed %d lines (%d source word entries) from '%s', added %d lines",
                            totalQty, wordQty, inputFile, addedQty));
                }
                // isNotFiltered should be set after procOneWord
                if (rec.mSrcId != prevSrcId) {
                    if (rec.mSrcId == 0)
                        isNotFiltered = true;
                    else {
                        String wordSrc = srcVoc.getWord(rec.mSrcId);
                        isNotFiltered = filter == null || (wordSrc != null && filter.checkWord(wordSrc));
                    }
                }
                prevSrcId = rec.mSrcId;
                if (rec.mProb >= minProb && isNotFiltered) {
                    String wordDst = dstVoc.getWord(rec.mDstId);
                    if (filter == null || (wordDst != null && filter.checkWord(wordDst))) {
                        fout.write(rec.mSrcId + " " + rec.mDstId + " " + rec.mProb);
                        fout.newLine();
                        addedQty++;
                    }
                }
            }
            System.out.println(
                    String.format("Processed %d lines (%d source word entries) from '%s', added %d lines",
                            totalQty, wordQty, inputFile, addedQty));
        } finally {
            finp.close();
            fout.close();
        }
    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}
From source file:com.twentyn.chemicalClassifier.Runner.java
public static void main(String[] args) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader(args[0]));
    BufferedWriter writer = new BufferedWriter(new FileWriter(args[1]));
    try {
        Oscar oscar = new Oscar();
        String line = null;
        /* NOTE: this is exactly the wrong way to write a TSV reader. Caveat emptor.
         * See http://tburette.github.io/blog/2014/05/25/so-you-want-to-write-your-own-CSV-code/
         * and then use org.apache.commons.csv.CSVParser instead. */
        while ((line = reader.readLine()) != null) {
            // TSV means split on tabs! Nothing else will do.
            List<String> fields = Arrays.asList(line.split("\t"));
            // Choke if our invariants aren't satisfied. We expect every line to have a name and an InChI.
            if (fields.size() != 2) {
                throw new RuntimeException(
                        String.format("Found malformed line (all lines must have two fields): %s", line));
            }
            String name = fields.get(1);
            List<ResolvedNamedEntity> entities = oscar.findAndResolveNamedEntities(name);
            System.out.println("**********");
            System.out.println("Name: " + name);
            List<String> outputFields = new ArrayList<>(fields.size() + 1);
            outputFields.addAll(fields);
            if (entities.size() == 0) {
                System.out.println("No match");
                outputFields.add("noMatch");
            } else if (entities.size() == 1) {
                ResolvedNamedEntity entity = entities.get(0);
                NamedEntity ne = entity.getNamedEntity();
                if (ne.getStart() != 0 || ne.getEnd() != name.length()) {
                    System.out.println("Partial match");
                    printEntity(entity);
                    outputFields.add("partialMatch");
                } else {
                    System.out.println("Exact match");
                    printEntity(entity);
                    outputFields.add("exactMatch");
                    List<ChemicalStructure> structures = entity.getChemicalStructures(FormatType.STD_INCHI);
                    for (ChemicalStructure s : structures) {
                        outputFields.add(s.getValue());
                    }
                }
            } else {
                // Multiple matches found!
                System.out.println("Multiple matches");
                for (ResolvedNamedEntity e : entities) {
                    printEntity(e);
                }
                outputFields.add("multipleMatches");
            }
            writer.write(String.join("\t", outputFields));
            writer.newLine();
        }
    } finally {
        writer.flush();
        writer.close();
    }
}
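The NOTE in this example points at org.apache.commons.csv.CSVParser as the right way to read TSV. A minimal sketch of that replacement for the readLine()/split("\t") loop, assuming commons-csv is on the classpath (CSVFormat.TDF is its built-in tab-delimited format; the two-column layout matches the example above):

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

// Drop-in replacement for the manual TSV loop above.
static void readTsv(String fileName) throws IOException {
    try (Reader in = new FileReader(fileName);
         CSVParser parser = CSVFormat.TDF.parse(in)) {
        for (CSVRecord record : parser) {
            String name = record.get(1); // second column, as in the example above
            // ... hand `name` to oscar.findAndResolveNamedEntities(name) as before ...
        }
    }
}

Unlike a bare split("\t"), the parser handles quoting and embedded delimiters correctly.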
From source file:edu.cmu.lti.oaqa.knn4qa.apps.CollectionDiffer.java
public static void main(String[] args) {
    Options options = new Options();
    options.addOption("i1", null, true, "Input file 1");
    options.addOption("i2", null, true, "Input file 2");
    options.addOption("o", null, true, "Output file");
    CommandLineParser parser = new org.apache.commons.cli.GnuParser();
    try {
        CommandLine cmd = parser.parse(options, args);
        InputStream input1 = null, input2 = null;
        if (cmd.hasOption("i1")) {
            input1 = CompressUtils.createInputStream(cmd.getOptionValue("i1"));
        } else {
            Usage("Specify 'Input file 1'");
        }
        if (cmd.hasOption("i2")) {
            input2 = CompressUtils.createInputStream(cmd.getOptionValue("i2"));
        } else {
            Usage("Specify 'Input file 2'");
        }
        HashSet<String> hSubj = new HashSet<String>();
        BufferedWriter out = null;
        if (cmd.hasOption("o")) {
            String outFile = cmd.getOptionValue("o");
            out = new BufferedWriter(new OutputStreamWriter(CompressUtils.createOutputStream(outFile)));
        } else {
            Usage("Specify 'Output file'");
        }
        XmlIterator inpIter2 = new XmlIterator(input2, YahooAnswersReader.DOCUMENT_TAG);
        int docNum = 1;
        for (String oneRec = inpIter2.readNext(); !oneRec.isEmpty(); oneRec = inpIter2.readNext(), ++docNum) {
            if (docNum % 10000 == 0) {
                System.out.println(String.format(
                        "Loaded and memorized questions for %d documents from the second input file", docNum));
            }
            ParsedQuestion q = YahooAnswersParser.parse(oneRec, false);
            hSubj.add(q.mQuestion);
        }
        XmlIterator inpIter1 = new XmlIterator(input1, YahooAnswersReader.DOCUMENT_TAG);
        System.out.println("=============================================");
        System.out.println("Memoization is done... now let's diff!!!");
        System.out.println("=============================================");
        docNum = 1;
        int skipOverlapQty = 0, skipErrorQty = 0;
        for (String oneRec = inpIter1.readNext(); !oneRec.isEmpty(); ++docNum, oneRec = inpIter1.readNext()) {
            if (docNum % 10000 == 0) {
                System.out.println(String.format("Processed %d documents from the first input file", docNum));
            }
            oneRec = oneRec.trim() + System.getProperty("line.separator");
            ParsedQuestion q = null;
            try {
                q = YahooAnswersParser.parse(oneRec, false);
            } catch (Exception e) {
                // If <bestanswer>...</bestanswer> is missing we may end up here...
                // This is a bit funny, because this element is supposed to be mandatory,
                // but it's not.
                System.err.println("Skipping due to parsing error, exception: " + e);
                skipErrorQty++;
                continue;
            }
            if (hSubj.contains(q.mQuestion.trim())) {
                //System.out.println(String.format("Skipping uri='%s', question='%s'", q.mQuestUri, q.mQuestion));
                skipOverlapQty++;
                continue;
            }
            out.write(oneRec);
        }
        System.out.println(
                String.format("Processed %d documents, skipped because of overlap/errors %d/%d documents",
                        docNum - 1, skipOverlapQty, skipErrorQty));
        out.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments");
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}
From source file:apps.ParsedPost.java
public static void main(String args[]) {
    Options options = new Options();
    options.addOption(INPUT_PARAM, null, true, INPUT_DESC);
    options.addOption(OUTPUT_PARAM, null, true, OUTPUT_DESC);
    options.addOption(MAX_NUM_REC_PARAM, null, true, MAX_NUM_REC_DESC);
    options.addOption(DEBUG_PRINT_PARAM, null, false, DEBUG_PRINT_DESC);
    options.addOption(EXCLUDE_CODE_PARAM, null, false, EXCLUDE_CODE_DESC);
    CommandLineParser parser = new org.apache.commons.cli.GnuParser();
    HashMap<String, ParsedPost> hQuestions = new HashMap<String, ParsedPost>();
    try {
        CommandLine cmd = parser.parse(options, args);
        String inputFile = cmd.getOptionValue(INPUT_PARAM);
        if (null == inputFile)
            Usage("Specify: " + INPUT_PARAM, options);
        String outputFile = cmd.getOptionValue(OUTPUT_PARAM);
        if (null == outputFile)
            Usage("Specify: " + OUTPUT_PARAM, options);
        InputStream input = CompressUtils.createInputStream(inputFile);
        BufferedWriter output = new BufferedWriter(new FileWriter(new File(outputFile)));
        int maxNumRec = Integer.MAX_VALUE;
        String tmp = cmd.getOptionValue(MAX_NUM_REC_PARAM);
        if (tmp != null)
            maxNumRec = Integer.parseInt(tmp);
        boolean debug = cmd.hasOption(DEBUG_PRINT_PARAM);
        boolean excludeCode = cmd.hasOption(EXCLUDE_CODE_PARAM);
        System.out.println("Processing at most " + maxNumRec + " records, excluding code? " + excludeCode);
        XmlIterator xi = new XmlIterator(input, ROOT_POST_TAG);
        String elem;
        output.write("<?xml version='1.0' encoding='UTF-8'?><ystfeed>\n");
        for (int num = 1; num <= maxNumRec && !(elem = xi.readNext()).isEmpty(); ++num) {
            ParsedPost post = null;
            try {
                post = parsePost(elem, excludeCode);
                if (!post.mAcceptedAnswerId.isEmpty()) {
                    hQuestions.put(post.mId, post);
                } else if (post.mpostIdType.equals("2")) {
                    String parentId = post.mParentId;
                    String id = post.mId;
                    if (!parentId.isEmpty()) {
                        ParsedPost parentPost = hQuestions.get(parentId);
                        if (parentPost != null && parentPost.mAcceptedAnswerId.equals(id)) {
                            output.write(createYahooAnswersQuestion(parentPost, post));
                            hQuestions.remove(parentId);
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
                throw new Exception("Error parsing record # " + num + ", error message: " + e);
            }
            if (debug) {
                System.out.println(String.format("%s parentId=%s acceptedAnswerId=%s type=%s", post.mId,
                        post.mParentId, post.mAcceptedAnswerId, post.mpostIdType));
                System.out.println("================================");
                if (!post.mTitle.isEmpty()) {
                    System.out.println(post.mTitle);
                    System.out.println("--------------------------------");
                }
                System.out.println(post.mBody);
                System.out.println("================================");
            }
        }
        output.write("</ystfeed>\n");
        input.close();
        output.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}