List of usage examples for java.io BufferedWriter append
public Writer append(CharSequence csq) throws IOException
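append(csq) writes the character sequence to the stream and returns the writer itself, so calls can be chained; if csq is null, the four characters "null" are written. An invocation of the form out.append(csq) behaves in exactly the same way as out.write(csq.toString()). Before the project examples below, here is a minimal sketch of typical usage (the class name and the file name out.txt are illustrative); try-with-resources flushes the buffer and closes the file automatically:

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;

public class AppendExample {
    public static void main(String[] args) throws IOException {
        // try-with-resources flushes the buffer and closes the underlying file
        try (BufferedWriter writer = new BufferedWriter(new FileWriter("out.txt"))) {
            // append returns the writer, so calls can be chained
            writer.append("key").append('\t').append("value");
            writer.newLine();
            writer.append(null); // writes the four characters "null"
        }
    }
}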
From source file:de.dfki.km.perspecting.obie.corpus.TextCorpus.java
public LabeledTextCorpus labelRDFTypes(final File corpus, final Pipeline pipeline, final String template)
        throws Exception {
    final BufferedWriter writer = new BufferedWriter(new FileWriter(corpus));

    this.forEach(new DocumentProcedure<String>() {
        @Override
        public String process(Reader doc, URI uri) throws Exception {
            Document document = pipeline.createDocument(doc, uri, corpusMediaType, template, language);

            for (int step = 0; pipeline.hasNext(step); step = pipeline.execute(step, document))
                ;

            TIntHashSet sentenceBoundaries = new TIntHashSet();
            for (TokenSequence<Integer> sentence : document.getSentences()) {
                sentenceBoundaries.add(sentence.getEnd());
            }

            for (Token token : document) {
                String word = token.toString();
                String pos = token.getPartOfSpeechTag();
                String phrase = token.getNounPhraseTag();

                int label = -1;
                int[] types = token.getTypes(0.0).toArray();
                if (types.length > 0) {
                    label = pipeline.getKnowledgeBase().getCluster(types);
                }

                // One token per line: word, POS tag, phrase tag, and cluster label.
                writer.append(word);
                writer.append(SPACE);
                writer.append(pos);
                writer.append(SPACE);
                writer.append(phrase);
                writer.append(SPACE);
                if (label > 0) {
                    writer.append(Integer.toString(label));
                } else {
                    writer.append(LabeledTextCorpus.OUTSIDE_ANY_LABEL);
                }
                writer.newLine();

                // A blank line separates sentences.
                if (sentenceBoundaries.contains(token.getEnd())) {
                    writer.newLine();
                }
            }
            writer.flush();
            return uri.toString();
        }
    });
    writer.close();
    return new LabeledTextCorpus(corpus, MediaType.TEXT, this);
}
From source file:analytics.storage.store2csv.java
@Override
public void storeElementValueData(HashMap<String, Integer> data, String metricName, String dataProvider,
        String analysisType, String headerColumn, String element, Logger logger, int time) {

    String sFileName = dataProvider + analysisType + ".csv";

    Properties props = new Properties();
    try {
        props.load(new FileInputStream("configure.properties"));
    } catch (IOException e1) {
        e1.printStackTrace();
        System.exit(-1);
    }

    File anls = new File(props.getProperty(AnalyticsConstants.resultsPath) + "Analysis_Results");
    if (!anls.exists())
        anls.mkdir();

    File dir = new File(anls, dataProvider);
    if (!dir.exists())
        dir.mkdir();

    File file = new File(dir, sFileName);
    FileWriter writer;
    BufferedWriter bw = null;
    try {
        if (file.exists() && time == 0)
            file.delete();

        writer = new FileWriter(file);
        bw = new BufferedWriter(writer);
        createHeaders(bw, metricName, headerColumn);

        Set<String> keySet = data.keySet();
        Iterator<String> iterator = keySet.iterator();
        StringBuffer logString = new StringBuffer();
        StringBuffer key = new StringBuffer();

        while (iterator.hasNext()) {
            key.append(iterator.next());
            Integer value = data.get(key.toString());

            // Escape commas in the key so the CSV layout is preserved.
            if (key.toString().contains(","))
                key.replace(0, key.length(), key.toString().replace(",", "/"));

            bw.append(key);
            logString.append(dataProvider);
            logString.append(" " + element);
            logString.append(" " + key.toString().replace(" ", "_"));
            bw.append(',');
            bw.append(String.valueOf(value));
            logString.append(" " + String.valueOf(value));
            bw.newLine();

            logger.info(logString.toString());
            logString.delete(0, logString.capacity());
            key.delete(0, key.length());
        }
        bw.close();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (bw != null)
                bw.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
From source file:gdsc.smlm.ij.plugins.TraceMolecules.java
private void saveTraceData(StoredDataStatistics s, String name, String fileSuffix) {
    BufferedWriter file = null;
    try {
        file = new BufferedWriter(
                new FileWriter(settings.traceDataDirectory + TITLE + "." + fileSuffix + ".txt"));
        // Write the statistic name, then one rounded value per line.
        file.append(name);
        file.newLine();
        for (double d : s.getValues()) {
            file.append(Utils.rounded(d, 4));
            file.newLine();
        }
    } catch (Exception e) {
        // Q. Add better handling of errors?
        e.printStackTrace();
        IJ.log("Failed to save trace data to results directory: " + settings.traceDataDirectory);
    } finally {
        if (file != null) {
            try {
                file.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.civprod.writerstoolbox.OpenNLP.training.WordSplitingTokenizerTrainer.java
private void cmdTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainActionPerformed
    final WordSplitingTokenizerTrainer tempThis = this;
    final Charset utf8 = Charset.forName("UTF-8");
    new Thread(() -> {
        textTestResults.setText("");

        // Create the TokenizerFactory part of the training context.
        WordSplittingTokenizerFactory myTokenizerFactory = new WordSplittingTokenizerFactory("EN",
                mAbbreviationDictionary, false, null, mSpellingDictionary,
                (TimeComplexity) comboTimeComplexity.getSelectedItem());

        Tokenizer stdTokenizer = null;
        try {
            stdTokenizer = OpenNLPUtils.createTokenizer();
        } catch (IOException ex) {
            Logger.getLogger(WordSplitingTokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex);
        }
        Tokenizer myNonSplitingTokenizer = null;
        try {
            myNonSplitingTokenizer = OpenNLPUtils.createTokenizer(OpenNLPUtils.readTokenizerModel(
                    OpenNLPUtils.buildModelFileStream(".\\data\\OpenNLP\\en-fiction-token.bin")));
        } catch (IOException ex) {
            Logger.getLogger(WordSplitingTokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex);
        }

        // Leave-one-out splits over the file collection.
        List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel);
        File trainingFile = new File("en-token.train");
        File testFile = new File("en-token.test");

        SummaryStatistics curFStats = new SummaryStatistics();
        SummaryStatistics curRecallStats = new SummaryStatistics();
        SummaryStatistics curPrecisionStats = new SummaryStatistics();
        SummaryStatistics stdFStats = new SummaryStatistics();
        SummaryStatistics stdRecallStats = new SummaryStatistics();
        SummaryStatistics stdPrecisionStats = new SummaryStatistics();
        SummaryStatistics myNonSplitFStats = new SummaryStatistics();
        SummaryStatistics myNonSplitRecallStats = new SummaryStatistics();
        SummaryStatistics myNonSplitPrecisionStats = new SummaryStatistics();

        java.io.BufferedWriter trainingFileWriter = null;
        for (FileSplit curFileSplit : FileSplits) {
            try {
                // Concatenate the training files of this split into one file.
                trainingFileWriter = new java.io.BufferedWriter(
                        new java.io.OutputStreamWriter(new java.io.FileOutputStream(trainingFile), utf8));
                for (File curTrainingFile : curFileSplit.getTrainingFiles()) {
                    java.io.BufferedReader curTrainingFileReader = null;
                    try {
                        Charset fileCharset = FileUtils.determineCharset(curTrainingFile);
                        if (fileCharset == null) {
                            fileCharset = utf8;
                        }
                        curTrainingFileReader = new java.io.BufferedReader(new java.io.InputStreamReader(
                                new java.io.FileInputStream(curTrainingFile), fileCharset));
                        while (curTrainingFileReader.ready()) {
                            String curLine = curTrainingFileReader.readLine();
                            trainingFileWriter.append(curLine).append("\n");
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                trainingFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingFileWriter != null) {
                    try {
                        trainingFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            // Concatenate the test files of this split into one file.
            java.io.BufferedWriter testFileWriter = null;
            try {
                testFileWriter = new java.io.BufferedWriter(
                        new java.io.OutputStreamWriter(new java.io.FileOutputStream(testFile), utf8));
                for (File curTrainingFile : curFileSplit.getTestFiles()) {
                    String testingFileName = curTrainingFile.getCanonicalPath();
                    textTestResults.setText(textTestResults.getText() + "testing with " + testingFileName + "\n");
                    java.io.BufferedReader curTrainingFileReader = null;
                    try {
                        Charset fileCharset = FileUtils.determineCharset(curTrainingFile);
                        if (fileCharset == null) {
                            fileCharset = utf8;
                        }
                        curTrainingFileReader = new java.io.BufferedReader(new java.io.InputStreamReader(
                                new java.io.FileInputStream(curTrainingFile), fileCharset));
                        while (curTrainingFileReader.ready()) {
                            String curLine = curTrainingFileReader.readLine();
                            testFileWriter.append(curLine).append("\n");
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                testFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (testFileWriter != null) {
                    try {
                        testFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            // Create and train the model for this split.
            ObjectStream<String> trainingLineStream = null;
            TokenizerModel train = null;
            try {
                trainingLineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), utf8);
                ObjectStream<TokenSample> sampleStream = null;
                try {
                    sampleStream = new TokenSampleStream(trainingLineStream);
                    train = TokenizerME.train(sampleStream, myTokenizerFactory,
                            TrainingParameters.defaultParams());
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (sampleStream != null) {
                        try {
                            sampleStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            } catch (FileNotFoundException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingLineStream != null) {
                    try {
                        trainingLineStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            if (train != null) {
                ObjectStream<String> testingLineStream = null;
                try {
                    testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), utf8);
                    ObjectStream<TokenSample> sampleStream = null;
                    try {
                        // Evaluate the freshly trained model.
                        sampleStream = new TokenSampleStream(testingLineStream);
                        TokenizerME testDetector = new TokenizerME(train);
                        TokenizerEvaluator evaluator = new TokenizerEvaluator(testDetector);
                        evaluator.evaluate(sampleStream);
                        FMeasure testFMeasure = evaluator.getFMeasure();
                        curFStats.addValue(testFMeasure.getFMeasure());
                        curRecallStats.addValue(testFMeasure.getRecallScore());
                        curPrecisionStats.addValue(testFMeasure.getPrecisionScore());
                        textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " "
                                + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n");

                        // Evaluate the standard tokenizer on the same test data.
                        if (stdTokenizer != null) {
                            testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), utf8);
                            sampleStream = new TokenSampleStream(testingLineStream);
                            TokenizerEvaluator stdEvaluator = new TokenizerEvaluator(stdTokenizer);
                            stdEvaluator.evaluate(sampleStream);
                            FMeasure stdFMeasure = stdEvaluator.getFMeasure();
                            stdFStats.addValue(stdFMeasure.getFMeasure());
                            stdRecallStats.addValue(stdFMeasure.getRecallScore());
                            stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore());
                            textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure()
                                    + " " + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore()
                                    + "\n");
                        }

                        // Evaluate the non-splitting tokenizer on the same test data.
                        if (myNonSplitingTokenizer != null) {
                            testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), utf8);
                            sampleStream = new TokenSampleStream(testingLineStream);
                            TokenizerEvaluator myNonSplitingEvaluator = new TokenizerEvaluator(
                                    myNonSplitingTokenizer);
                            myNonSplitingEvaluator.evaluate(sampleStream);
                            FMeasure myNonSplitFMeasure = myNonSplitingEvaluator.getFMeasure();
                            myNonSplitFStats.addValue(myNonSplitFMeasure.getFMeasure());
                            myNonSplitRecallStats.addValue(myNonSplitFMeasure.getRecallScore());
                            myNonSplitPrecisionStats.addValue(myNonSplitFMeasure.getPrecisionScore());
                            textTestResults.setText(textTestResults.getText() + " "
                                    + myNonSplitFMeasure.getFMeasure() + " "
                                    + myNonSplitFMeasure.getPrecisionScore() + " "
                                    + myNonSplitFMeasure.getRecallScore() + "\n");
                        }
                        textTestResults.setText(textTestResults.getText() + "\n");
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (sampleStream != null) {
                            try {
                                sampleStream.close();
                            } catch (IOException ex) {
                                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null,
                                        ex);
                            }
                        }
                    }
                } catch (FileNotFoundException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (testingLineStream != null) {
                        try {
                            testingLineStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            }
        }

        // Report cross-validation statistics.
        textTestResults.setText(textTestResults.getText() + "\n");
        textTestResults.setText(textTestResults.getText() + "test model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev "
                + curFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean()
                + " stdDev " + curRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean()
                + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "std model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev "
                + stdFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean()
                + " stdDev " + stdRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean()
                + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "my non spliting model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + myNonSplitFStats.getMean()
                + " stdDev " + myNonSplitFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + myNonSplitRecallStats.getMean()
                + " stdDev " + myNonSplitRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean "
                + myNonSplitPrecisionStats.getMean() + " stdDev "
                + myNonSplitPrecisionStats.getStandardDeviation() + "\n");

        // Create the combined training file from the whole collection.
        trainingFileWriter = null;
        try {
            trainingFileWriter = new java.io.BufferedWriter(
                    new java.io.OutputStreamWriter(new java.io.FileOutputStream(trainingFile), utf8));
            for (File curTrainingFile : mFileCollectionListModel) {
                java.io.BufferedReader curTrainingFileReader = null;
                try {
                    Charset fileCharset = FileUtils.determineCharset(curTrainingFile);
                    if (fileCharset == null) {
                        fileCharset = utf8;
                    }
                    curTrainingFileReader = new java.io.BufferedReader(new java.io.InputStreamReader(
                            new java.io.FileInputStream(curTrainingFile), fileCharset));
                    while (curTrainingFileReader.ready()) {
                        String curLine = curTrainingFileReader.readLine();
                        trainingFileWriter.append(curLine).append("\n");
                    }
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (curTrainingFileReader != null) {
                        curTrainingFileReader.close();
                    }
                }
            }
            trainingFileWriter.write('\n');
        } catch (IOException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (trainingFileWriter != null) {
                try {
                    trainingFileWriter.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        // Create and train the final model on all the data.
        ObjectStream<String> lineStream = null;
        this.createdObject = null;
        try {
            lineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), utf8);
            ObjectStream<TokenSample> sampleStream = null;
            try {
                sampleStream = new TokenSampleStream(lineStream);
                this.createdObject = TokenizerME.train(sampleStream, myTokenizerFactory,
                        TrainingParameters.defaultParams());
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (sampleStream != null) {
                    try {
                        sampleStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        } catch (FileNotFoundException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (lineStream != null) {
                try {
                    lineStream.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        // Serialize the final model to disk.
        if (createdObject != null) {
            OutputStream modelOut = null;
            File modelFile = new File("en-fiction-token.bin");
            try {
                modelOut = new BufferedOutputStream(new FileOutputStream(modelFile));
                createdObject.serialize(modelOut);
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (modelOut != null) {
                    try {
                        modelOut.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        }
        textTestResults.setText(textTestResults.getText() + "done");
    }).start();
}
From source file:nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser.TestEQTLDatasetForInteractions.java
public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2,
        TextFile outputTopCovs, File snpsToSwapFile, HashMultimap<String, String> qtlProbeSnpMultiMap,
        String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest,
        boolean skipNormalization, boolean skipCovariateNormalization,
        HashMultimap<String, String> qtlProbeSnpMultiMapCovariates) throws IOException, Exception {

    //hashSamples = excludeOutliers(hashSamples);

    HashMap<String, Integer> covariatesToLoad = new HashMap<String, Integer>();
    if (covariatesToTest != null) {
        for (String c : covariatesToTest) {
            covariatesToLoad.put(c, null);
        }
        for (String c : covsToCorrect) {
            covariatesToLoad.put(c, null);
        }
        for (String c : covsToCorrect2) {
            covariatesToLoad.put(c, null);
        }
        for (int i = 1; i <= 50; ++i) {
            covariatesToLoad.put("Comp" + i, null);
        }
    } else {
        covariatesToLoad = null;
    }

    ExpressionDataset datasetExpression = new ExpressionDataset(
            inputDir + "/bigTableLude.txt.Expression.binary", '\t', null, hashSamples);
    ExpressionDataset datasetCovariates = new ExpressionDataset(
            inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples);

    org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression =
            new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression();

    int nrSamples = datasetGenotypes.nrSamples;

    correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression);

    if (!skipNormalization) {
        correctExpressionData(covsToCorrect2, datasetGenotypes, datasetCovariates, datasetExpression);
    }

    ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(
            inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples);

    if (!skipNormalization && !skipCovariateNormalization) {
        correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes,
                datasetCovariatesPCAForceNormal);
    }

    if (1 == 1) {
        if (!skipNormalization && !skipCovariateNormalization && covsToCorrect2.length != 0
                && covsToCorrect.length != 0) {
            correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates);
        }
        if (!skipNormalization && !skipCovariateNormalization && !qtlProbeSnpMultiMapCovariates.isEmpty()) {
            correctCovariatesForQtls(datasetCovariates, datasetGenotypes, qtlProbeSnpMultiMapCovariates);
        }
        if (1 == 2) {
            saveCorrectedCovariates(datasetCovariates);
        }
        if (1 == 2) {
            icaCovariates(datasetCovariates);
        }
        if (!skipNormalization) {
            forceNormalCovariates(datasetCovariates, datasetGenotypes);
        }
    }

    ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(
            datasetExpression.nrProbes, datasetExpression.nrSamples);
    for (int p = 0; p < datasetExpression.nrProbes; p++) {
        for (int s = 0; s < datasetExpression.nrSamples; s++) {
            datasetExpressionBeforeEQTLCorrection.rawData[p][s] = datasetExpression.rawData[p][s];
        }
    }

    if (!skipNormalization && covsToCorrect.length != 0) {
        correctExpressionDataForInteractions(covsToCorrect, datasetCovariates, datasetGenotypes, nrSamples,
                datasetExpression, regression, qtlProbeSnpMultiMap);
    }
    if (!skipNormalization) {
        forceNormalExpressionData(datasetExpression);
    }

    datasetExpression.save(outputDir + "/expressionDataRound_" + covsToCorrect.length + ".txt");
    datasetExpression.save(outputDir + "/expressionDataRound_" + covsToCorrect.length + ".binary");
    datasetCovariates.save(outputDir + "/covariateData_" + covsToCorrect.length + ".binary");

    if (1 == 1) {
        ExpressionDataset datasetZScores = new ExpressionDataset(datasetCovariates.nrProbes,
                datasetExpression.nrProbes);
        datasetZScores.probeNames = datasetCovariates.probeNames;
        datasetZScores.sampleNames = new String[datasetGenotypes.probeNames.length];
        for (int i = 0; i < datasetGenotypes.probeNames.length; ++i) {
            datasetZScores.sampleNames[i] = datasetGenotypes.probeNames[i] + datasetExpression.probeNames[i]
                    .substring(datasetExpression.probeNames[i].lastIndexOf('_'));
        }
        datasetZScores.recalculateHashMaps();

        SkippedInteractionWriter skippedWriter = new SkippedInteractionWriter(
                new File(outputDir + "/skippedInteractionsRound_" + covsToCorrect.length + ".txt"));

        java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(numThreads);
        CompletionService<DoubleArrayIntegerObject> pool =
                new ExecutorCompletionService<DoubleArrayIntegerObject>(threadPool);
        int nrTasks = 0;
        for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) {
            double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]);
            if (stdev > 0) {
                PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(
                        datasetGenotypes, datasetExpression, datasetCovariates, datasetCovariatesPCAForceNormal,
                        cov, skippedWriter, snpsToTest);
                pool.submit(task);
                nrTasks++;
            }
        }

        String maxChi2Cov = "";
        int maxChi2CovI = 0;
        double maxChi2 = 0;
        try {
            // If gene annotation were provided, the chi2sum calculation could be
            // restricted to gene pairs at least 1 Mb apart.
            for (int task = 0; task < nrTasks; task++) {
                try {
                    DoubleArrayIntegerObject result = pool.take().get();
                    int cov = result.intValue;
                    double chi2Sum = 0;
                    double[] covZ = datasetZScores.rawData[cov];
                    for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) {
                        double z = result.doubleArray[snp];
                        covZ[snp] = z;
                        if (!Double.isNaN(z)) {
                            chi2Sum += z * z;
                        }
                    }
                    // Track the covariate with the largest chi2 sum, excluding
                    // principal components and cohort indicator covariates.
                    if (chi2Sum > maxChi2 && !datasetCovariates.probeNames[cov].startsWith("Comp")
                            && !datasetCovariates.probeNames[cov].equals("LLS")
                            && !datasetCovariates.probeNames[cov].equals("LLdeep")
                            && !datasetCovariates.probeNames[cov].equals("RS")
                            && !datasetCovariates.probeNames[cov].equals("CODAM")) {
                        maxChi2 = chi2Sum;
                        maxChi2CovI = cov;
                        maxChi2Cov = datasetCovariates.probeNames[cov];
                    }
                    if ((task + 1) % 512 == 0) {
                        System.out.println(task + 1 + " tasks processed");
                    }
                } catch (ExecutionException ex) {
                    Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName())
                            .log(Level.SEVERE, null, ex);
                }
            }
            threadPool.shutdown();
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println(e.getMessage());
        }

        System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov);
        outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov);
        outputTopCovs.flush();
        skippedWriter.close();
        datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt");

        // Write the expression values of the top covariate, one sample per line.
        BufferedWriter writer = new BufferedWriter(
                new FileWriter(outputDir + "/" + "topCov" + maxChi2Cov + "_expression.txt"));
        double[] topCovExpression = datasetCovariates.rawData[maxChi2CovI];
        for (int i = 0; i < topCovExpression.length; ++i) {
            writer.append(datasetCovariates.sampleNames[i]);
            writer.append('\t');
            writer.append(String.valueOf(topCovExpression[i]));
            writer.append('\n');
        }
        writer.close();

        return maxChi2Cov;
    }
    return null;
}
From source file:de.dfki.km.perspecting.obie.experiments.PhraseExperiment.java
@Test
public void testDifferentPrefixLengths() {
    final String template = "SELECT * WHERE {?s ?p ?o}";
    try {
        URL url = new URL("http://en.wikipedia.org/wiki/Kaiserslautern");
        Document document = pipeline.createDocument(FileUtils.toFile(url), url.toURI(), MediaType.HTML,
                template, Language.EN);

        for (int step = 0; pipeline.hasNext(step) && step <= 5; step = pipeline.execute(step, document)) {
            System.out.println(step);
        }

        final BufferedWriter bw = new BufferedWriter(
                new FileWriter($SCOOBIE_HOME + "results/response_time_prefix_hashing.csv"));

        for (int SIZE = 1; SIZE < 11; SIZE++) {
            // Collect the distinct prefixes of the noun-phrase words.
            TreeSet<String> hist = new TreeSet<String>();
            int count = 0;
            for (TokenSequence<String> i : document.getNounPhrases()) {
                String[] words = i.toString().split("[\\s]+");
                for (String word : words) {
                    count++;
                    if (word.length() >= SIZE)
                        hist.add(word.substring(0, SIZE));
                    else
                        hist.add(word);
                }
            }

            // Build a parameterized IN query over the collected prefixes.
            StringBuilder query = new StringBuilder();
            query.append("SELECT count(*) FROM index_literals, symbols WHERE "
                    + "( symbols.object = index_literals.index AND substr(index_literals.literal,1," + SIZE
                    + ") IN (");
            for (String p : hist) {
                query.append("(?) , ");
            }
            query.setLength(query.length() - 3);
            query.append("))");
            System.out.println(query.toString());

            Connection c = pool.getConnection();
            PreparedStatement stmtGetDatatypePropertyValues = c.prepareStatement(query.toString());
            int paramIndex = 0;
            for (String p : hist) {
                stmtGetDatatypePropertyValues.setString(++paramIndex, p);
            }

            // Time the query and record prefix length, response time, and hit count.
            long start = System.currentTimeMillis();
            ResultSet rs = stmtGetDatatypePropertyValues.executeQuery();
            long end = System.currentTimeMillis();
            while (rs.next()) {
                bw.append(SIZE + "\t" + (end - start) + "\t" + rs.getInt(1));
                bw.newLine();
            }
            stmtGetDatatypePropertyValues.close();
            c.close();
        }
        bw.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:org.ut.biolab.medsavant.server.db.variants.VariantManagerUtils.java
private static File cleanVariantFile(File inputFile, String tableName, String sessionId)
        throws SQLException, RemoteException, IOException, SessionExpiredException {
    LOG.info("Cleaning file " + inputFile.getAbsolutePath() + " for table " + tableName);

    TableSchema table = CustomTables.getInstance().getCustomTableSchema(sessionId, tableName);
    BufferedReader br = new BufferedReader(new FileReader(inputFile));
    File outputFile = new File(inputFile.getCanonicalPath() + "_clean");
    BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile));

    String line;
    int expectedColumnCount = table.getColumns().size();
    long row = 0;
    int[] expectedColumnLengths = new int[table.getColumns().size()];

    LOG.info("Cleaning " + expectedColumnCount + " fields...");
    for (int i = 0; i < expectedColumnCount; ++i) {
        if (isVarchar(table.getColumns().get(i).getTypeNameSQL())) {
            expectedColumnLengths[i] = table.getColumns().get(i).getTypeLength();
        } else {
            expectedColumnLengths[i] = Integer.MAX_VALUE;
        }
    }

    int announceEvery = 100000;
    LOG.info("Cleaning " + inputFile.getAbsolutePath());
    int maxWarningsToLog = 30;
    int warnings = 0;

    while ((line = br.readLine()) != null) {
        ++row;
        if (row % announceEvery == 0) {
            LOG.info("Cleaned " + row + " lines of " + inputFile.getAbsolutePath());
        }

        String[] values = line.split(VariantManagerUtils.FIELD_DELIMITER, -1);

        // Skip rows with an unexpected number of columns.
        if (values.length != expectedColumnCount) {
            if (warnings < maxWarningsToLog) {
                LOG.warn("Unexpected number of columns: expected [" + expectedColumnCount + "] found ["
                        + values.length + "] at line [" + row + "] in file [" + inputFile.getAbsolutePath()
                        + "]");
            } else if (warnings == maxWarningsToLog) {
                LOG.warn(maxWarningsToLog + "+ warnings");
            }
            warnings++;
            continue;
        }

        // Truncate fields that are too long for their column
        // (the field length excludes the two enclosing characters).
        for (int i = 0; i < expectedColumnCount; ++i) {
            int lengthOfField = values[i].length() - 2;
            if (lengthOfField >= expectedColumnLengths[i]) {
                LOG.warn("Value too long: [" + values[i] + "]; trimmed to [" + expectedColumnLengths[i]
                        + "] characters");
                String unenclosed = values[i].replace(VariantManagerUtils.ENCLOSED_BY, "");
                values[i] = VariantManagerUtils.ENCLOSED_BY + unenclosed.substring(0, expectedColumnLengths[i])
                        + VariantManagerUtils.ENCLOSED_BY;
            }
        }

        bw.append(StringUtils.join(values, VariantManagerUtils.FIELD_DELIMITER));
        bw.append("\n");
    }

    LOG.info("Done cleaning " + inputFile.getAbsolutePath() + " output to " + outputFile.getAbsolutePath());
    LOG.warn(warnings + " warnings while cleaning");
    bw.close();
    br.close();
    if (inputFile.exists()) {
        inputFile.delete();
    }
    return outputFile;
}
From source file:edu.isi.pfindr.learn.util.PairsFileIO.java
public void generatePairsFromTwoDifferentFilesWithClass(String inputFilePath1, String inputFilePath2,
        String outputFilePath) {
    List<String> phenotypeList1 = new ArrayList<String>();
    List<String> phenotypeList2 = new ArrayList<String>();
    try {
        phenotypeList1 = FileUtils.readLines(new File(inputFilePath1));
        phenotypeList2 = FileUtils.readLines(new File(inputFilePath2));
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    String[] phenotype1, phenotype2;
    StringBuffer outputBuffer = new StringBuffer();
    BufferedWriter bw = null;
    try {
        bw = new BufferedWriter(new FileWriter(outputFilePath));
        int count = 0;
        for (int i = 0; i < phenotypeList1.size(); i++) {
            phenotype1 = phenotypeList1.get(i).split("\t");
            for (int j = 0; j < phenotypeList2.size(); j++) {
                count++;
                phenotype2 = phenotypeList2.get(j).split("\t");
                System.out.println("i " + i + " j " + j + " " + phenotype1[0] + " " + phenotype2[0]);
                // Label the pair 1 if the two phenotypes share a class, 0 otherwise.
                if (phenotype1[1].equals(phenotype2[1])) {
                    outputBuffer.append(String.format("%s\t%s\t%d", phenotype1[0], phenotype2[0], 1))
                            .append("\n");
                } else {
                    outputBuffer.append(String.format("%s\t%s\t%d", phenotype1[0], phenotype2[0], 0))
                            .append("\n");
                }
                bw.append(outputBuffer.toString());
                outputBuffer.setLength(0);
            }
        }
        bw.flush();
        System.out.println("The count is: " + count);
    } catch (IOException io) {
        io.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (bw != null)
                bw.close();
        } catch (IOException e) {
            System.out.println("Problem occurred while closing output stream " + bw);
            e.printStackTrace();
        }
    }
}
From source file:com.linkedin.databus.core.TestTrailFilePositionSetter.java
private void createTrailFiles(String dir, String prefix, long numTxns, long numLinesPerFile,
        long numLinesPerNewline, String newlineChar, int startLine, Set<Long> corruptedScns, String corruption,
        boolean addAlternateCorruption, String altCorruption, String[] txnPattern, int numDbUpdatesWithSameScn,
        long currScn) throws IOException {
    long numFiles = ((numTxns * (txnPattern.length)) / numLinesPerFile) + 1;
    long numDigits = new Double(Math.log10(numFiles)).longValue() + 1;
    long currFileNum = 0;
    String currFile = prefix + toFixedLengthString(currFileNum, numDigits);
    long lineCount = 0;
    BufferedWriter w = createWriter(dir, currFile);
    int start = startLine;
    int dbUpdates = 0;
    for (long txn = 0; txn < numTxns; ++txn) {
        boolean corruptNextTokensEndTag = false;
        if (txn > 0)
            start = 0;
        for (int j = 0; j < txnPattern.length; ++j) {
            lineCount++;
            String txnLine = txnPattern[j];
            // Substitute the SCN placeholder, corrupting it for the selected SCNs.
            if (txnLine.contains("${SCN}")) {
                dbUpdates++;
                txnLine = txnLine.replace("${SCN}",
                        new Long(currScn).toString() + (corruptedScns.contains(currScn) ? corruption : ""));
                if (addAlternateCorruption && corruptedScns.contains(currScn))
                    corruptNextTokensEndTag = true;
                if (dbUpdates >= numDbUpdatesWithSameScn) {
                    currScn++;
                    dbUpdates = 0;
                }
            }
            if (corruptNextTokensEndTag && txnLine.contains("</tokens>")) {
                txnLine = txnLine + newlineChar + " " + altCorruption;
                corruptNextTokensEndTag = false;
            }
            if (j >= start) {
                w.append(txnLine);
                if (lineCount % numLinesPerNewline == 0)
                    w.append(newlineChar);
            }
            // Roll over to a new trail file every numLinesPerFile lines.
            if ((lineCount % numLinesPerFile) == 0) {
                w.close();
                currFileNum++;
                currFile = prefix + toFixedLengthString(currFileNum, numDigits);
                w = createWriter(dir, currFile);
            }
        }
    }
    if (w != null)
        w.close();
}
From source file:org.ops4j.pax.scanner.obr.internal.ObrScanner.java
/**
 * Reads the bundles from the file specified by the urlSpec.
 * {@inheritDoc}
 */
public List<ScannedBundle> scan(final ProvisionSpec provisionSpec)
        throws MalformedSpecificationException, ScannerException {
    NullArgumentException.validateNotNull(provisionSpec, "Provision spec");
    LOG.debug("Scanning [" + provisionSpec.getPath() + "]");
    final List<ScannedBundle> scannedBundles = new ArrayList<ScannedBundle>();
    final ScannerConfiguration config = createConfiguration();
    BufferedReader bufferedReader = null;
    BufferedWriter bufferedWriter = null;
    try {
        try {
            bufferedReader = new BufferedReader(new InputStreamReader(
                    URLUtils.prepareInputStream(provisionSpec.getPathAsUrl(), !config.getCertificateCheck())));
            final Integer defaultStartLevel = getDefaultStartLevel(provisionSpec, config);
            final Boolean defaultStart = getDefaultStart(provisionSpec, config);
            final Boolean defaultUpdate = getDefaultUpdate(provisionSpec, config);

            // We always install the OBR and the Pax Scanner OBR script.
            scannedBundles.add(new ScannedFileBundle("mvn:org.apache.felix/org.apache.felix.bundlerepository",
                    defaultStartLevel, defaultStart, defaultUpdate));
            scannedBundles.add(new ScannedFileBundle("mvn:org.ops4j.pax.scanner/pax-scanner-obr-script",
                    defaultStartLevel, defaultStart, defaultUpdate));

            // Set the script name ...
            final File scriptFile = createScript();
            System.setProperty("org.ops4j.pax.scanner.obr.script", scriptFile.toURI().toASCIIString());
            // ... and the repositories property.
            System.setProperty("obr.repository.url", m_propertyResolver.get("obr.repository.url"));

            bufferedWriter = new BufferedWriter(new FileWriter(scriptFile));
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                if (!"".equals(line.trim()) && !line.trim().startsWith(COMMENT_SIGN)) {
                    if (line.trim().startsWith(PROPERTY_PREFIX)) {
                        final Matcher matcher = PROPERTY_PATTERN.matcher(line.trim());
                        if (!matcher.matches() || matcher.groupCount() != 2) {
                            throw new ScannerException("Invalid property: " + line);
                        }
                        String value = matcher.group(2);
                        value = SystemPropertyUtils.resolvePlaceholders(value);
                        System.setProperty(matcher.group(1), value);
                    } else {
                        // Translate each bundle line into an OBR filter in the script.
                        line = SystemPropertyUtils.resolvePlaceholders(line);
                        final String obrFilter = createObrFilter(line);
                        bufferedWriter.append(obrFilter);
                        bufferedWriter.newLine();
                    }
                }
            }
        } finally {
            if (bufferedReader != null) {
                bufferedReader.close();
            }
            if (bufferedWriter != null) {
                bufferedWriter.flush();
                bufferedWriter.close();
            }
        }
    } catch (IOException e) {
        throw new ScannerException("Could not parse the provision file", e);
    }
    return scannedBundles;
}