List of usage examples for java.io BufferedWriter close
@SuppressWarnings("try") public void close() throws IOException
From source file:edu.cmu.lti.oaqa.knn4qa.apps.ParsedPost.java
public static void main(String args[]) { Options options = new Options(); options.addOption(INPUT_PARAM, null, true, INPUT_DESC); options.addOption(OUTPUT_PARAM, null, true, OUTPUT_DESC); options.addOption(CommonParams.MAX_NUM_REC_PARAM, null, true, CommonParams.MAX_NUM_REC_DESC); options.addOption(DEBUG_PRINT_PARAM, null, false, DEBUG_PRINT_DESC); options.addOption(EXCLUDE_CODE_PARAM, null, false, EXCLUDE_CODE_DESC); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); HashMap<String, ParsedPost> hQuestions = new HashMap<String, ParsedPost>(); try {//from ww w . j ava 2 s . com CommandLine cmd = parser.parse(options, args); String inputFile = cmd.getOptionValue(INPUT_PARAM); if (null == inputFile) Usage("Specify: " + INPUT_PARAM, options); String outputFile = cmd.getOptionValue(OUTPUT_PARAM); if (null == outputFile) Usage("Specify: " + OUTPUT_PARAM, options); InputStream input = CompressUtils.createInputStream(inputFile); BufferedWriter output = new BufferedWriter(new FileWriter(new File(outputFile))); int maxNumRec = Integer.MAX_VALUE; String tmp = cmd.getOptionValue(CommonParams.MAX_NUM_REC_PARAM); if (tmp != null) maxNumRec = Integer.parseInt(tmp); boolean debug = cmd.hasOption(DEBUG_PRINT_PARAM); boolean excludeCode = cmd.hasOption(EXCLUDE_CODE_PARAM); System.out.println("Processing at most " + maxNumRec + " records, excluding code? " + excludeCode); XmlIterator xi = new XmlIterator(input, ROOT_POST_TAG); String elem; output.write("<?xml version='1.0' encoding='UTF-8'?><ystfeed>\n"); for (int num = 1; num <= maxNumRec && !(elem = xi.readNext()).isEmpty(); ++num) { ParsedPost post = null; try { post = parsePost(elem, excludeCode); if (!post.mAcceptedAnswerId.isEmpty()) { hQuestions.put(post.mId, post); } else if (post.mpostIdType.equals("2")) { String parentId = post.mParentId; String id = post.mId; if (!parentId.isEmpty()) { ParsedPost parentPost = hQuestions.get(parentId); if (parentPost != null && parentPost.mAcceptedAnswerId.equals(id)) { output.write(createYahooAnswersQuestion(parentPost, post)); hQuestions.remove(parentId); } } } } catch (Exception e) { e.printStackTrace(); throw new Exception("Error parsing record # " + num + ", error message: " + e); } if (debug) { System.out.println(String.format("%s parentId=%s acceptedAnswerId=%s type=%s", post.mId, post.mParentId, post.mAcceptedAnswerId, post.mpostIdType)); System.out.println("================================"); if (!post.mTitle.isEmpty()) { System.out.println(post.mTitle); System.out.println("--------------------------------"); } System.out.println(post.mBody); System.out.println("================================"); } } output.write("</ystfeed>\n"); input.close(); output.close(); } catch (ParseException e) { Usage("Cannot parse arguments", options); } catch (Exception e) { e.printStackTrace(); System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:edu.cmu.lti.oaqa.knn4qa.apps.QueryGenNMSLIB.java
public static void main(String[] args) { Options options = new Options(); options.addOption(CommonParams.QUERY_FILE_PARAM, null, true, CommonParams.QUERY_FILE_DESC); options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC); options.addOption(CommonParams.KNN_QUERIES_PARAM, null, true, CommonParams.KNN_QUERIES_DESC); options.addOption(CommonParams.NMSLIB_FIELDS_PARAM, null, true, CommonParams.NMSLIB_FIELDS_DESC); options.addOption(CommonParams.MAX_NUM_QUERY_PARAM, null, true, CommonParams.MAX_NUM_QUERY_DESC); options.addOption(CommonParams.SEL_PROB_PARAM, null, true, CommonParams.SEL_PROB_DESC); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); BufferedWriter knnQueries = null; int maxNumQuery = Integer.MAX_VALUE; Float selProb = null;// www . j a v a 2s .c o m try { CommandLine cmd = parser.parse(options, args); String queryFile = null; if (cmd.hasOption(CommonParams.QUERY_FILE_PARAM)) { queryFile = cmd.getOptionValue(CommonParams.QUERY_FILE_PARAM); } else { Usage("Specify 'query file'", options); } String knnQueriesFile = cmd.getOptionValue(CommonParams.KNN_QUERIES_PARAM); if (null == knnQueriesFile) Usage("Specify '" + CommonParams.KNN_QUERIES_DESC + "'", options); String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM); if (tmpn != null) { try { maxNumQuery = Integer.parseInt(tmpn); } catch (NumberFormatException e) { Usage("Maximum number of queries isn't integer: '" + tmpn + "'", options); } } String tmps = cmd.getOptionValue(CommonParams.NMSLIB_FIELDS_PARAM); if (null == tmps) Usage("Specify '" + CommonParams.NMSLIB_FIELDS_DESC + "'", options); String nmslibFieldList[] = tmps.split(","); knnQueries = new BufferedWriter(new FileWriter(knnQueriesFile)); knnQueries.write("isQueryFile=1"); knnQueries.newLine(); knnQueries.newLine(); String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM); if (null == memIndexPref) { Usage("Specify '" + CommonParams.MEMINDEX_DESC + "'", options); } String tmpf = cmd.getOptionValue(CommonParams.SEL_PROB_PARAM); if (tmpf != null) { try { selProb = Float.parseFloat(tmpf); } catch (NumberFormatException e) { Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options); } if (selProb < Float.MIN_NORMAL || selProb + Float.MIN_NORMAL >= 1) Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options); } BufferedReader inpText = new BufferedReader( new InputStreamReader(CompressUtils.createInputStream(queryFile))); String docText = XmlHelper.readNextXMLIndexEntry(inpText); NmslibQueryGenerator queryGen = new NmslibQueryGenerator(nmslibFieldList, memIndexPref); Random rnd = new Random(); for (int docNum = 1; docNum <= maxNumQuery && docText != null; ++docNum, docText = XmlHelper.readNextXMLIndexEntry(inpText)) { if (selProb != null) { if (rnd.nextFloat() > selProb) continue; } Map<String, String> docFields = null; try { docFields = XmlHelper.parseXMLIndexEntry(docText); String queryObjStr = queryGen.getStrObjForKNNService(docFields); knnQueries.append(queryObjStr); knnQueries.newLine(); } catch (SAXException e) { System.err.println("Parsing error, offending DOC:" + NL + docText + " doc # " + docNum); throw new Exception("Parsing error."); } } knnQueries.close(); } catch (ParseException e) { Usage("Cannot parse arguments", options); if (null != knnQueries) try { knnQueries.close(); } catch (IOException e1) { e1.printStackTrace(); } } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); try { if (knnQueries != null) knnQueries.close(); } catch (IOException e1) { e1.printStackTrace(); } System.exit(1); } System.out.println("Terminated successfully!"); }
From source file:edu.cmu.lti.oaqa.annographix.apps.SolrQueryApp.java
public static void main(String[] args) { Options options = new Options(); options.addOption("u", null, true, "Solr URI"); options.addOption("q", null, true, "Query"); options.addOption("n", null, true, "Max # of results"); options.addOption("o", null, true, "An optional TREC-style output file"); options.addOption("w", null, false, "Do a warm-up query call, before each query"); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); BufferedWriter trecOutFile = null; try {/*from w w w .j a v a2s. com*/ CommandLine cmd = parser.parse(options, args); String queryFile = null, solrURI = null; if (cmd.hasOption("u")) { solrURI = cmd.getOptionValue("u"); } else { Usage("Specify Solr URI"); } SolrServerWrapper solr = new SolrServerWrapper(solrURI); if (cmd.hasOption("q")) { queryFile = cmd.getOptionValue("q"); } else { Usage("Specify Query file"); } int numRet = 100; if (cmd.hasOption("n")) { numRet = Integer.parseInt(cmd.getOptionValue("n")); } if (cmd.hasOption("o")) { trecOutFile = new BufferedWriter(new FileWriter(new File(cmd.getOptionValue("o")))); } List<String> fieldList = new ArrayList<String>(); fieldList.add(UtilConst.ID_FIELD); fieldList.add(UtilConst.SCORE_FIELD); double totalTime = 0; double retQty = 0; ArrayList<Double> queryTimes = new ArrayList<Double>(); boolean bDoWarmUp = cmd.hasOption("w"); if (bDoWarmUp) { System.out.println("Using a warmup step!"); } int queryQty = 0; for (String t : FileUtils.readLines(new File(queryFile))) { t = t.trim(); if (t.isEmpty()) continue; int ind = t.indexOf('|'); if (ind < 0) throw new Exception("Wrong format, line: '" + t + "'"); String qID = t.substring(0, ind); String q = t.substring(ind + 1); SolrDocumentList res = null; if (bDoWarmUp) { res = solr.runQuery(q, fieldList, numRet); } Long tm1 = System.currentTimeMillis(); res = solr.runQuery(q, fieldList, numRet); Long tm2 = System.currentTimeMillis(); retQty += res.getNumFound(); System.out.println(qID + " Obtained: " + res.getNumFound() + " entries in " + (tm2 - tm1) + " ms"); double delta = (tm2 - tm1); totalTime += delta; queryTimes.add(delta); ++queryQty; if (trecOutFile != null) { ArrayList<SolrRes> resArr = new ArrayList<SolrRes>(); for (SolrDocument doc : res) { String id = (String) doc.getFieldValue(UtilConst.ID_FIELD); float score = (Float) doc.getFieldValue(UtilConst.SCORE_FIELD); resArr.add(new SolrRes(id, "", score)); } SolrRes[] results = resArr.toArray(new SolrRes[resArr.size()]); Arrays.sort(results); SolrEvalUtils.saveTrecResults(qID, results, trecOutFile, TREC_RUN, results.length); } } double devTime = 0, meanTime = totalTime / queryQty; for (int i = 0; i < queryQty; ++i) { double d = queryTimes.get(i) - meanTime; devTime += d * d; } devTime = Math.sqrt(devTime / (queryQty - 1)); System.out.println(String.format("Query time, mean/standard dev: %.2f/%.2f (ms)", meanTime, devTime)); System.out.println(String.format("Avg # of docs returned: %.2f", retQty / queryQty)); solr.close(); trecOutFile.close(); } catch (ParseException e) { Usage("Cannot parse arguments"); } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:apps.LuceneQuery.java
public static void main(String[] args) { Options options = new Options(); options.addOption("d", null, true, "index directory"); options.addOption("i", null, true, "input file"); options.addOption("s", null, true, "stop word file"); options.addOption("n", null, true, "max # of results"); options.addOption("o", null, true, "a TREC-style output file"); options.addOption("r", null, true, "an optional QREL file, if specified," + "we save results only for queries for which we find at least one relevant entry."); options.addOption("prob", null, true, "question sampling probability"); options.addOption("max_query_qty", null, true, "a maximum number of queries to run"); options.addOption("bm25_b", null, true, "BM25 parameter: b"); options.addOption("bm25_k1", null, true, "BM25 parameter: k1"); options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity"); options.addOption("seed", null, true, "random seed"); Joiner commaJoin = Joiner.on(','); Joiner spaceJoin = Joiner.on(' '); options.addOption("source_type", null, true, "query source type: " + commaJoin.join(SourceFactory.getQuerySourceList())); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); QrelReader qrels = null;//from w w w .j a v a2 s . com try { CommandLine cmd = parser.parse(options, args); String indexDir = null; if (cmd.hasOption("d")) { indexDir = cmd.getOptionValue("d"); } else { Usage("Specify 'index directory'", options); } String inputFileName = null; if (cmd.hasOption("i")) { inputFileName = cmd.getOptionValue("i"); } else { Usage("Specify 'input file'", options); } DictNoComments stopWords = null; if (cmd.hasOption("s")) { String stopWordFileName = cmd.getOptionValue("s"); stopWords = new DictNoComments(new File(stopWordFileName), true /* lowercasing */); System.out.println("Using the stopword file: " + stopWordFileName); } String sourceName = cmd.getOptionValue("source_type"); if (sourceName == null) Usage("Specify document source type", options); int numRet = 100; if (cmd.hasOption("n")) { numRet = Integer.parseInt(cmd.getOptionValue("n")); System.out.println("Retrieving at most " + numRet + " candidate entries."); } String trecOutFileName = null; if (cmd.hasOption("o")) { trecOutFileName = cmd.getOptionValue("o"); } else { Usage("Specify 'a TREC-style output file'", options); } double fProb = 1.0f; if (cmd.hasOption("prob")) { try { fProb = Double.parseDouble(cmd.getOptionValue("prob")); } catch (NumberFormatException e) { Usage("Wrong format for 'question sampling probability'", options); } } if (fProb <= 0 || fProb > 1) { Usage("Question sampling probability should be >0 and <=1", options); } System.out.println("Sample the following fraction of questions: " + fProb); float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT; if (cmd.hasOption("bm25_k1")) { try { bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1")); } catch (NumberFormatException e) { Usage("Wrong format for 'bm25_k1'", options); } } if (cmd.hasOption("bm25_b")) { try { bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b")); } catch (NumberFormatException e) { Usage("Wrong format for 'bm25_b'", options); } } long seed = 0; String tmpl = cmd.getOptionValue("seed"); if (tmpl != null) seed = Long.parseLong(tmpl); System.out.println("Using seed: " + seed); Random randGen = new Random(seed); System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b)); boolean useFixedBM25 = cmd.hasOption("bm25fixed"); EnglishAnalyzer analyzer = new EnglishAnalyzer(); Similarity similarity = null; if (useFixedBM25) { System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b)); similarity = new BM25SimilarityFix(bm25_k1, bm25_b); } else { System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b)); similarity = new BM25Similarity(bm25_k1, bm25_b); } int maxQueryQty = Integer.MAX_VALUE; if (cmd.hasOption("max_query_qty")) { try { maxQueryQty = Integer.parseInt(cmd.getOptionValue("max_query_qty")); } catch (NumberFormatException e) { Usage("Wrong format for 'max_query_qty'", options); } } System.out.println(String.format("Executing at most %d queries", maxQueryQty)); if (cmd.hasOption("r")) { String qrelFile = cmd.getOptionValue("r"); System.out.println("Using the qrel file: '" + qrelFile + "', queries not returning a relevant entry will be ignored."); qrels = new QrelReader(qrelFile); } System.out.println(String.format("Using indexing directory %s", indexDir)); LuceneCandidateProvider candProvider = new LuceneCandidateProvider(indexDir, analyzer, similarity); TextCleaner textCleaner = new TextCleaner(stopWords); QuerySource inpQuerySource = SourceFactory.createQuerySource(sourceName, inputFileName); QueryEntry inpQuery = null; BufferedWriter trecOutFile = new BufferedWriter(new FileWriter(new File(trecOutFileName))); int questNum = 0, questQty = 0; long totalTimeMS = 0; while ((inpQuery = inpQuerySource.next()) != null) { if (questQty >= maxQueryQty) break; ++questNum; String queryID = inpQuery.mQueryId; if (randGen.nextDouble() <= fProb) { ++questQty; String tokQuery = spaceJoin.join(textCleaner.cleanUp(inpQuery.mQueryText)); String query = TextCleaner.luceneSafeCleanUp(tokQuery).trim(); ResEntry[] results = null; if (query.isEmpty()) { results = new ResEntry[0]; System.out.println(String.format("WARNING, empty query id = '%s'", inpQuery.mQueryId)); } else { try { long start = System.currentTimeMillis(); results = candProvider.getCandidates(questNum, query, numRet); long end = System.currentTimeMillis(); long searchTimeMS = end - start; totalTimeMS += searchTimeMS; System.out.println(String.format( "Obtained results for the query # %d (answered %d queries), queryID %s the search took %d ms, we asked for max %d entries got %d", questNum, questQty, queryID, searchTimeMS, numRet, results.length)); } catch (ParseException e) { e.printStackTrace(); System.err.println( "Error parsing query: " + query + " orig question is :" + inpQuery.mQueryText); System.exit(1); } } boolean bSave = true; if (qrels != null) { boolean bOk = false; for (ResEntry r : results) { String label = qrels.get(queryID, r.mDocId); if (candProvider.isRelevLabel(label, 1)) { bOk = true; break; } } if (!bOk) bSave = false; } // System.out.println(String.format("Ranking results the query # %d queryId='%s' save results? %b", // questNum, queryID, bSave)); if (bSave) { saveTrecResults(queryID, results, trecOutFile, TREC_RUN, numRet); } } if (questNum % 1000 == 0) System.out.println(String.format("Proccessed %d questions", questNum)); } System.out.println(String.format("Proccessed %d questions, the search took %f MS on average", questQty, (float) totalTimeMS / questQty)); trecOutFile.close(); } catch (ParseException e) { e.printStackTrace(); Usage("Cannot parse arguments: " + e, options); } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:net.java.sen.tools.MkSenDic.java
/** * Build sen dictionary./*www .j a v a 2 s .co m*/ * * @param args * custom dictionary files. see dic/build.xml. */ public static void main(String args[]) { ResourceBundle rb = ResourceBundle.getBundle("dictionary"); DictionaryMaker dm1 = new DictionaryMaker(); DictionaryMaker dm2 = new DictionaryMaker(); DictionaryMaker dm3 = new DictionaryMaker(); // 1st field information of connect file. Vector rule1 = new Vector(); // 2nd field information of connect file. Vector rule2 = new Vector(); // 3rd field information of connect file. Vector rule3 = new Vector(); // 4th field information of connect file. // this field shows cost of morpheme connection // [size3*(x3*size2+x2)+x1] // [size3*(Attr1*size2+Attr2)+Attl] short score[] = new short[20131]; long start = System.currentTimeMillis(); // ///////////////////////////////////////// // // Step1. Loading connetion file. // log.info("(1/7): reading connection matrix ... "); try { log.info("connection file = " + rb.getString("text_connection_file")); log.info("charset = " + rb.getString("dic.charset")); CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")), rb.getString("dic.charset")); String t[]; int line = 0; while ((t = csvparser.nextTokens()) != null) { if (t.length < 4) { log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line); log.warn(rb.getString("text_connection_file") + "may be broken."); break; } dm1.add(t[0]); rule1.add(t[0]); dm2.add(t[1]); rule2.add(t[1]); dm3.add(t[2]); rule3.add(t[2]); if (line == score.length) { score = resize(score); } score[line++] = (short) Integer.parseInt(t[3]); } // ///////////////////////////////////////// // // Step2. Building internal dictionary // log.info("(2/7): building type dictionary ... "); dm1.build(); dm2.build(); dm3.build(); // if you want check specified morpheme, you uncomment and modify // following line: /* * System.out.print("22="); dm3.getById(22); * System.out.print("368="); dm3.getById(368); * * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * DictionaryMaker.debug = true; * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?")); */ } catch (IOException e) { e.printStackTrace(); System.exit(0); } // ------------------------------------------------- int size1 = dm1.size(); int size2 = dm2.size(); int size3 = dm3.size(); int ruleSize = rule1.size(); short matrix[] = new short[size1 * size2 * size3]; short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost")); // ///////////////////////////////////////// // // Step3. Writing Connection Matrix // log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = " + size1 * size2 * size3 + ") ..."); for (int i = 0; i < (int) (size1 * size2 * size3); i++) matrix[i] = default_cost; for (int i = 0; i < ruleSize; i++) { Vector r1 = dm1.getRuleIdList((String) rule1.get(i)); Vector r2 = dm2.getRuleIdList((String) rule2.get(i)); Vector r3 = dm3.getRuleIdList((String) rule3.get(i)); for (Iterator i1 = r1.iterator(); i1.hasNext();) { int ii1 = ((Integer) i1.next()).intValue(); for (Iterator i2 = r2.iterator(); i2.hasNext();) { int ii2 = ((Integer) i2.next()).intValue(); for (Iterator i3 = r3.iterator(); i3.hasNext();) { int ii3 = ((Integer) i3.next()).intValue(); int pos = size3 * (size2 * ii1 + ii2) + ii3; matrix[pos] = score[i]; } } } } try { DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file")))); out.writeShort(size1); out.writeShort(size2); out.writeShort(size3); for (int i1 = 0; i1 < size1; i1++) for (int i2 = 0; i2 < size2; i2++) for (int i3 = 0; i3 < size3; i3++) { out.writeShort(matrix[size3 * (size2 * i1 + i2) + i3]); // if (matrix[size3 * (size2 * i1 + i2) + i3] != // default_cost) { // } } out.close(); } catch (IOException e) { e.printStackTrace(); System.exit(0); } matrix = null; score = null; // ------------------------------------------------- int pos_start = Integer.parseInt(rb.getString("pos_start")); int pos_size = Integer.parseInt(rb.getString("pos_size")); int di = 0; int offset = 0; ArrayList dicList = new ArrayList(); // ///////////////////////////////////////// // // Step4. Reading Morpheme Information // log.info("(4/7): reading morpheme information ... "); String t = null; String[] csv = null; try { // writer for feature file. BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset"))); log.info("load dic: " + rb.getString("text_dic_file")); BufferedReader dicStream = null; int custom_dic = -1; if (args.length == 0) { dicStream = new BufferedReader(new InputStreamReader( new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset"))); } else { custom_dic = 0; dicStream = new BufferedReader( new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } int line = 0; CSVData key_b = new CSVData(); CSVData pos_b = new CSVData(); while (true) { t = dicStream.readLine(); if (t == null) { dicStream.close(); custom_dic++; if (args.length == custom_dic) { break; } else { // read custum dictionary log.info("load dic: " + "args[custum_dic]"); dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } continue; } CSVParser parser = new CSVParser(t); csv = parser.nextTokens(); if (csv.length < (pos_size + pos_start)) { throw new RuntimeException("format error:" + t); } key_b.clear(); pos_b.clear(); for (int i = pos_start; i < (pos_start + pos_size - 1); i++) { key_b.append(csv[i]); pos_b.append(csv[i]); } key_b.append(csv[pos_start + pos_size - 1]); pos_b.append(csv[pos_start + pos_size - 1]); for (int i = pos_start + pos_size; i < (csv.length - 1); i++) { pos_b.append(csv[i]); } pos_b.append(csv[csv.length - 1]); CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(key_b.toString()); token.rcAttr1 = (short) dm2.getDicId(key_b.toString()); token.lcAttr = (short) dm3.getDicId(key_b.toString()); token.posid = 0; token.posID = offset; token.length = (short) csv[0].length(); token.cost = (short) Integer.parseInt(csv[1]); dicList.add(new PairObject(csv[0], token)); byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset")); offset += (b.length + 1); String pos_b_str = pos_b.toString(); bw.write(pos_b_str, 0, pos_b_str.length()); // bw.write(b, 0, b.length); bw.write(0); if (++di % 50000 == 0) log.info("" + di + "... "); } bw.close(); // ----end of writing feature.cha ---- } catch (Exception e) { log.error("Error: " + t); e.printStackTrace(); System.exit(1); } rule1 = null; rule2 = null; rule3 = null; // ///////////////////////////////////////// // // Step5. Sort lexs and write to file // log.info("(5/7): sorting lex... "); int value[] = new int[dicList.size()]; char key[][] = new char[dicList.size()][]; int spos = 0; int dsize = 0; int bsize = 0; String prev = ""; Collections.sort(dicList); // ///////////////////////////////////////// // // Step6. Writing Token Information // log.info("(6/7): writing token... "); try { // writer for token file. DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("token_file")))); // writing 'bos' and 'eos' and 'unknown' token. CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos")); token.posID = -1; token.write(out); log.info("key size = " + key.length); for (int i = 0; i < key.length; i++) { String k = (String) ((PairObject) dicList.get(i)).key; if (!prev.equals(k) && i != 0) { key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; bsize = 1; spos = i; } else { bsize++; } prev = (String) ((PairObject) dicList.get(i)).key; ((CToken) (((PairObject) dicList.get(i)).value)).write(out); } out.flush(); out.close(); } catch (Exception e) { e.printStackTrace(); System.exit(1); } key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; dm1 = null; dm2 = null; dm3 = null; dicList = null; // ///////////////////////////////////////// // // Step7. Build Double Array // log.info("(7/7): building Double-Array (size = " + dsize + ") ..."); DoubleArrayTrie da = new DoubleArrayTrie(); da.build(key, null, value, dsize); try { da.save(rb.getString("double_array_file")); } catch (Exception e) { e.printStackTrace(); } log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[ms]"); }
From source file:TwitterClustering.java
public static void main(String[] args) throws FileNotFoundException, IOException { // TODO code application logic here File outFile = new File(args[3]); Scanner s = new Scanner(new File(args[1])).useDelimiter(","); JSONParser parser = new JSONParser(); Set<Cluster> clusterSet = new HashSet<Cluster>(); HashMap<String, Tweet> tweets = new HashMap(); FileWriter fw = new FileWriter(outFile.getAbsoluteFile()); BufferedWriter bw = new BufferedWriter(fw); // init//from w w w . j a va 2 s .c o m try { Object obj = parser.parse(new FileReader(args[2])); JSONArray jsonArray = (JSONArray) obj; for (int i = 0; i < jsonArray.size(); i++) { Tweet twt = new Tweet(); JSONObject jObj = (JSONObject) jsonArray.get(i); String text = jObj.get("text").toString(); long sum = 0; for (int y = 0; y < text.toCharArray().length; y++) { sum += (int) text.toCharArray()[y]; } String[] token = text.split(" "); String tID = jObj.get("id").toString(); Set<String> mySet = new HashSet<String>(Arrays.asList(token)); twt.setAttributeValue(sum); twt.setText(mySet); twt.setTweetID(tID); tweets.put(tID, twt); } // preparing initial clusters int i = 0; while (s.hasNext()) { String id = s.next();// id Tweet t = tweets.get(id.trim()); clusterSet.add(new Cluster(i + 1, t, new LinkedList())); i++; } Iterator it = tweets.entrySet().iterator(); for (int l = 0; l < 2; l++) { // limit to 25 iterations while (it.hasNext()) { Map.Entry me = (Map.Entry) it.next(); // calculate distance to each centroid Tweet p = (Tweet) me.getValue(); HashMap<Cluster, Float> distMap = new HashMap(); for (Cluster clust : clusterSet) { distMap.put(clust, jaccardDistance(p.getText(), clust.getCentroid().getText())); } HashMap<Cluster, Float> sorted = (HashMap<Cluster, Float>) sortByValue(distMap); sorted.keySet().iterator().next().getMembers().add(p); } // calculate new centroid and update Clusterset for (Cluster clust : clusterSet) { TreeMap<String, Long> tDistMap = new TreeMap(); Tweet newCentroid = null; Long avgSumDist = new Long(0); for (int j = 0; j < clust.getMembers().size(); j++) { avgSumDist += clust.getMembers().get(j).getAttributeValue(); tDistMap.put(clust.getMembers().get(j).getTweetID(), clust.getMembers().get(j).getAttributeValue()); } if (clust.getMembers().size() != 0) { avgSumDist /= (clust.getMembers().size()); } ArrayList<Long> listValues = new ArrayList<Long>(tDistMap.values()); if (tDistMap.containsValue(findClosestNumber(listValues, avgSumDist))) { // found closest newCentroid = tweets .get(getKeyByValue(tDistMap, findClosestNumber(listValues, avgSumDist))); clust.setCentroid(newCentroid); } } } // create an iterator Iterator iterator = clusterSet.iterator(); // check values while (iterator.hasNext()) { Cluster c = (Cluster) iterator.next(); bw.write(c.getId() + "\t"); System.out.print(c.getId() + "\t"); for (Tweet t : c.getMembers()) { bw.write(t.getTweetID() + ", "); System.out.print(t.getTweetID() + ","); } bw.write("\n"); System.out.println(""); } System.out.println(""); System.out.println("SSE " + sumSquaredErrror(clusterSet)); } catch (Exception e) { e.printStackTrace(); } finally { bw.close(); fw.close(); } }
From source file:apps.Source2XML.java
public static void main(String[] args) { Options options = new Options(); options.addOption("i", null, true, "input file"); options.addOption("o", null, true, "output file"); options.addOption("reparse_xml", null, false, "reparse each XML entry to ensure the parser doesn't fail"); Joiner commaJoin = Joiner.on(','); options.addOption("source_type", null, true, "document source type: " + commaJoin.join(SourceFactory.getDocSourceList())); Joiner spaceJoin = Joiner.on(' '); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); BufferedWriter outputFile = null; int docNum = 0; if (USE_LEMMATIZER && USE_STEMMER) { System.err.println("Bug/inconsistent code: cann't use the stemmer and lemmatizer at the same time!"); System.exit(1);/*from w w w . j a v a 2 s. c om*/ } //Stemmer stemmer = new Stemmer(); KrovetzStemmer stemmer = new KrovetzStemmer(); System.out.println("Using Stanford NLP? " + USE_STANFORD); System.out.println("Using Stanford lemmatizer? " + USE_LEMMATIZER); System.out.println("Using stemmer? " + USE_STEMMER + (USE_STEMMER ? " (class: " + stemmer.getClass().getCanonicalName() + ")" : "")); try { CommandLine cmd = parser.parse(options, args); String inputFileName = null, outputFileName = null; if (cmd.hasOption("i")) { inputFileName = cmd.getOptionValue("i"); } else { Usage("Specify 'input file'", options); } if (cmd.hasOption("o")) { outputFileName = cmd.getOptionValue("o"); } else { Usage("Specify 'output file'", options); } outputFile = new BufferedWriter( new OutputStreamWriter(CompressUtils.createOutputStream(outputFileName))); String sourceName = cmd.getOptionValue("source_type"); if (sourceName == null) Usage("Specify document source type", options); boolean reparseXML = options.hasOption("reparse_xml"); DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName); DocumentEntry inpDoc = null; TextCleaner textCleaner = new TextCleaner( new DictNoComments(new File("data/stopwords.txt"), true /* lower case */), USE_STANFORD, USE_LEMMATIZER); Map<String, String> outputMap = new HashMap<String, String>(); outputMap.put(UtilConst.XML_FIELD_DOCNO, null); outputMap.put(UtilConst.XML_FIELD_TEXT, null); XmlHelper xmlHlp = new XmlHelper(); if (reparseXML) System.out.println("Will reparse every XML entry to verify correctness!"); while ((inpDoc = inpDocSource.next()) != null) { ++docNum; ArrayList<String> toks = textCleaner.cleanUp(inpDoc.mDocText); ArrayList<String> goodToks = new ArrayList<String>(); for (String s : toks) if (s.length() <= MAX_WORD_LEN && // Exclude long and short words s.length() >= MIN_WORD_LEN && isGoodWord(s)) goodToks.add(USE_STEMMER ? stemmer.stem(s) : s); String partlyCleanedText = spaceJoin.join(goodToks); String cleanText = XmlHelper.removeInvaildXMLChars(partlyCleanedText); // isGoodWord combiend with Stanford tokenizer should be quite restrictive already //cleanText = replaceSomePunct(cleanText); outputMap.replace(UtilConst.XML_FIELD_DOCNO, inpDoc.mDocId); outputMap.replace(UtilConst.XML_FIELD_TEXT, cleanText); String xml = xmlHlp.genXMLIndexEntry(outputMap); if (reparseXML) { try { XmlHelper.parseDocWithoutXMLDecl(xml); } catch (Exception e) { System.err.println("Error re-parsing xml for document ID: " + inpDoc.mDocId); System.exit(1); } } /* { System.out.println(inpDoc.mDocId); System.out.println("====================="); System.out.println(partlyCleanedText); System.out.println("====================="); System.out.println(cleanText); } */ try { outputFile.write(xml); outputFile.write(NL); } catch (Exception e) { e.printStackTrace(); System.err.println("Error processing/saving a document!"); } if (docNum % 1000 == 0) System.out.println(String.format("Processed %d documents", docNum)); } } catch (ParseException e) { e.printStackTrace(); Usage("Cannot parse arguments" + e, options); } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); System.exit(1); } finally { System.out.println(String.format("Processed %d documents", docNum)); try { if (null != outputFile) { outputFile.close(); System.out.println("Output file is closed! all seems to be fine..."); } } catch (IOException e) { System.err.println("IO exception: " + e); e.printStackTrace(); } } }
From source file:apps.LuceneIndexer.java
public static void main(String[] args) { Options options = new Options(); options.addOption("i", null, true, "input file"); options.addOption("o", null, true, "output directory"); options.addOption("r", null, true, "optional output TREC-format QREL file"); options.addOption("bm25_b", null, true, "BM25 parameter: b"); options.addOption("bm25_k1", null, true, "BM25 parameter: k1"); options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity"); Joiner commaJoin = Joiner.on(','); Joiner spaceJoin = Joiner.on(' '); options.addOption("source_type", null, true, "document source type: " + commaJoin.join(SourceFactory.getDocSourceList())); // If you increase this value, you may need to modify the following line in *.sh file // export MAVEN_OPTS="-Xms8192m -server" double ramBufferSizeMB = 1024 * 8; // 8 GB CommandLineParser parser = new org.apache.commons.cli.GnuParser(); IndexWriter indexWriter = null;/*from w w w . ja va 2s. c o m*/ BufferedWriter qrelWriter = null; int docNum = 0; try { CommandLine cmd = parser.parse(options, args); String inputFileName = null, outputDirName = null, qrelFileName = null; if (cmd.hasOption("i")) { inputFileName = cmd.getOptionValue("i"); } else { Usage("Specify 'input file'", options); } if (cmd.hasOption("o")) { outputDirName = cmd.getOptionValue("o"); } else { Usage("Specify 'index directory'", options); } if (cmd.hasOption("r")) { qrelFileName = cmd.getOptionValue("r"); } String sourceName = cmd.getOptionValue("source_type"); if (sourceName == null) Usage("Specify document source type", options); if (qrelFileName != null) qrelWriter = new BufferedWriter(new FileWriter(qrelFileName)); File outputDir = new File(outputDirName); if (!outputDir.exists()) { if (!outputDir.mkdirs()) { System.out.println("couldn't create " + outputDir.getAbsolutePath()); System.exit(1); } } if (!outputDir.isDirectory()) { System.out.println(outputDir.getAbsolutePath() + " is not a directory!"); System.exit(1); } if (!outputDir.canWrite()) { System.out.println("Can't write to " + outputDir.getAbsolutePath()); System.exit(1); } boolean useFixedBM25 = cmd.hasOption("bm25fixed"); float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT; if (cmd.hasOption("bm25_k1")) { try { bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1")); } catch (NumberFormatException e) { Usage("Wrong format for 'bm25_k1'", options); } } if (cmd.hasOption("bm25_b")) { try { bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b")); } catch (NumberFormatException e) { Usage("Wrong format for 'bm25_b'", options); } } EnglishAnalyzer analyzer = new EnglishAnalyzer(); FSDirectory indexDir = FSDirectory.open(Paths.get(outputDirName)); IndexWriterConfig indexConf = new IndexWriterConfig(analyzer); /* OpenMode.CREATE creates a new index or overwrites an existing one. https://lucene.apache.org/core/6_0_0/core/org/apache/lucene/index/IndexWriterConfig.OpenMode.html#CREATE */ indexConf.setOpenMode(OpenMode.CREATE); indexConf.setRAMBufferSizeMB(ramBufferSizeMB); System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b)); if (useFixedBM25) { System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b)); indexConf.setSimilarity(new BM25SimilarityFix(bm25_k1, bm25_b)); } else { System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b)); indexConf.setSimilarity(new BM25Similarity(bm25_k1, bm25_b)); } indexWriter = new IndexWriter(indexDir, indexConf); DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName); DocumentEntry inpDoc = null; TextCleaner textCleaner = new TextCleaner(null); while ((inpDoc = inpDocSource.next()) != null) { ++docNum; Document luceneDoc = new Document(); ArrayList<String> cleanedToks = textCleaner.cleanUp(inpDoc.mDocText); String cleanText = spaceJoin.join(cleanedToks); // System.out.println(inpDoc.mDocId); // System.out.println(cleanText); // System.out.println("=============================="); luceneDoc.add(new StringField(UtilConst.FIELD_ID, inpDoc.mDocId, Field.Store.YES)); luceneDoc.add(new TextField(UtilConst.FIELD_TEXT, cleanText, Field.Store.YES)); indexWriter.addDocument(luceneDoc); if (inpDoc.mIsRel != null && qrelWriter != null) { saveQrelOneEntry(qrelWriter, inpDoc.mQueryId, inpDoc.mDocId, inpDoc.mIsRel ? MAX_GRADE : 0); } if (docNum % 1000 == 0) System.out.println(String.format("Indexed %d documents", docNum)); } } catch (ParseException e) { e.printStackTrace(); Usage("Cannot parse arguments" + e, options); } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); System.exit(1); } finally { System.out.println(String.format("Indexed %d documents", docNum)); try { if (null != indexWriter) indexWriter.close(); if (null != qrelWriter) qrelWriter.close(); } catch (IOException e) { System.err.println("IO exception: " + e); e.printStackTrace(); } } }
From source file:diffhunter.DiffHunter.java
/** * @param args the command line arguments * @throws org.apache.commons.cli.ParseException * @throws java.io.IOException/*from ww w .ja v a 2 s . co m*/ */ public static void main(String[] args) throws ParseException, IOException { //String test_ = Paths.get("J:\\VishalData\\additional\\", "Sasan" + "_BDB").toAbsolutePath().toString(); // TODO code application logic here /*args = new String[] { "-i", "-b", "J:\\VishalData\\additional\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted.bed", "-r", "J:\\VishalData\\additional\\mouse_mm9.txt", "-o", "J:\\VishalData" };*/ /*args = new String[] { "-c", "-r", "J:\\VishalData\\additional\\mouse_mm9.txt", "-1", "J:\\VishalData\\Ptbp2_Adult_testis_CLIP_mm9_plus_strand_sorted_BDB", "-2", "J:\\VishalData\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted_BDB", "-w", "200", "-s", "50", "-o", "J:\\VishalData" };*/ Options options = new Options(); // add t option options.addOption("i", "index", false, "Indexing BED files."); options.addOption("b", "bed", true, "bed file to be indexed"); options.addOption("o", "output", true, "Folder that the index/comparison file will be created."); options.addOption("r", "reference", true, "Reference annotation file to be used for indexing"); options.addOption("c", "compare", false, "Finding differences between two conditions"); options.addOption("1", "first", true, "First sample index location"); options.addOption("2", "second", true, "Second sample index location"); options.addOption("w", "window", true, "Length of window for identifying differences"); options.addOption("s", "sliding", true, "Length of sliding"); CommandLineParser parser = new BasicParser(); CommandLine cmd = parser.parse(options, args); boolean indexing = false; boolean comparing = false; //Indexing! if (cmd.hasOption("i")) { //if(cmd.hasOption("1")) //System.err.println("sasan"); //System.out.println("sasa"); indexing = true; } else if (cmd.hasOption("c")) { //System.err.println(""); comparing = true; } else { //System.err.println("Option is not deteced."); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("diffhunter", options); return; } //Indexing is selected // if (indexing == true) { //Since indexing is true. //User have to provide file for indexing. if (!(cmd.hasOption("o") || cmd.hasOption("r") || cmd.hasOption("b"))) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("diffhunter", options); return; } String bedfile_ = cmd.getOptionValue("b"); String reference_file = cmd.getOptionValue("r"); String folder_loc = cmd.getOptionValue("o"); String sample_name = FilenameUtils.getBaseName(bedfile_); try (Database B2 = BerkeleyDB_Box.Get_BerkeleyDB( Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString(), true, sample_name)) { Indexer indexing_ = new Indexer(reference_file); indexing_.Make_Index(B2, bedfile_, Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString()); B2.close(); } } else if (comparing == true) { if (!(cmd.hasOption("o") || cmd.hasOption("w") || cmd.hasOption("s") || cmd.hasOption("1") || cmd.hasOption("2"))) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("diffhunter", options); return; } String folder_loc = cmd.getOptionValue("o"); int window_ = Integer.parseInt(cmd.getOptionValue("w")); //int window_=600; int slide_ = Integer.parseInt(cmd.getOptionValue("s")); String first = cmd.getOptionValue("1").replace("_BDB", ""); String second = cmd.getOptionValue("2").replace("_BDB", ""); String reference_file = cmd.getOptionValue("r"); //String folder_loc=cmd.getOptionValue("o"); String sample_name_first = FilenameUtils.getBaseName(first); String sample_name_second = FilenameUtils.getBaseName(second); Database B1 = BerkeleyDB_Box.Get_BerkeleyDB(first + "_BDB", false, sample_name_first); Database B2 = BerkeleyDB_Box.Get_BerkeleyDB(second + "_BDB", false, sample_name_second); List<String> first_condition_genes = Files .lines(Paths.get(first + "_BDB", sample_name_first + ".txt").toAbsolutePath()) .collect(Collectors.toList()); List<String> second_condition_genes = Files .lines(Paths.get(second + "_BDB", sample_name_second + ".txt").toAbsolutePath()) .collect(Collectors.toList()); System.out.println("First and second condition are loaded!!! "); List<String> intersection_ = new ArrayList<>(first_condition_genes); intersection_.retainAll(second_condition_genes); BufferedWriter output = new BufferedWriter( new FileWriter(Paths.get(folder_loc, "differences_" + window_ + "_s" + slide_ + "_c" + ".txt") .toAbsolutePath().toString(), false)); List<Result_Window> final_results = Collections.synchronizedList(new ArrayList<>()); Worker_New worker_class = new Worker_New(); worker_class.Read_Reference(reference_file); while (!intersection_.isEmpty()) { List<String> selected_genes = new ArrayList<>(); //if (intersection_.size()<=10000){selected_genes.addAll(intersection_.subList(0, intersection_.size()));} //else selected_genes.addAll(intersection_.subList(0, 10000)); if (intersection_.size() <= intersection_.size()) { selected_genes.addAll(intersection_.subList(0, intersection_.size())); } else { selected_genes.addAll(intersection_.subList(0, intersection_.size())); } intersection_.removeAll(selected_genes); //System.out.println("Intersection count is:"+intersection_.size()); //final List<Result_Window> resultssss_=new ArrayList<>(); IntStream.range(0, selected_genes.size()).parallel().forEach(i -> { System.out.println(selected_genes.get(i) + "\tprocessing......"); String gene_of_interest = selected_genes.get(i);//"ENSG00000142657|PGD";//intersection_.get(6);////"ENSG00000163395|IGFN1";//"ENSG00000270066|SCARNA2"; int start = worker_class.dic_genes.get(gene_of_interest).start_loc; int end = worker_class.dic_genes.get(gene_of_interest).end_loc; Map<Integer, Integer> first_ = Collections.EMPTY_MAP; try { first_ = BerkeleyDB_Box.Get_Coord_Read(B1, gene_of_interest); } catch (IOException | ClassNotFoundException ex) { Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex); } Map<Integer, Integer> second_ = Collections.EMPTY_MAP; try { second_ = BerkeleyDB_Box.Get_Coord_Read(B2, gene_of_interest); } catch (IOException | ClassNotFoundException ex) { Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex); } List<Window> top_windows_first = worker_class.Get_Top_Windows(window_, first_, slide_); List<Window> top_windows_second = worker_class.Get_Top_Windows(window_, second_, slide_); //System.out.println("passed for window peak call for gene \t"+selected_genes.get(i)); // System.out.println("top_window_first_Count\t"+top_windows_first.size()); // System.out.println("top_window_second_Count\t"+top_windows_second.size()); if (top_windows_first.isEmpty() && top_windows_second.isEmpty()) { return; } List<Result_Window> res_temp = new Worker_New().Get_Significant_Windows(gene_of_interest, start, end, top_windows_first, top_windows_second, second_, first_, sample_name_first, sample_name_second, 0.01); if (!res_temp.isEmpty()) { final_results.addAll(res_temp);//final_results.addAll(worker_class.Get_Significant_Windows(gene_of_interest, start, end, top_windows_first, top_windows_second, second_, first_, first_condition, second_condition, 0.01)); } //System.out.println(selected_genes.get(i)+"\tprocessed."); }); /*selected_genes.parallelStream().forEach(i -> { });*/ List<Double> pvals = new ArrayList<>(); for (int i = 0; i < final_results.size(); i++) { pvals.add(final_results.get(i).p_value); } List<Double> qvals = MultipleTestCorrection.benjaminiHochberg(pvals); System.out.println("Writing to file..."); output.append("Gene_Symbol\tContributing_Sample\tStart\tEnd\tOddsRatio\tp_Value\tFDR"); output.newLine(); for (int i = 0; i < final_results.size(); i++) { Result_Window item = final_results.get(i); output.append(item.associated_gene_symbol + "\t" + item.contributing_windows + "\t" + item.start_loc + "\t" + item.end_loc + "\t" + item.oddsratio_ + "\t" + item.p_value + "\t" + qvals.get(i)); //+ "\t" + item.average_other_readcount_cotributing + "\t" + item.average_other_readcount_cotributing + "\t" + item.average_window_readcount_non + "\t" + item.average_other_readcount_non); output.newLine(); } /* for (Result_Window item : final_results) { output.append(item.associated_gene_symbol + "\t" + item.contributing_windows + "\t" + item.start_loc + "\t" + item.end_loc + "\t" + item.oddsratio_ + "\t" + item.p_value); //+ "\t" + item.average_other_readcount_cotributing + "\t" + item.average_other_readcount_cotributing + "\t" + item.average_window_readcount_non + "\t" + item.average_other_readcount_non); output.newLine(); } */ final_results.clear(); } output.close(); } System.out.println("Done."); }
From source file:com.cws.esolutions.security.main.PasswordUtility.java
public static void main(final String[] args) { final String methodName = PasswordUtility.CNAME + "#main(final String[] args)"; if (DEBUG) {// www . j a v a 2 s. c o m DEBUGGER.debug("Value: {}", methodName); } if (args.length == 0) { HelpFormatter usage = new HelpFormatter(); usage.printHelp(PasswordUtility.CNAME, options, true); System.exit(1); } BufferedReader bReader = null; BufferedWriter bWriter = null; try { // load service config first !! SecurityServiceInitializer.initializeService(PasswordUtility.SEC_CONFIG, PasswordUtility.LOG_CONFIG, false); if (DEBUG) { DEBUGGER.debug("Options options: {}", options); for (String arg : args) { DEBUGGER.debug("Value: {}", arg); } } CommandLineParser parser = new PosixParser(); CommandLine commandLine = parser.parse(options, args); if (DEBUG) { DEBUGGER.debug("CommandLineParser parser: {}", parser); DEBUGGER.debug("CommandLine commandLine: {}", commandLine); DEBUGGER.debug("CommandLine commandLine.getOptions(): {}", (Object[]) commandLine.getOptions()); DEBUGGER.debug("CommandLine commandLine.getArgList(): {}", commandLine.getArgList()); } final SecurityConfigurationData secConfigData = PasswordUtility.svcBean.getConfigData(); final SecurityConfig secConfig = secConfigData.getSecurityConfig(); final PasswordRepositoryConfig repoConfig = secConfigData.getPasswordRepo(); final SystemConfig systemConfig = secConfigData.getSystemConfig(); if (DEBUG) { DEBUGGER.debug("SecurityConfigurationData secConfig: {}", secConfigData); DEBUGGER.debug("SecurityConfig secConfig: {}", secConfig); DEBUGGER.debug("RepositoryConfig secConfig: {}", repoConfig); DEBUGGER.debug("SystemConfig systemConfig: {}", systemConfig); } if (commandLine.hasOption("encrypt")) { if ((StringUtils.isBlank(repoConfig.getPasswordFile())) || (StringUtils.isBlank(repoConfig.getSaltFile()))) { System.err.println("The password/salt files are not configured. Entries will not be stored!"); } File passwordFile = FileUtils.getFile(repoConfig.getPasswordFile()); File saltFile = FileUtils.getFile(repoConfig.getSaltFile()); if (DEBUG) { DEBUGGER.debug("File passwordFile: {}", passwordFile); DEBUGGER.debug("File saltFile: {}", saltFile); } final String entryName = commandLine.getOptionValue("entry"); final String username = commandLine.getOptionValue("username"); final String password = commandLine.getOptionValue("password"); final String salt = RandomStringUtils.randomAlphanumeric(secConfig.getSaltLength()); if (DEBUG) { DEBUGGER.debug("String entryName: {}", entryName); DEBUGGER.debug("String username: {}", username); DEBUGGER.debug("String password: {}", password); DEBUGGER.debug("String salt: {}", salt); } final String encodedSalt = PasswordUtils.base64Encode(salt); final String encodedUserName = PasswordUtils.base64Encode(username); final String encryptedPassword = PasswordUtils.encryptText(password, salt, secConfig.getSecretAlgorithm(), secConfig.getIterations(), secConfig.getKeyBits(), secConfig.getEncryptionAlgorithm(), secConfig.getEncryptionInstance(), systemConfig.getEncoding()); final String encodedPassword = PasswordUtils.base64Encode(encryptedPassword); if (DEBUG) { DEBUGGER.debug("String encodedSalt: {}", encodedSalt); DEBUGGER.debug("String encodedUserName: {}", encodedUserName); DEBUGGER.debug("String encodedPassword: {}", encodedPassword); } if (commandLine.hasOption("store")) { try { new File(passwordFile.getParent()).mkdirs(); new File(saltFile.getParent()).mkdirs(); boolean saltFileExists = (saltFile.exists()) ? true : saltFile.createNewFile(); if (DEBUG) { DEBUGGER.debug("saltFileExists: {}", saltFileExists); } // write the salt out first if (!(saltFileExists)) { throw new IOException("Unable to create salt file"); } boolean passwordFileExists = (passwordFile.exists()) ? true : passwordFile.createNewFile(); if (!(passwordFileExists)) { throw new IOException("Unable to create password file"); } if (commandLine.hasOption("replace")) { File[] files = new File[] { saltFile, passwordFile }; if (DEBUG) { DEBUGGER.debug("File[] files: {}", (Object) files); } for (File file : files) { if (DEBUG) { DEBUGGER.debug("File: {}", file); } String currentLine = null; File tmpFile = new File(FileUtils.getTempDirectory() + "/" + "tmpFile"); if (DEBUG) { DEBUGGER.debug("File tmpFile: {}", tmpFile); } bReader = new BufferedReader(new FileReader(file)); bWriter = new BufferedWriter(new FileWriter(tmpFile)); while ((currentLine = bReader.readLine()) != null) { if (!(StringUtils.equals(currentLine.trim().split(",")[0], entryName))) { bWriter.write(currentLine + System.getProperty("line.separator")); bWriter.flush(); } } bWriter.close(); FileUtils.deleteQuietly(file); FileUtils.copyFile(tmpFile, file); FileUtils.deleteQuietly(tmpFile); } } FileUtils.writeStringToFile(saltFile, entryName + "," + encodedUserName + "," + encodedSalt + System.getProperty("line.separator"), true); FileUtils.writeStringToFile(passwordFile, entryName + "," + encodedUserName + "," + encodedPassword + System.getProperty("line.separator"), true); } catch (IOException iox) { ERROR_RECORDER.error(iox.getMessage(), iox); } } System.out.println("Entry Name " + entryName + " stored."); } if (commandLine.hasOption("decrypt")) { String saltEntryName = null; String saltEntryValue = null; String decryptedPassword = null; String passwordEntryName = null; if ((StringUtils.isEmpty(commandLine.getOptionValue("entry")) && (StringUtils.isEmpty(commandLine.getOptionValue("username"))))) { throw new ParseException("No entry or username was provided to decrypt."); } if (StringUtils.isEmpty(commandLine.getOptionValue("username"))) { throw new ParseException("no entry provided to decrypt"); } String entryName = commandLine.getOptionValue("entry"); String username = commandLine.getOptionValue("username"); if (DEBUG) { DEBUGGER.debug("String entryName: {}", entryName); DEBUGGER.debug("String username: {}", username); } File passwordFile = FileUtils.getFile(repoConfig.getPasswordFile()); File saltFile = FileUtils.getFile(repoConfig.getSaltFile()); if (DEBUG) { DEBUGGER.debug("File passwordFile: {}", passwordFile); DEBUGGER.debug("File saltFile: {}", saltFile); } if ((!(saltFile.canRead())) || (!(passwordFile.canRead()))) { throw new IOException( "Unable to read configured password/salt file. Please check configuration and/or permissions."); } for (String lineEntry : FileUtils.readLines(saltFile, systemConfig.getEncoding())) { saltEntryName = lineEntry.split(",")[0]; if (DEBUG) { DEBUGGER.debug("String saltEntryName: {}", saltEntryName); } if (StringUtils.equals(saltEntryName, entryName)) { saltEntryValue = PasswordUtils.base64Decode(lineEntry.split(",")[2]); break; } } if (StringUtils.isEmpty(saltEntryValue)) { throw new SecurityException("No entries were found that matched the provided information"); } for (String lineEntry : FileUtils.readLines(passwordFile, systemConfig.getEncoding())) { passwordEntryName = lineEntry.split(",")[0]; if (DEBUG) { DEBUGGER.debug("String passwordEntryName: {}", passwordEntryName); } if (StringUtils.equals(passwordEntryName, saltEntryName)) { String decodedPassword = PasswordUtils.base64Decode(lineEntry.split(",")[2]); decryptedPassword = PasswordUtils.decryptText(decodedPassword, saltEntryValue, secConfig.getSecretAlgorithm(), secConfig.getIterations(), secConfig.getKeyBits(), secConfig.getEncryptionAlgorithm(), secConfig.getEncryptionInstance(), systemConfig.getEncoding()); break; } } if (StringUtils.isEmpty(decryptedPassword)) { throw new SecurityException("No entries were found that matched the provided information"); } System.out.println(decryptedPassword); } else if (commandLine.hasOption("encode")) { System.out.println(PasswordUtils.base64Encode((String) commandLine.getArgList().get(0))); } else if (commandLine.hasOption("decode")) { System.out.println(PasswordUtils.base64Decode((String) commandLine.getArgList().get(0))); } } catch (IOException iox) { ERROR_RECORDER.error(iox.getMessage(), iox); System.err.println("An error occurred during processing: " + iox.getMessage()); System.exit(1); } catch (ParseException px) { ERROR_RECORDER.error(px.getMessage(), px); System.err.println("An error occurred during processing: " + px.getMessage()); System.exit(1); } catch (SecurityException sx) { ERROR_RECORDER.error(sx.getMessage(), sx); System.err.println("An error occurred during processing: " + sx.getMessage()); System.exit(1); } catch (SecurityServiceException ssx) { ERROR_RECORDER.error(ssx.getMessage(), ssx); System.exit(1); } finally { try { if (bReader != null) { bReader.close(); } if (bWriter != null) { bReader.close(); } } catch (IOException iox) { } } System.exit(0); }