List of usage examples for java.io.BufferedWriter append
public Writer append(CharSequence csq) throws IOException
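Before the per-project examples below, here is a minimal, self-contained sketch of the method in isolation (the file name demo.txt and the class name AppendDemo are arbitrary placeholders). append(CharSequence) writes the sequence and returns the same Writer, so calls can be chained; the BufferedWriter holds output in its buffer until flush() or close().

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public class AppendDemo {
    public static void main(String[] args) throws IOException {
        // try-with-resources flushes and closes the writer automatically
        try (BufferedWriter writer = Files.newBufferedWriter(Paths.get("demo.txt"))) {
            writer.append("first line");         // append(CharSequence) returns this Writer...
            writer.newLine();
            writer.append("second").append('!'); // ...so calls can be chained; append(char) also works
            writer.newLine();
        }
    }
}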
From source file:org.trec.liveqa.GetYAnswersPropertiesFromQid.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("Usage: GetYAnswersPropertiesFromQid <plaintext-list-of-qids> <out-file>");
        return;
    }
    BufferedWriter writer = getWriter(args[1]);
    String[] qids = getQids(args[0]);
    for (String qid : qids) {
        System.out.println("Getting data for QID " + qid);
        Map<String, String> data = extractData(qid);
        JSONObject jo = new JSONObject();
        for (Map.Entry<String, String> e : data.entrySet()) {
            jo.put(e.getKey(), JSONValue.escape(e.getValue()));
        }
        writer.append(jo.toString());
        writer.newLine();
        writer.flush();
    }
    writer.close();
}
From source file:org.archive.net.PublicSuffixes.java
/**
 * Utility method for dumping a regex String, based on a published public
 * suffix list, which matches any SURT-form hostname up through the broadest
 * 'private' (assigned/sold) domain-segment. That is, for any of the
 * SURT-form hostnames...
 *
 * com,example, com,example,www, com,example,california,www
 *
 * ...the regex will match 'com,example,'.
 *
 * @param args
 * @throws IOException
 */
public static void main(String args[]) throws IOException {
    InputStream is;
    if (args.length == 0 || "=".equals(args[0])) {
        // use bundled list
        is = PublicSuffixes.class.getClassLoader().getResourceAsStream("effective_tld_names.dat");
    } else {
        is = new FileInputStream(args[0]);
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
    String regex = getTopmostAssignedSurtPrefixRegex(reader);
    IOUtils.closeQuietly(is);
    boolean needsClose = false;
    BufferedWriter writer;
    if (args.length >= 2) {
        // write to specified file
        writer = new BufferedWriter(new FileWriter(args[1]));
        needsClose = true;
    } else {
        // write to stdout
        writer = new BufferedWriter(new OutputStreamWriter(System.out));
    }
    writer.append(regex);
    writer.flush();
    if (needsClose) {
        writer.close();
    }
}
From source file:PodbaseMetadataMigration2.java
public static void main(String[] args) throws Exception {
    System.out.println("Running data migration");
    String projectString = FileUtils.readFileToString(new File("projects.txt"));
    Map<String, Integer> projectIdMapping = new HashMap<String, Integer>();
    for (String line : projectString.split("\n")) {
        String[] split = line.split(":");
        int id = Integer.parseInt(split[0].trim());
        String name = split[1].trim();
        projectIdMapping.put(name, id);
    }
    System.out.println("Reading projects..");
    List<ProjectEntry> projects = dataFromFile("./migrate/projects.data", ProjectEntry.class);
    projectIdMap = parseProjectMap(projects, projectIdMapping);
    System.out.println("Found " + projects.size() + " projects.");
    System.out.println("Reading tags..");
    List<TagEntry> tags = dataFromFile("./migrate/tags.data", TagEntry.class);
    System.out.println("Found " + tags.size() + " tags.");
    System.out.println("Reading templates..");
    List<TemplateEntry> templates = dataFromFile("./migrate/templates.data", TemplateEntry.class);
    System.out.println("Found " + templates.size() + " templates.");
    System.out.println("Reading template fields..");
    List<TemplateFieldEntry> templateFields = dataFromFile("./migrate/template_fields.data",
            TemplateFieldEntry.class);
    System.out.println("Found " + templateFields.size() + " templateFields.");
    int entryCount = tags.size() + templates.size() + templateFields.size();
    //System.out.println("Generating Project SQL");
    //String projectSql = generateSql((List<AbstractEntry>)(List<?>)projects);
    System.out.println("Generating Attribute SQL");
    String imageAttributes = generateSql((List<AbstractEntry>) (List<?>) tags);
    System.out.println("Generating Image SQL");
    String databaseImages = generateDatabaseImageSql();
    //System.out.println("Generating Directory SQL");
    //String directorySql = generateDirectorySql(projects);
    //System.out.println("Generating Template SQL");
    //String templateSql = generateSql((List<AbstractEntry>)(List<?>)templates);
    //System.out.println("Generating Field SQL");
    //String fieldsSql = generateSql((List<AbstractEntry>)(List<?>)templateFields);
    System.out.println("Writing database.sql");
    BufferedWriter bw = new BufferedWriter(new FileWriter(new File("./database.sql")));
    //bw.append(projectSql);
    //bw.append("\n\n");
    bw.append(databaseImages);
    bw.append("\n\n");
    //bw.append(directorySql);
    //bw.append("\n\n");
    bw.append(imageAttributes);
    bw.append("\n\n");
    //bw.append(templateSql);
    //bw.append("\n\n");
    //bw.append(fieldsSql);
    //bw.append("\n\n");
    bw.close();
    System.out.println("Writing missingImages.txt");
    bw = new BufferedWriter(new FileWriter(new File("./missingImages.txt")));
    for (String img : missingImages) {
        bw.append(img + "\n");
    }
    bw.close();
    System.out.println("Migration completed successfully!");
}
From source file:discovery.compression.kdd2011.ratio.RatioCompressionReport.java
public static void main(String[] args) throws GraphReadingException, IOException, java.text.ParseException {
    opts.addOption("r", true, "Goal compression ratio");
    //opts.addOption("a", true,
    //        "Algorithm used for compression. The default and only currently available option is \"greedy\"");
    //opts.addOption("cost-output",true,"Output file for costs, default is costs.txt");
    //opts.addOption("cost-format",true,"Output format for ");
    opts.addOption("ctype", true, "Connectivity type: global or local, default is global.");
    opts.addOption("connectivity", false,
            "enables output for connectivity. Connectivity info will be written to connectivity.txt");
    opts.addOption("output_bmg", true, "Write bmg file with groups to given file.");
    opts.addOption("algorithm", true, "Algorithm to use, one of: greedy random1 random2 bruteforce slowgreedy");
    opts.addOption("hop2", false, "Only try to merge nodes that have common neighbors");
    opts.addOption("kmedoids", false, "Enables output for kmedoids clustering");
    opts.addOption("kmedoids_k", true, "Number of clusters to be used in kmedoids. Default is 3");
    opts.addOption("kmedoids_output", true,
            "Output file for kmedoid clusters. Default is clusters.txt. This file will be overwritten.");
    opts.addOption("norefresh", false,
            "Use old style merging: all connectivities are not refreshed when merging");
    opts.addOption("edge_attribute", true, "Attribute from bmgraph used as edge weight");
    opts.addOption("only_times", false, "Only write times.txt");
    //opts.addOption("no_metrics",false,"Exit after compression, don't calculate any metrics or produce output bmg for the compression.");
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(opts, args);
    } catch (ParseException e) {
        e.printStackTrace();
        System.exit(0);
    }
    boolean connectivity = false;
    double ratio = 0;
    boolean hop2 = cmd.hasOption("hop2");
    RatioCompression compression = new GreedyRatioCompression(hop2);
    if (cmd.hasOption("connectivity"))
        connectivity = true;
    ConnectivityType ctype = ConnectivityType.GLOBAL;
    CompressionMergeModel mergeModel = new PathAverageMergeModel();
    if (cmd.hasOption("ctype")) {
        String ctypeStr = cmd.getOptionValue("ctype");
        if (ctypeStr.equals("local")) {
            ctype = ConnectivityType.LOCAL;
            mergeModel = new EdgeAverageMergeModel();
        } else if (ctypeStr.equals("global")) {
            ctype = ConnectivityType.GLOBAL;
            mergeModel = new PathAverageMergeModel();
        } else {
            System.out.println(PROGRAM_NAME + ": unknown connectivity type " + ctypeStr);
            printHelp();
        }
    }
    if (cmd.hasOption("norefresh"))
        mergeModel = new PathAverageMergeModelNorefresh();
    if (cmd.hasOption("algorithm")) {
        String alg = cmd.getOptionValue("algorithm");
        if (alg.equals("greedy")) {
            compression = new GreedyRatioCompression(hop2);
        } else if (alg.equals("random1")) {
            compression = new RandomRatioCompression(hop2);
        } else if (alg.equals("random2")) {
            compression = new SmartRandomRatioCompression(hop2);
        } else if (alg.equals("bruteforce")) {
            compression = new BruteForceCompression(hop2, ctype == ConnectivityType.LOCAL);
        } else if (alg.equals("slowgreedy")) {
            compression = new SlowGreedyRatioCompression(hop2);
        } else {
            System.out.println("algorithm must be one of: greedy random1 random2 bruteforce slowgreedy");
            printHelp();
        }
    }
    compression.setMergeModel(mergeModel);
    if (cmd.hasOption("r")) {
        ratio = Double.parseDouble(cmd.getOptionValue("r"));
    } else {
        System.out.println(PROGRAM_NAME + ": compression ratio not defined");
        printHelp();
    }
    if (cmd.hasOption("help")) {
        printHelp();
    }
    String infile = null;
    if (cmd.getArgs().length != 0) {
        infile = cmd.getArgs()[0];
    } else {
        printHelp();
    }
    boolean kmedoids = false;
    int kmedoidsK = 3;
    String kmedoidsOutput = "clusters.txt";
    if (cmd.hasOption("kmedoids"))
        kmedoids = true;
    if (cmd.hasOption("kmedoids_k"))
        kmedoidsK = Integer.parseInt(cmd.getOptionValue("kmedoids_k"));
    if (cmd.hasOption("kmedoids_output"))
        kmedoidsOutput = cmd.getOptionValue("kmedoids_output");
    String edgeAttrib = "goodness";
    if (cmd.hasOption("edge_attribute"))
        edgeAttrib = cmd.getOptionValue("edge_attribute");
    // This program should directly use bmgraph-java to read and
    // DefaultGraph should have a constructor that takes a BMGraph as an argument.
    //VisualGraph vg = new VisualGraph(infile, edgeAttrib, false);
    //System.out.println("vg read");
    //SimpleVisualGraph origSG = new SimpleVisualGraph(vg);
    BMGraph bmg = BMGraphUtils.readBMGraph(infile);
    int origN = bmg.getNodes().size();
    //for(int i=0;i<origN;i++)
    //    System.out.println(i+"="+origSG.getVisualNode(i));
    System.out.println("bmgraph read");
    BMNode[] i2n = new BMNode[origN];
    HashMap<BMNode, Integer> n2i = new HashMap<BMNode, Integer>();
    {
        int pi = 0;
        for (BMNode nod : bmg.getNodes()) {
            n2i.put(nod, pi);
            i2n[pi++] = nod;
        }
    }
    DefaultGraph dg = new DefaultGraph();
    for (BMEdge e : bmg.getEdges()) {
        dg.addEdge(n2i.get(e.getSource()), n2i.get(e.getTarget()), Double.parseDouble(e.get(edgeAttrib)));
    }
    DefaultGraph origDG = dg.copy();
    System.out.println("inputs read");
    RatioCompression nopCompressor = new RatioCompression.DefaultRatioCompression();
    ResultGraph nopResult = nopCompressor.compressGraph(dg, 1);
    long start = System.currentTimeMillis();
    ResultGraph result = compression.compressGraph(dg, ratio);
    long timeSpent = System.currentTimeMillis() - start;
    double seconds = timeSpent * 0.001;
    BufferedWriter timesWriter = new BufferedWriter(new FileWriter("times.txt", true));
    timesWriter.append("" + seconds + "\n");
    timesWriter.close();
    if (cmd.hasOption("only_times")) {
        System.out.println("Compression done, exiting.");
        System.exit(0);
    }
    BufferedWriter costsWriter = new BufferedWriter(new FileWriter("costs.txt", true));
    costsWriter.append("" + nopResult.getCompressorCosts() + " " + result.getCompressorCosts() + "\n");
    costsWriter.close();
    double[][] origProb;
    double[][] compProb;
    int[] group = new int[origN];
    for (int i = 0; i < result.partition.size(); i++)
        for (int x : result.partition.get(i))
            group[x] = i;
    if (ctype == ConnectivityType.LOCAL) {
        origProb = new double[origN][origN];
        compProb = new double[origN][origN];
        DefaultGraph g = result.uncompressedGraph();
        for (int i = 0; i < origN; i++) {
            for (int j = 0; j < origN; j++) {
                origProb[i][j] = dg.getEdgeWeight(i, j);
                compProb[i][j] = g.getEdgeWeight(i, j);
            }
        }
        System.out.println("Writing edge-dissimilarity");
    } else {
        origProb = ProbDijkstra.getProbMatrix(origDG);
        compProb = new double[origN][origN];
        System.out.println("nodeCount = " + result.graph.getNodeCount());
        double[][] ccProb = ProbDijkstra.getProbMatrix(result.graph);
        System.out.println("ccProb.length = " + ccProb.length);
        System.out.println("ccProb[0].length = " + ccProb[0].length);
        for (int i = 0; i < origN; i++) {
            for (int j = 0; j < origN; j++) {
                if (group[i] == group[j])
                    compProb[i][j] = result.graph.getEdgeWeight(group[i], group[j]);
                else {
                    int gj = group[j];
                    int gi = group[i];
                    compProb[i][j] = ccProb[group[i]][group[j]];
                }
            }
        }
        System.out.println("Writing best-path-dissimilarity");
        //compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph());
    }
    {
        BufferedWriter connWr = null;
        if (connectivity) {
            connWr = new BufferedWriter(new FileWriter("connectivity.txt", true));
        }
        double totalDiff = 0;
        for (int i = 0; i < origN; i++) {
            for (int j = i + 1; j < origN; j++) {
                double diff = Math.abs(origProb[i][j] - compProb[i][j]);
                //VisualNode ni = origSG.getVisualNode(i);
                //VisualNode nj = origSG.getVisualNode(j);
                BMNode ni = i2n[i];
                BMNode nj = i2n[j];
                if (connectivity)
                    connWr.append(ni + "\t" + nj + "\t" + origProb[i][j] + "\t" + compProb[i][j] + "\t"
                            + diff + "\n");
                totalDiff += diff * diff;
            }
        }
        if (connectivity) {
            connWr.append("\n");
            connWr.close();
        }
        totalDiff = Math.sqrt(totalDiff);
        BufferedWriter dissWr = new BufferedWriter(new FileWriter("dissimilarity.txt", true));
        dissWr.append("" + totalDiff + "\n");
        dissWr.close();
    }
    if (cmd.hasOption("output_bmg")) {
        BMGraph outgraph = new BMGraph();
        String outputfile = cmd.getOptionValue("output_bmg");
        HashMap<Integer, BMNode> nodes = new HashMap<Integer, BMNode>();
        for (int i = 0; i < result.partition.size(); i++) {
            ArrayList<Integer> g = result.partition.get(i);
            if (g.size() == 0)
                continue;
            BMNode node = new BMNode("Supernode_" + i);
            HashMap<String, String> attributes = new HashMap<String, String>();
            StringBuffer contents = new StringBuffer();
            for (int x : g)
                contents.append(i2n[x] + ",");
            contents.delete(contents.length() - 1, contents.length());
            attributes.put("nodes", contents.toString());
            attributes.put("self-edge", "" + result.graph.getEdgeWeight(i, i));
            node.setAttributes(attributes);
            nodes.put(i, node);
            outgraph.ensureHasNode(node);
        }
        for (int i = 0; i < result.partition.size(); i++) {
            if (result.partition.get(i).size() == 0)
                continue;
            for (int x : result.graph.getNeighbors(i)) {
                if (x < i)
                    continue;
                BMNode from = nodes.get(i);
                BMNode to = nodes.get(x);
                if (from == null || to == null) {
                    System.out.println(from + "->" + to);
                    System.out.println(i + "->" + x);
                    System.out.println("");
                }
                BMEdge e = new BMEdge(nodes.get(i), nodes.get(x), "notype");
                e.setAttributes(new HashMap<String, String>());
                e.put("goodness", "" + result.graph.getEdgeWeight(i, x));
                outgraph.ensureHasEdge(e);
            }
        }
        BMGraphUtils.writeBMGraph(outgraph, outputfile);
    }
    // k medoids!
    if (kmedoids) {
        //KMedoidsResult clustersOrig=KMedoids.runKMedoids(origProb,kmedoidsK);
        if (ctype == ConnectivityType.LOCAL) {
            compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph());
        }
        //KMedoidsResult compClusters = KMedoids.runKMedoids(ProbDijkstra.getProbMatrix(result.graph),kmedoidsK);
        KMedoidsResult clustersComp = KMedoids.runKMedoids(compProb, kmedoidsK);
        BufferedWriter bw = new BufferedWriter(new FileWriter(kmedoidsOutput));
        for (int i = 0; i < origN; i++) {
            int g = group[i];
            //bw.append(origSG.getVisualNode(i).getBMNode()+" "+compClusters.clusters[g]+"\n");
            bw.append(i2n[i] + " " + clustersComp.clusters[i] + "\n");
        }
        bw.close();
    }
    System.exit(0);
}
From source file:org.lieuofs.extraction.etatpays.ExtractionPays.java
/**
 * @param args
 */
public static void main(String[] args) throws IOException {
    ApplicationContext context = new ClassPathXmlApplicationContext(new String[] { "beans_lieuofs.xml" });
    EtatTerritoireCritere critere = new EtatTerritoireCritere();
    //critere.setEstEtat(Boolean.FALSE);
    EtatTerritoireDao dao = (EtatTerritoireDao) context.getBean("etatTerritoireDao");
    Set<EtatTerritoirePersistant> etats = dao.rechercher(critere);
    EtatWriter etatWriter = new NumOFSEtatWriter();
    BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream("ExtractionPaysOFSReconnuRecemment.txt"), "UTF-8"));
    List<EtatTerritoirePersistant> listeTriee = new ArrayList<EtatTerritoirePersistant>(etats);
    Collections.sort(listeTriee, new Comparator<EtatTerritoirePersistant>() {
        @Override
        public int compare(EtatTerritoirePersistant o1, EtatTerritoirePersistant o2) {
            //return o1.getFormeCourte("fr").compareTo(o2.getFormeCourte("fr"));
            return o1.getNumeroOFS() - o2.getNumeroOFS();
        }
    });
    for (EtatTerritoirePersistant etat : filtre(listeTriee)) {
        String etatStr = etatWriter.ecrireEtat(etat);
        if (null != etatStr) {
            writer.append(etatStr);
        }
    }
    writer.close();
}
From source file:org.lieuofs.extraction.etatpays.ExtractionEtat.java
/**
 * @param args
 */
public static void main(String[] args) throws IOException {
    ApplicationContext context = new ClassPathXmlApplicationContext(new String[] { "beans_lieuofs.xml" });
    EtatTerritoireCritere critere = new EtatTerritoireCritere();
    critere.setEstEtat(Boolean.TRUE);
    //critere.setValide(Boolean.TRUE);
    EtatTerritoireDao dao = (EtatTerritoireDao) context.getBean("etatTerritoireDao");
    Set<EtatTerritoirePersistant> etats = dao.rechercher(critere);
    EtatWriter etatWriter = new CsvPlatEtatWriter();
    BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream("Etat.txt"), "UTF-8"));
    List<EtatTerritoirePersistant> listeTriee = new ArrayList<EtatTerritoirePersistant>(etats);
    Collections.sort(listeTriee, new Comparator<EtatTerritoirePersistant>() {
        @Override
        public int compare(EtatTerritoirePersistant o1, EtatTerritoirePersistant o2) {
            //return o1.getFormeCourte("fr").compareTo(o2.getFormeCourte("fr"));
            return o1.getNumeroOFS() - o2.getNumeroOFS();
        }
    });
    for (EtatTerritoirePersistant etat : listeTriee) {
        String etatStr = etatWriter.ecrireEtat(etat);
        if (null != etatStr) {
            writer.append(etatStr);
        }
    }
    writer.close();
}
From source file:di.uniba.it.tee2.wiki.Wikidump2Text.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        CommandLine cmd = cmdParser.parse(options, args);
        if (cmd.hasOption("l") && cmd.hasOption("d") && cmd.hasOption("o")) {
            encoding = cmd.getOptionValue("e", "UTF-8");
            int counter = 0;
            try {
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                        new GZIPOutputStream(new FileOutputStream(cmd.getOptionValue("o"))), "UTF-8"));
                WikipediaDumpIterator it = new WikipediaDumpIterator(new File(cmd.getOptionValue("d")),
                        encoding);
                PageCleaner cleaner = PageCleanerWrapper.getInstance(cmd.getOptionValue("l"));
                while (it.hasNext()) {
                    WikiPage wikiPage = it.next();
                    ParsedPage parsedPage = wikiPage.getParsedPage();
                    if (parsedPage != null) {
                        String title = wikiPage.getTitle();
                        if (!title.matches(notValidTitle)) {
                            if (parsedPage.getText() != null) {
                                writer.append(cleaner.clean(parsedPage.getText()));
                                writer.newLine();
                                writer.newLine();
                                counter++;
                                if (counter % 10000 == 0) {
                                    System.out.println(counter);
                                    writer.flush();
                                }
                            }
                        }
                    }
                }
                writer.flush();
                writer.close();
            } catch (Exception ex) {
                Logger.getLogger(Wikidump2Text.class.getName()).log(Level.SEVERE, null, ex);
            }
            System.out.println("Indexed pages: " + counter);
        } else {
            HelpFormatter helpFormatter = new HelpFormatter();
            helpFormatter.printHelp("Wikipedia dump to text", options, true);
        }
    } catch (ParseException ex) {
        Logger.getLogger(Wikidump2Text.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:fr.eo.util.dumper.JSONDumper.java
/**
 * @param args main args
 */
public static void main(String[] args) {
    String appName = args[0];
    String jdbcConnectionType = args.length > 1 ? args[1] : "jtds";
    System.out.println("Starting dumper ...");
    try (Connection conn = getConnection(jdbcConnectionType)) {
        System.out.println("Getting database connection ...");
        List<RequestDefinitionBean> requests = RequestDefinitionParser.getRequests(appName);
        baseFolder = RequestDefinitionParser.getAppBaseDir(appName) + "/";
        System.out.println("Reading old table dumps...");
        Map<String, JsonTableDump> oldTables = JSONDeltaDumper.readOldTables(baseFolder);
        List<JsonTableDump> newTables = new ArrayList<>();
        for (RequestDefinitionBean request : requests) {
            try (Statement stmt = conn.createStatement()) {
                // note: the writer is opened before the disabled check and is not
                // closed on the skip path
                BufferedWriter bw = getWriter(request.name, baseFolder);
                if (!request.disabled) {
                    System.out.println("Dumping " + request.name + "...");
                    ResultSet rs = stmt.executeQuery(request.sql);
                    JsonTableWritable dump = new JsonTableWritable();
                    dump.name = request.table;
                    while (rs.next()) {
                        int pos = 0;
                        for (String fieldName : request.fields) {
                            Object obj = getFieldValue(request, pos, rs, fieldName);
                            dump.addColumn(obj);
                            pos++;
                        }
                        dump.commit();
                    }
                    bw.append(dump.toJson());
                    newTables.add(dump);
                    bw.flush();
                    bw.close();
                } else {
                    System.out.println("Skipping " + request.name + "...");
                }
            }
            System.out.println("done.");
        }
        newTables.addAll(BlueprintDumper.dump(baseFolder));
        newTables.addAll(TranslationsDumper.dump(baseFolder, jdbcConnectionType));
        System.out.println("Computing delta...");
        JSONDeltaDumper.computeDelta(oldTables, newTables, baseFolder);
    } catch (SQLException | ClassNotFoundException | IOException e) {
        e.printStackTrace();
    }
}
From source file:edu.cmu.lti.oaqa.knn4qa.apps.QueryGenNMSLIB.java
public static void main(String[] args) {
    Options options = new Options();
    options.addOption(CommonParams.QUERY_FILE_PARAM, null, true, CommonParams.QUERY_FILE_DESC);
    options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC);
    options.addOption(CommonParams.KNN_QUERIES_PARAM, null, true, CommonParams.KNN_QUERIES_DESC);
    options.addOption(CommonParams.NMSLIB_FIELDS_PARAM, null, true, CommonParams.NMSLIB_FIELDS_DESC);
    options.addOption(CommonParams.MAX_NUM_QUERY_PARAM, null, true, CommonParams.MAX_NUM_QUERY_DESC);
    options.addOption(CommonParams.SEL_PROB_PARAM, null, true, CommonParams.SEL_PROB_DESC);
    CommandLineParser parser = new org.apache.commons.cli.GnuParser();
    BufferedWriter knnQueries = null;
    int maxNumQuery = Integer.MAX_VALUE;
    Float selProb = null;
    try {
        CommandLine cmd = parser.parse(options, args);
        String queryFile = null;
        if (cmd.hasOption(CommonParams.QUERY_FILE_PARAM)) {
            queryFile = cmd.getOptionValue(CommonParams.QUERY_FILE_PARAM);
        } else {
            Usage("Specify 'query file'", options);
        }
        String knnQueriesFile = cmd.getOptionValue(CommonParams.KNN_QUERIES_PARAM);
        if (null == knnQueriesFile)
            Usage("Specify '" + CommonParams.KNN_QUERIES_DESC + "'", options);
        String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM);
        if (tmpn != null) {
            try {
                maxNumQuery = Integer.parseInt(tmpn);
            } catch (NumberFormatException e) {
                Usage("Maximum number of queries isn't integer: '" + tmpn + "'", options);
            }
        }
        String tmps = cmd.getOptionValue(CommonParams.NMSLIB_FIELDS_PARAM);
        if (null == tmps)
            Usage("Specify '" + CommonParams.NMSLIB_FIELDS_DESC + "'", options);
        String nmslibFieldList[] = tmps.split(",");
        knnQueries = new BufferedWriter(new FileWriter(knnQueriesFile));
        knnQueries.write("isQueryFile=1");
        knnQueries.newLine();
        knnQueries.newLine();
        String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM);
        if (null == memIndexPref) {
            Usage("Specify '" + CommonParams.MEMINDEX_DESC + "'", options);
        }
        String tmpf = cmd.getOptionValue(CommonParams.SEL_PROB_PARAM);
        if (tmpf != null) {
            try {
                selProb = Float.parseFloat(tmpf);
            } catch (NumberFormatException e) {
                Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options);
            }
            if (selProb < Float.MIN_NORMAL || selProb + Float.MIN_NORMAL >= 1)
                Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options);
        }
        BufferedReader inpText = new BufferedReader(
                new InputStreamReader(CompressUtils.createInputStream(queryFile)));
        String docText = XmlHelper.readNextXMLIndexEntry(inpText);
        NmslibQueryGenerator queryGen = new NmslibQueryGenerator(nmslibFieldList, memIndexPref);
        Random rnd = new Random();
        for (int docNum = 1; docNum <= maxNumQuery && docText != null;
                ++docNum, docText = XmlHelper.readNextXMLIndexEntry(inpText)) {
            if (selProb != null) {
                if (rnd.nextFloat() > selProb)
                    continue;
            }
            Map<String, String> docFields = null;
            try {
                docFields = XmlHelper.parseXMLIndexEntry(docText);
                String queryObjStr = queryGen.getStrObjForKNNService(docFields);
                knnQueries.append(queryObjStr);
                knnQueries.newLine();
            } catch (SAXException e) {
                System.err.println("Parsing error, offending DOC:" + NL + docText + " doc # " + docNum);
                throw new Exception("Parsing error.");
            }
        }
        knnQueries.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
        if (null != knnQueries)
            try {
                knnQueries.close();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        try {
            if (knnQueries != null)
                knnQueries.close();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        System.exit(1);
    }
    System.out.println("Terminated successfully!");
}
From source file:diffhunter.DiffHunter.java
/**
 * @param args the command line arguments
 * @throws org.apache.commons.cli.ParseException
 * @throws java.io.IOException
 */
public static void main(String[] args) throws ParseException, IOException {
    /*args = new String[] { "-i", "-b",
            "J:\\VishalData\\additional\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted.bed", "-r",
            "J:\\VishalData\\additional\\mouse_mm9.txt", "-o", "J:\\VishalData" };*/
    /*args = new String[] { "-c", "-r", "J:\\VishalData\\additional\\mouse_mm9.txt", "-1",
            "J:\\VishalData\\Ptbp2_Adult_testis_CLIP_mm9_plus_strand_sorted_BDB", "-2",
            "J:\\VishalData\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted_BDB", "-w", "200", "-s", "50",
            "-o", "J:\\VishalData" };*/
    // define the command line options
    Options options = new Options();
    options.addOption("i", "index", false, "Indexing BED files.");
    options.addOption("b", "bed", true, "bed file to be indexed");
    options.addOption("o", "output", true, "Folder that the index/comparison file will be created.");
    options.addOption("r", "reference", true, "Reference annotation file to be used for indexing");
    options.addOption("c", "compare", false, "Finding differences between two conditions");
    options.addOption("1", "first", true, "First sample index location");
    options.addOption("2", "second", true, "Second sample index location");
    options.addOption("w", "window", true, "Length of window for identifying differences");
    options.addOption("s", "sliding", true, "Length of sliding");
    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);
    boolean indexing = false;
    boolean comparing = false;
    if (cmd.hasOption("i")) {
        indexing = true;
    } else if (cmd.hasOption("c")) {
        comparing = true;
    } else {
        // neither mode selected: print usage and exit
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("diffhunter", options);
        return;
    }
    if (indexing == true) {
        // Indexing mode: the user has to provide the files to index.
        if (!(cmd.hasOption("o") || cmd.hasOption("r") || cmd.hasOption("b"))) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("diffhunter", options);
            return;
        }
        String bedfile_ = cmd.getOptionValue("b");
        String reference_file = cmd.getOptionValue("r");
        String folder_loc = cmd.getOptionValue("o");
        String sample_name = FilenameUtils.getBaseName(bedfile_);
        try (Database B2 = BerkeleyDB_Box.Get_BerkeleyDB(
                Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString(), true, sample_name)) {
            Indexer indexing_ = new Indexer(reference_file);
            indexing_.Make_Index(B2, bedfile_,
                    Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString());
            B2.close();
        }
    } else if (comparing == true) {
        if (!(cmd.hasOption("o") || cmd.hasOption("w") || cmd.hasOption("s") || cmd.hasOption("1")
                || cmd.hasOption("2"))) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("diffhunter", options);
            return;
        }
        String folder_loc = cmd.getOptionValue("o");
        int window_ = Integer.parseInt(cmd.getOptionValue("w"));
        int slide_ = Integer.parseInt(cmd.getOptionValue("s"));
        String first = cmd.getOptionValue("1").replace("_BDB", "");
        String second = cmd.getOptionValue("2").replace("_BDB", "");
        String reference_file = cmd.getOptionValue("r");
        String sample_name_first = FilenameUtils.getBaseName(first);
        String sample_name_second = FilenameUtils.getBaseName(second);
        Database B1 = BerkeleyDB_Box.Get_BerkeleyDB(first + "_BDB", false, sample_name_first);
        Database B2 = BerkeleyDB_Box.Get_BerkeleyDB(second + "_BDB", false, sample_name_second);
        List<String> first_condition_genes = Files
                .lines(Paths.get(first + "_BDB", sample_name_first + ".txt").toAbsolutePath())
                .collect(Collectors.toList());
        List<String> second_condition_genes = Files
                .lines(Paths.get(second + "_BDB", sample_name_second + ".txt").toAbsolutePath())
                .collect(Collectors.toList());
        System.out.println("First and second condition are loaded!!!");
        // only genes present in both conditions are compared
        List<String> intersection_ = new ArrayList<>(first_condition_genes);
        intersection_.retainAll(second_condition_genes);
        BufferedWriter output = new BufferedWriter(new FileWriter(
                Paths.get(folder_loc, "differences_" + window_ + "_s" + slide_ + "_c" + ".txt")
                        .toAbsolutePath().toString(), false));
        List<Result_Window> final_results = Collections.synchronizedList(new ArrayList<>());
        Worker_New worker_class = new Worker_New();
        worker_class.Read_Reference(reference_file);
        while (!intersection_.isEmpty()) {
            List<String> selected_genes = new ArrayList<>();
            //if (intersection_.size()<=10000){selected_genes.addAll(intersection_.subList(0, intersection_.size()));}
            //else selected_genes.addAll(intersection_.subList(0, 10000));
            // note: as written, both branches take the whole remaining list; the
            // 10000-gene batching sketched in the comments above is disabled.
            if (intersection_.size() <= intersection_.size()) {
                selected_genes.addAll(intersection_.subList(0, intersection_.size()));
            } else {
                selected_genes.addAll(intersection_.subList(0, intersection_.size()));
            }
            intersection_.removeAll(selected_genes);
            IntStream.range(0, selected_genes.size()).parallel().forEach(i -> {
                System.out.println(selected_genes.get(i) + "\tprocessing......");
                String gene_of_interest = selected_genes.get(i); //e.g. "ENSG00000142657|PGD"
                int start = worker_class.dic_genes.get(gene_of_interest).start_loc;
                int end = worker_class.dic_genes.get(gene_of_interest).end_loc;
                Map<Integer, Integer> first_ = Collections.EMPTY_MAP;
                try {
                    first_ = BerkeleyDB_Box.Get_Coord_Read(B1, gene_of_interest);
                } catch (IOException | ClassNotFoundException ex) {
                    Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex);
                }
                Map<Integer, Integer> second_ = Collections.EMPTY_MAP;
                try {
                    second_ = BerkeleyDB_Box.Get_Coord_Read(B2, gene_of_interest);
                } catch (IOException | ClassNotFoundException ex) {
                    Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex);
                }
                List<Window> top_windows_first = worker_class.Get_Top_Windows(window_, first_, slide_);
                List<Window> top_windows_second = worker_class.Get_Top_Windows(window_, second_, slide_);
                if (top_windows_first.isEmpty() && top_windows_second.isEmpty()) {
                    return;
                }
                List<Result_Window> res_temp = new Worker_New().Get_Significant_Windows(gene_of_interest,
                        start, end, top_windows_first, top_windows_second, second_, first_,
                        sample_name_first, sample_name_second, 0.01);
                if (!res_temp.isEmpty()) {
                    final_results.addAll(res_temp);
                }
            });
            List<Double> pvals = new ArrayList<>();
            for (int i = 0; i < final_results.size(); i++) {
                pvals.add(final_results.get(i).p_value);
            }
            List<Double> qvals = MultipleTestCorrection.benjaminiHochberg(pvals);
            System.out.println("Writing to file...");
            output.append("Gene_Symbol\tContributing_Sample\tStart\tEnd\tOddsRatio\tp_Value\tFDR");
            output.newLine();
            for (int i = 0; i < final_results.size(); i++) {
                Result_Window item = final_results.get(i);
                output.append(item.associated_gene_symbol + "\t" + item.contributing_windows + "\t"
                        + item.start_loc + "\t" + item.end_loc + "\t" + item.oddsratio_ + "\t"
                        + item.p_value + "\t" + qvals.get(i));
                output.newLine();
            }
            final_results.clear();
        }
        output.close();
    }
    System.out.println("Done.");
}