List of usage examples for java.lang.Double.parseDouble
public static double parseDouble(String s) throws NumberFormatException
From source file:eqtlmappingpipeline.util.ModuleEqtlGeuvadisReplication.java
/** * @param args the command line arguments *//*from w ww.j av a 2s . c o m*/ public static void main(String[] args) throws IOException, LdCalculatorException { System.out.println(HEADER); System.out.println(); System.out.flush(); //flush to make sure header is before errors try { Thread.sleep(25); //Allows flush to complete } catch (InterruptedException ex) { } CommandLineParser parser = new PosixParser(); final CommandLine commandLine; try { commandLine = parser.parse(OPTIONS, args, true); } catch (ParseException ex) { System.err.println("Invalid command line arguments: " + ex.getMessage()); System.err.println(); new HelpFormatter().printHelp(" ", OPTIONS); System.exit(1); return; } final String[] genotypesBasePaths = commandLine.getOptionValues("g"); final RandomAccessGenotypeDataReaderFormats genotypeDataType; final String replicationQtlFilePath = commandLine.getOptionValue("e"); final String interactionQtlFilePath = commandLine.getOptionValue("i"); final String outputFilePath = commandLine.getOptionValue("o"); final double ldCutoff = Double.parseDouble(commandLine.getOptionValue("ld")); final int window = Integer.parseInt(commandLine.getOptionValue("w")); System.out.println("Genotype: " + Arrays.toString(genotypesBasePaths)); System.out.println("Interaction file: " + interactionQtlFilePath); System.out.println("Replication file: " + replicationQtlFilePath); System.out.println("Output: " + outputFilePath); System.out.println("LD: " + ldCutoff); System.out.println("Window: " + window); try { if (commandLine.hasOption("G")) { genotypeDataType = RandomAccessGenotypeDataReaderFormats .valueOf(commandLine.getOptionValue("G").toUpperCase()); } else { if (genotypesBasePaths[0].endsWith(".vcf")) { System.err.println( "Only vcf.gz is supported. 
Please see manual on how to do create a vcf.gz file."); System.exit(1); return; } try { genotypeDataType = RandomAccessGenotypeDataReaderFormats .matchFormatToPath(genotypesBasePaths[0]); } catch (GenotypeDataException e) { System.err .println("Unable to determine input 1 type based on specified path. Please specify -G"); System.exit(1); return; } } } catch (IllegalArgumentException e) { System.err.println("Error parsing --genotypesFormat \"" + commandLine.getOptionValue("G") + "\" is not a valid input data format"); System.exit(1); return; } final RandomAccessGenotypeData genotypeData; try { genotypeData = genotypeDataType.createFilteredGenotypeData(genotypesBasePaths, 100, null, null, null, 0.8); } catch (TabixFileNotFoundException e) { LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n" + "Please see README on how to create a tabix file"); System.exit(1); return; } catch (IOException e) { LOGGER.fatal("Error reading input data: " + e.getMessage(), e); System.exit(1); return; } catch (IncompatibleMultiPartGenotypeDataException e) { LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e); System.exit(1); return; } catch (GenotypeDataException e) { LOGGER.fatal("Error reading input data: " + e.getMessage(), e); System.exit(1); return; } ChrPosTreeMap<ArrayList<EQTL>> replicationQtls = new QTLTextFile(replicationQtlFilePath, false) .readQtlsAsTreeMap(); int interactionSnpNotInGenotypeData = 0; int noReplicationQtlsInWindow = 0; int noReplicationQtlsInLd = 0; int multipleReplicationQtlsInLd = 0; int replicationTopSnpNotInGenotypeData = 0; final CSVWriter outputWriter = new CSVWriter(new FileWriter(new File(outputFilePath)), '\t', '\0'); final String[] outputLine = new String[14]; int c = 0; outputLine[c++] = "Chr"; outputLine[c++] = "Pos"; outputLine[c++] = "SNP"; outputLine[c++] = "Gene"; outputLine[c++] = "Module"; outputLine[c++] = "DiscoveryZ"; outputLine[c++] = "ReplicationZ"; outputLine[c++] = 
"DiscoveryZCorrected"; outputLine[c++] = "ReplicationZCorrected"; outputLine[c++] = "DiscoveryAlleleAssessed"; outputLine[c++] = "ReplicationAlleleAssessed"; outputLine[c++] = "bestLd"; outputLine[c++] = "bestLd_dist"; outputLine[c++] = "nextLd"; outputWriter.writeNext(outputLine); HashSet<String> notFound = new HashSet<>(); CSVReader interactionQtlReader = new CSVReader(new FileReader(interactionQtlFilePath), '\t'); interactionQtlReader.readNext();//skip header String[] interactionQtlLine; while ((interactionQtlLine = interactionQtlReader.readNext()) != null) { String snp = interactionQtlLine[1]; String chr = interactionQtlLine[2]; int pos = Integer.parseInt(interactionQtlLine[3]); String gene = interactionQtlLine[4]; String alleleAssessed = interactionQtlLine[9]; String module = interactionQtlLine[12]; double discoveryZ = Double.parseDouble(interactionQtlLine[10]); GeneticVariant interactionQtlVariant = genotypeData.getSnpVariantByPos(chr, pos); if (interactionQtlVariant == null) { System.err.println("Interaction QTL SNP not found in genotype data: " + chr + ":" + pos); ++interactionSnpNotInGenotypeData; continue; } EQTL bestMatch = null; double bestMatchR2 = Double.NaN; Ld bestMatchLd = null; double nextBestR2 = Double.NaN; ArrayList<EQTL> sameSnpQtls = replicationQtls.get(chr, pos); if (sameSnpQtls != null) { for (EQTL sameSnpQtl : sameSnpQtls) { if (sameSnpQtl.getProbe().equals(gene)) { bestMatch = sameSnpQtl; bestMatchR2 = 1; } } } NavigableMap<Integer, ArrayList<EQTL>> potentionalReplicationQtls = replicationQtls.getChrRange(chr, pos - window, true, pos + window, true); for (ArrayList<EQTL> potentialReplicationQtls : potentionalReplicationQtls.values()) { for (EQTL potentialReplicationQtl : potentialReplicationQtls) { if (!potentialReplicationQtl.getProbe().equals(gene)) { continue; } GeneticVariant potentialReplicationQtlVariant = genotypeData.getSnpVariantByPos( potentialReplicationQtl.getRsChr().toString(), potentialReplicationQtl.getRsChrPos()); if 
(potentialReplicationQtlVariant == null) { notFound.add(potentialReplicationQtl.getRsChr().toString() + ":" + potentialReplicationQtl.getRsChrPos()); ++replicationTopSnpNotInGenotypeData; continue; } Ld ld = interactionQtlVariant.calculateLd(potentialReplicationQtlVariant); double r2 = ld.getR2(); if (r2 > 1) { r2 = 1; } if (bestMatch == null) { bestMatch = potentialReplicationQtl; bestMatchR2 = r2; bestMatchLd = ld; } else if (r2 > bestMatchR2) { bestMatch = potentialReplicationQtl; nextBestR2 = bestMatchR2; bestMatchR2 = r2; bestMatchLd = ld; } } } double replicationZ = Double.NaN; double replicationZCorrected = Double.NaN; double discoveryZCorrected = Double.NaN; String replicationAlleleAssessed = null; if (bestMatch != null) { replicationZ = bestMatch.getZscore(); replicationAlleleAssessed = bestMatch.getAlleleAssessed(); if (pos != bestMatch.getRsChrPos()) { String commonHap = null; double commonHapFreq = -1; for (Map.Entry<String, Double> hapFreq : bestMatchLd.getHaplotypesFreq().entrySet()) { double f = hapFreq.getValue(); if (f > commonHapFreq) { commonHapFreq = f; commonHap = hapFreq.getKey(); } } String[] commonHapAlleles = StringUtils.split(commonHap, '/'); discoveryZCorrected = commonHapAlleles[0].equals(alleleAssessed) ? discoveryZ : discoveryZ * -1; replicationZCorrected = commonHapAlleles[1].equals(replicationAlleleAssessed) ? replicationZ : replicationZ * -1; } else { discoveryZCorrected = discoveryZ; replicationZCorrected = alleleAssessed.equals(replicationAlleleAssessed) ? replicationZ : replicationZ * -1; } } c = 0; outputLine[c++] = chr; outputLine[c++] = String.valueOf(pos); outputLine[c++] = snp; outputLine[c++] = gene; outputLine[c++] = module; outputLine[c++] = String.valueOf(discoveryZ); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZ); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(discoveryZCorrected); outputLine[c++] = bestMatch == null ? 
"NA" : String.valueOf(replicationZCorrected); outputLine[c++] = alleleAssessed; outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(bestMatch.getAlleleAssessed()); outputLine[c++] = String.valueOf(bestMatchR2); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(Math.abs(pos - bestMatch.getRsChrPos())); outputLine[c++] = String.valueOf(nextBestR2); outputWriter.writeNext(outputLine); } outputWriter.close(); for (String e : notFound) { System.err.println("Not found: " + e); } System.out.println("interactionSnpNotInGenotypeData: " + interactionSnpNotInGenotypeData); System.out.println("noReplicationQtlsInWindow: " + noReplicationQtlsInWindow); System.out.println("noReplicationQtlsInLd: " + noReplicationQtlsInLd); System.out.println("multipleReplicationQtlsInLd: " + multipleReplicationQtlsInLd); System.out.println("replicationTopSnpNotInGenotypeData: " + replicationTopSnpNotInGenotypeData); }
From source file:cc.twittertools.stream.GatherStatusStream.java
@SuppressWarnings("static-access") public static void main(String[] args) throws TwitterException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("list").hasArgs() .withDescription("comma-separated list of BCP 47 language identifiers").withLongOpt(LANGUAGE_OPTION) .create('l')); options.addOption(OptionBuilder.withArgName("list").hasArgs() .withDescription(/*from w ww. j a v a2 s .c o m*/ "comma-separated list of longitude,latitude pairs specifying a set of bounding boxes") .withLongOpt(LOCATIONS_OPTION).create('g')); options.addOption("n", NO_BOUNDING_BOX_OPTION, false, "do not consider places' bounding box"); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(RunQueriesThrift.class.getName(), options); System.exit(-1); } PatternLayout layoutStandard = new PatternLayout(); layoutStandard.setConversionPattern("[%p] %d %c %M - %m%n"); PatternLayout layoutSimple = new PatternLayout(); layoutSimple.setConversionPattern("%m%n"); // Filter for the statuses: we only want INFO messages LevelRangeFilter filter = new LevelRangeFilter(); filter.setLevelMax(Level.INFO); filter.setLevelMin(Level.INFO); filter.setAcceptOnMatch(true); filter.activateOptions(); TimeBasedRollingPolicy statusesRollingPolicy = new TimeBasedRollingPolicy(); statusesRollingPolicy.setFileNamePattern("statuses.log" + HOUR_ROLL); statusesRollingPolicy.activateOptions(); RollingFileAppender statusesAppender = new RollingFileAppender(); statusesAppender.setRollingPolicy(statusesRollingPolicy); statusesAppender.addFilter(filter); statusesAppender.setLayout(layoutSimple); statusesAppender.activateOptions(); TimeBasedRollingPolicy warningsRollingPolicy = new TimeBasedRollingPolicy(); warningsRollingPolicy.setFileNamePattern("warnings.log" + 
HOUR_ROLL); warningsRollingPolicy.activateOptions(); RollingFileAppender warningsAppender = new RollingFileAppender(); warningsAppender.setRollingPolicy(statusesRollingPolicy); warningsAppender.setThreshold(Level.WARN); warningsAppender.setLayout(layoutStandard); warningsAppender.activateOptions(); ConsoleAppender consoleAppender = new ConsoleAppender(); consoleAppender.setThreshold(Level.WARN); consoleAppender.setLayout(layoutStandard); consoleAppender.activateOptions(); // configures the root logger Logger rootLogger = Logger.getRootLogger(); rootLogger.setLevel(Level.INFO); rootLogger.removeAllAppenders(); rootLogger.addAppender(consoleAppender); rootLogger.addAppender(statusesAppender); rootLogger.addAppender(warningsAppender); // creates filters for the query FilterQuery fq = new FilterQuery(); StringBuilder criteria = new StringBuilder(); /* * @see https://dev.twitter.com/docs/streaming-apis/parameters#language */ final boolean filterLanguage = cmdline.hasOption(LANGUAGE_OPTION); String[] languages = null; if (filterLanguage) { languages = cmdline.getOptionValue(LANGUAGE_OPTION).split(","); fq.language(languages); criteria.append("languages: [" + cmdline.getOptionValue(LANGUAGE_OPTION) + "]\t"); } final String[] langs = languages; /* * @see https://dev.twitter.com/docs/streaming-apis/parameters#locations */ double[][] locations = null; if (cmdline.hasOption(LOCATIONS_OPTION)) { String[] locationsArg = cmdline.getOptionValue(LOCATIONS_OPTION).split(","); int nCoords = locationsArg.length; if (nCoords % 2 == 0) { int pairs = nCoords / 2; locations = new double[pairs][2]; int cnt = 0; for (int i = 0; i < pairs; i++) { locations[i][0] = Double.parseDouble(locationsArg[cnt]); cnt++; locations[i][1] = Double.parseDouble(locationsArg[cnt]); cnt++; } fq.locations(locations); criteria.append("locations: [" + cmdline.getOptionValue(LOCATIONS_OPTION) + "]\t"); } else { System.err.println("There is a missing coordinate. 
See " + "https://dev.twitter.com/docs/streaming-apis/parameters#locations"); System.exit(-1); } } else { fq.locations(new double[][] { { -180, -90 }, { 180, 90 } }); } final double[][] loc = locations; final boolean no_bounding_box = cmdline.hasOption(NO_BOUNDING_BOX_OPTION); if (no_bounding_box) { criteria.append("--no-bounding-box\t"); } // creates a custom logger and log messages final Logger logger = Logger.getLogger(GatherStatusStream.class); logger.info(criteria); RawStreamListener rawListener = new RawStreamListener() { @Override public void onMessage(String rawString) { if (no_bounding_box && loc != null) { try { JSONObject status = new JSONObject(rawString); JSONObject coordObj = status.getJSONObject("coordinates"); JSONArray coords = coordObj.getJSONArray("coordinates"); double longitude = coords.getDouble(0); double latitude = coords.getDouble(1); // checks location for (int i = 0; i < loc.length; i += 2) { if (((loc[i][0] <= longitude) && (longitude <= loc[i + 1][0])) || ((loc[i][1] <= latitude) && (latitude <= loc[i + 1][1]))) { break; } else if (i == loc.length - 1) return; } } catch (JSONException e) { /* Either "Coordinates" is null or trash is coming*/ return; } } if (filterLanguage) { try { JSONObject status = new JSONObject(rawString); // checks language String lang = status.getString("lang"); for (int i = 0; i < langs.length; i++) { if (langs[i].equals(lang)) break; else if (i == langs.length - 1) return; } } catch (JSONException e) { /* Trash is coming */ return; } } cnt++; logger.info(rawString); if (cnt % 1000 == 0) { System.out.println(cnt + " messages received."); } } @Override public void onException(Exception ex) { logger.warn(ex); } }; TwitterStream twitterStream = new TwitterStreamFactory().getInstance(); twitterStream.addListener(rawListener); twitterStream.filter(fq); }
From source file:eu.fbk.utils.lsa.util.Anvur.java
public static void main(String[] args) throws Exception { String logConfig = System.getProperty("log-config"); if (logConfig == null) { logConfig = "log-config.txt"; }/*from ww w .j a v a 2s .c o m*/ PropertyConfigurator.configure(logConfig); /* if (args.length != 2) { log.println("Usage: java -mx512M eu.fbk.utils.lsa.util.Anvur in-file out-dir"); System.exit(1); } File l = new File(args[1]); if (!l.exists()) { l.mkdir(); } List<String[]> list = readText(new File(args[0])); String oldCategory = ""; for (int i=0;i<list.size();i++) { String[] s = list.get(i); if (!oldCategory.equals(s[0])) { File f = new File(args[1] + File.separator + s[0]); boolean b = f.mkdir(); logger.debug(f + " created " + b); } File g = new File(args[1] + File.separator + s[0] + File.separator + s[1] + ".txt"); logger.debug("writing " + g + "..."); PrintWriter pw = new PrintWriter(new FileWriter(g)); //pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2])); if (s.length == 5) { pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2] + " " + s[4].replace('_', ' '))); } else { pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2])); } pw.flush(); pw.close(); } // end for i */ if (args.length != 7) { System.out.println(args.length); System.out.println( "Usage: java -mx2G eu.fbk.utils.lsa.util.Anvur input threshold size dim idf in-file-csv fields\n\n"); System.exit(1); } // DecimalFormat dec = new DecimalFormat("#.00"); File Ut = new File(args[0] + "-Ut"); File Sk = new File(args[0] + "-S"); File r = new File(args[0] + "-row"); File c = new File(args[0] + "-col"); File df = new File(args[0] + "-df"); double threshold = Double.parseDouble(args[1]); int size = Integer.parseInt(args[2]); int dim = Integer.parseInt(args[3]); boolean rescaleIdf = Boolean.parseBoolean(args[4]); //"author_check"0, "authors"1, "title"2, "year"3, "pubtype"4, "publisher"5, "journal"6, "volume"7, "number"8, "pages"9, 
"abstract"10, "nauthors", "citedby" String[] labels = { "author_check", "authors", "title", "year", "pubtype", "publisher", "journal", "volume", "number", "pages", "abstract", "nauthors", "citedby" //author_id authors title year pubtype publisher journal volume number pages abstract nauthors citedby }; String name = buildName(labels, args[6]); File bwf = new File(args[5] + name + "-bow.txt"); PrintWriter bw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bwf), "UTF-8"))); File bdf = new File(args[5] + name + "-bow.csv"); PrintWriter bd = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bdf), "UTF-8"))); File lwf = new File(args[5] + name + "-ls.txt"); PrintWriter lw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lwf), "UTF-8"))); File ldf = new File(args[5] + name + "-ls.csv"); PrintWriter ld = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(ldf), "UTF-8"))); File blwf = new File(args[5] + name + "-bow+ls.txt"); PrintWriter blw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(blwf), "UTF-8"))); File bldf = new File(args[5] + name + "-bow+ls.csv"); PrintWriter bld = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bldf), "UTF-8"))); File logf = new File(args[5] + name + ".log"); PrintWriter log = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(logf), "UTF-8"))); //System.exit(0); LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf); LSSimilarity lss = new LSSimilarity(lsm, size); List<String[]> list = readText(new File(args[5])); // author_check authors title year pubtype publisher journal volume number pages abstract nauthors citedby //header for (int i = 0; i < list.size(); i++) { String[] s1 = list.get(i); String t1 = s1[0].toLowerCase(); bw.print("\t"); lw.print("\t"); blw.print("\t"); bw.print(i + "(" + s1[0] + ")"); lw.print(i + "(" + 
s1[0] + ")"); blw.print(i + "(" + s1[0] + ")"); } // end for i bw.print("\n"); lw.print("\n"); blw.print("\n"); for (int i = 0; i < list.size(); i++) { logger.info(i + "\t"); String[] s1 = list.get(i); String t1 = buildText(s1, args[6]); BOW bow1 = new BOW(t1); logger.info(bow1); Vector d1 = lsm.mapDocument(bow1); d1.normalize(); log.println("d1:" + d1); Vector pd1 = lsm.mapPseudoDocument(d1); pd1.normalize(); log.println("pd1:" + pd1); Vector m1 = merge(pd1, d1); log.println("m1:" + m1); // write the orginal line for (int j = 0; j < s1.length; j++) { bd.print(s1[j]); bd.print("\t"); ld.print(s1[j]); ld.print("\t"); bld.print(s1[j]); bld.print("\t"); } // write the bow, ls, and bow+ls vectors bd.println(d1); ld.println(pd1); bld.println(m1); bw.print(i + "(" + s1[0] + ")"); lw.print(i + "(" + s1[0] + ")"); blw.print(i + "(" + s1[0] + ")"); for (int j = 0; j < i + 1; j++) { bw.print("\t"); lw.print("\t"); blw.print("\t"); } // end for j for (int j = i + 1; j < list.size(); j++) { logger.info(i + "\t" + j); String[] s2 = list.get(j); String t2 = buildText(s2, args[6]); BOW bow2 = new BOW(t2); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t1:" + t1); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t2:" + t2); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow1:" + bow1); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2); Vector d2 = lsm.mapDocument(bow2); d2.normalize(); log.println("d2:" + d2); Vector pd2 = lsm.mapPseudoDocument(d2); pd2.normalize(); log.println("pd2:" + pd2); Vector m2 = merge(pd2, d2); log.println("m2:" + m2); float cosVSM = d1.dotProduct(d2) / (float) Math.sqrt(d1.dotProduct(d1) * d2.dotProduct(d2)); float cosLSM = pd1.dotProduct(pd2) / (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2)); float cosBOWLSM = m1.dotProduct(m2) / (float) Math.sqrt(m1.dotProduct(m1) * m2.dotProduct(m2)); bw.print("\t"); bw.print(dec.format(cosVSM)); lw.print("\t"); lw.print(dec.format(cosLSM)); 
blw.print("\t"); blw.print(dec.format(cosBOWLSM)); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow\t" + cosVSM); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") ls:\t" + cosLSM); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow+ls:\t" + cosBOWLSM); } bw.print("\n"); lw.print("\n"); blw.print("\n"); } // end for i logger.info("wrote " + bwf); logger.info("wrote " + bwf); logger.info("wrote " + bdf); logger.info("wrote " + lwf); logger.info("wrote " + ldf); logger.info("wrote " + blwf); logger.info("wrote " + bldf); logger.info("wrote " + logf); ld.close(); bd.close(); bld.close(); bw.close(); lw.close(); blw.close(); log.close(); }
From source file:CTmousetrack.java
public static void main(String[] args) { String outLoc = new String("." + File.separator + "CTdata"); // Location of the base output data folder; only used when writing out CT data to a local folder String srcName = "CTmousetrack"; // name of the output CT source long blockPts = 10; // points per block flush long sampInterval = 10; // time between sampling updates, msec double trimTime = 0.0; // amount of data to keep (trim time), sec boolean debug = false; // turn on debug? // Specify the CT output connection CTWriteMode writeMode = CTWriteMode.LOCAL; // The selected mode for writing out CT data String serverHost = ""; // Server (FTP or HTTP/S) host:port String serverUser = ""; // Server (FTP or HTTPS) username String serverPassword = ""; // Server (FTP or HTTPS) password // For UDP output mode DatagramSocket udpServerSocket = null; InetAddress udpServerAddress = null; String udpHost = ""; int udpPort = -1; // Concatenate all of the CTWriteMode types String possibleWriteModes = ""; for (CTWriteMode wm : CTWriteMode.values()) { possibleWriteModes = possibleWriteModes + ", " + wm.name(); }//from ww w . j a v a2 s .c o m // Remove ", " from start of string possibleWriteModes = possibleWriteModes.substring(2); // // Argument processing using Apache Commons CLI // // 1. 
Setup command line options Options options = new Options(); options.addOption("h", "help", false, "Print this message."); options.addOption(Option.builder("o").argName("base output dir").hasArg().desc( "Base output directory when writing data to local folder (i.e., this is the location of CTdata folder); default = \"" + outLoc + "\".") .build()); options.addOption(Option.builder("s").argName("source name").hasArg() .desc("Name of source to write data to; default = \"" + srcName + "\".").build()); options.addOption(Option.builder("b").argName("points per block").hasArg() .desc("Number of points per block; UDP output mode will use 1 point/block; default = " + Long.toString(blockPts) + ".") .build()); options.addOption(Option.builder("dt").argName("samp interval msec").hasArg() .desc("Sampling period in msec; default = " + Long.toString(sampInterval) + ".").build()); options.addOption(Option.builder("t").argName("trim time sec").hasArg().desc( "Trim (ring-buffer loop) time (sec); this is only used when writing data to local folder; specify 0 for indefinite; default = " + Double.toString(trimTime) + ".") .build()); options.addOption( Option.builder("w").argName("write mode").hasArg() .desc("Type of write connection; one of " + possibleWriteModes + "; all but UDP mode write out to CT; default = " + writeMode.name() + ".") .build()); options.addOption(Option.builder("host").argName("host[:port]").hasArg() .desc("Host:port when writing via FTP, HTTP, HTTPS, UDP.").build()); options.addOption(Option.builder("u").argName("username,password").hasArg() .desc("Comma-delimited username and password when writing to CT via FTP or HTTPS.").build()); options.addOption("x", "debug", false, "Enable CloudTurbine debug output."); // 2. 
Parse command line options CommandLineParser parser = new DefaultParser(); CommandLine line = null; try { line = parser.parse(options, args); } catch (ParseException exp) { // oops, something went wrong System.err.println("Command line argument parsing failed: " + exp.getMessage()); return; } // 3. Retrieve the command line values if (line.hasOption("help")) { // Display help message and quit HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp("CTmousetrack", "", options, "NOTE: UDP output is a special non-CT output mode where single x,y points are sent via UDP to the specified host:port."); return; } outLoc = line.getOptionValue("o", outLoc); if (!outLoc.endsWith("\\") && !outLoc.endsWith("/")) { outLoc = outLoc + File.separator; } // Make sure the base output folder location ends in "CTdata" if (!outLoc.endsWith("CTdata\\") && !outLoc.endsWith("CTdata/")) { outLoc = outLoc + "CTdata" + File.separator; } srcName = line.getOptionValue("s", srcName); blockPts = Long.parseLong(line.getOptionValue("b", Long.toString(blockPts))); sampInterval = Long.parseLong(line.getOptionValue("dt", Long.toString(sampInterval))); trimTime = Double.parseDouble(line.getOptionValue("t", Double.toString(trimTime))); // Type of output connection String writeModeStr = line.getOptionValue("w", writeMode.name()); boolean bMatch = false; for (CTWriteMode wm : CTWriteMode.values()) { if (wm.name().toLowerCase().equals(writeModeStr.toLowerCase())) { writeMode = wm; bMatch = true; } } if (!bMatch) { System.err.println("Unrecognized write mode, \"" + writeModeStr + "\"; write mode must be one of " + possibleWriteModes); System.exit(0); } if (writeMode != CTWriteMode.LOCAL) { // User must have specified the host // If FTP or HTTPS, they may also specify username/password serverHost = line.getOptionValue("host", serverHost); if (serverHost.isEmpty()) { System.err.println( "When using write mode \"" + writeModeStr + "\", you must specify the server host."); 
System.exit(0); } if (writeMode == CTWriteMode.UDP) { // Force blockPts to be 1 blockPts = 1; // User must have specified both host and port int colonIdx = serverHost.indexOf(':'); if ((colonIdx == -1) || (colonIdx >= serverHost.length() - 1)) { System.err.println( "For UDP output mode, both the host and port (<host>:<port>)) must be specified."); System.exit(0); } udpHost = serverHost.substring(0, colonIdx); String udpPortStr = serverHost.substring(colonIdx + 1); try { udpPort = Integer.parseInt(udpPortStr); } catch (NumberFormatException nfe) { System.err.println("The UDP port must be a positive integer."); System.exit(0); } } if ((writeMode == CTWriteMode.FTP) || (writeMode == CTWriteMode.HTTPS)) { String userpassStr = line.getOptionValue("u", ""); if (!userpassStr.isEmpty()) { // This string should be comma-delimited username and password String[] userpassCSV = userpassStr.split(","); if (userpassCSV.length != 2) { System.err.println("When specifying a username and password for write mode \"" + writeModeStr + "\", separate the username and password by a comma."); System.exit(0); } serverUser = userpassCSV[0]; serverPassword = userpassCSV[1]; } } } debug = line.hasOption("debug"); System.err.println("CTmousetrack parameters:"); System.err.println("\toutput mode = " + writeMode.name()); if (writeMode == CTWriteMode.UDP) { System.err.println("\twrite to " + udpHost + ":" + udpPort); } else { System.err.println("\tsource = " + srcName); System.err.println("\ttrim time = " + trimTime + " sec"); } System.err.println("\tpoints per block = " + blockPts); System.err.println("\tsample interval = " + sampInterval + " msec"); try { // // Setup CTwriter or UDP output // CTwriter ctw = null; CTinfo.setDebug(debug); if (writeMode == CTWriteMode.LOCAL) { ctw = new CTwriter(outLoc + srcName, trimTime); System.err.println("\tdata will be written to local folder \"" + outLoc + "\""); } else if (writeMode == CTWriteMode.FTP) { CTftp ctftp = new CTftp(srcName); try { 
ctftp.login(serverHost, serverUser, serverPassword); } catch (Exception e) { throw new IOException( new String("Error logging into FTP server \"" + serverHost + "\":\n" + e.getMessage())); } ctw = ctftp; // upcast to CTWriter System.err.println("\tdata will be written to FTP server at " + serverHost); } else if (writeMode == CTWriteMode.HTTP) { // Don't send username/pw in HTTP mode since they will be unencrypted CThttp cthttp = new CThttp(srcName, "http://" + serverHost); ctw = cthttp; // upcast to CTWriter System.err.println("\tdata will be written to HTTP server at " + serverHost); } else if (writeMode == CTWriteMode.HTTPS) { CThttp cthttp = new CThttp(srcName, "https://" + serverHost); // Username/pw are optional for HTTPS mode; only use them if username is not empty if (!serverUser.isEmpty()) { try { cthttp.login(serverUser, serverPassword); } catch (Exception e) { throw new IOException(new String( "Error logging into HTTP server \"" + serverHost + "\":\n" + e.getMessage())); } } ctw = cthttp; // upcast to CTWriter System.err.println("\tdata will be written to HTTPS server at " + serverHost); } else if (writeMode == CTWriteMode.UDP) { try { udpServerSocket = new DatagramSocket(); } catch (SocketException se) { System.err.println("Error creating socket for UDP:\n" + se); System.exit(0); } try { udpServerAddress = InetAddress.getByName(udpHost); } catch (UnknownHostException uhe) { System.err.println("Error getting UDP server host address:\n" + uhe); System.exit(0); } } if (writeMode != CTWriteMode.UDP) { ctw.setBlockMode(blockPts > 1, blockPts > 1); ctw.autoFlush(0); // no autoflush ctw.autoSegment(1000); } // screen dims Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); double width = screenSize.getWidth(); double height = screenSize.getHeight(); // use Map for consolidated putData Map<String, Object> cmap = new LinkedHashMap<String, Object>(); // loop and write some output for (int i = 0; i < 1000000; i++) { // go until killed long 
currentTime = System.currentTimeMillis(); Point mousePos = MouseInfo.getPointerInfo().getLocation(); float x_pt = (float) (mousePos.getX() / width); // normalize float y_pt = (float) ((height - mousePos.getY()) / height); // flip Y (so bottom=0) if (writeMode != CTWriteMode.UDP) { // CT output mode ctw.setTime(currentTime); cmap.clear(); cmap.put("x", x_pt); cmap.put("y", y_pt); ctw.putData(cmap); if (((i + 1) % blockPts) == 0) { ctw.flush(); System.err.print("."); } } else { // UDP output mode // We force blockPts to be 1 for UDP output mode, i.e. we "flush" the data every time // Write the following data (21 bytes total): // header = "MOUSE", 5 bytes // current time, long, 8 bytes // 2 floats (x,y) 4 bytes each, 8 bytes int len = 21; ByteBuffer bb = ByteBuffer.allocate(len); String headerStr = "MOUSE"; bb.put(headerStr.getBytes("UTF-8")); bb.putLong(currentTime); bb.putFloat(x_pt); bb.putFloat(y_pt); // Might be able to use the following, but not sure: // byte[] sendData = bb.array(); byte[] sendData = new byte[len]; bb.position(0); bb.get(sendData, 0, len); DatagramPacket sendPacket = new DatagramPacket(sendData, sendData.length, udpServerAddress, udpPort); try { udpServerSocket.send(sendPacket); } catch (IOException e) { System.err.println("Test server caught exception trying to send data to UDP client:\n" + e); } System.err.print("."); } try { Thread.sleep(sampInterval); } catch (Exception e) { } ; } if (writeMode != CTWriteMode.UDP) { ctw.flush(); // wrap up } } catch (Exception e) { System.err.println("CTmousetrack exception: " + e); e.printStackTrace(); } }
From source file:eqtlmappingpipeline.util.ModuleEqtlNeutrophilReplication.java
/** * @param args the command line arguments *///from w w w .j a v a 2 s . c o m public static void main(String[] args) throws IOException, LdCalculatorException { System.out.println(HEADER); System.out.println(); System.out.flush(); //flush to make sure header is before errors try { Thread.sleep(25); //Allows flush to complete } catch (InterruptedException ex) { } CommandLineParser parser = new PosixParser(); final CommandLine commandLine; try { commandLine = parser.parse(OPTIONS, args, true); } catch (ParseException ex) { System.err.println("Invalid command line arguments: " + ex.getMessage()); System.err.println(); new HelpFormatter().printHelp(" ", OPTIONS); System.exit(1); return; } final String[] genotypesBasePaths = commandLine.getOptionValues("g"); final RandomAccessGenotypeDataReaderFormats genotypeDataType; final String replicationQtlFilePath = commandLine.getOptionValue("e"); final String interactionQtlFilePath = commandLine.getOptionValue("i"); final String outputFilePath = commandLine.getOptionValue("o"); final double ldCutoff = Double.parseDouble(commandLine.getOptionValue("ld")); final int window = Integer.parseInt(commandLine.getOptionValue("w")); System.out.println("Genotype: " + Arrays.toString(genotypesBasePaths)); System.out.println("Interaction file: " + interactionQtlFilePath); System.out.println("Replication file: " + replicationQtlFilePath); System.out.println("Output: " + outputFilePath); System.out.println("LD: " + ldCutoff); System.out.println("Window: " + window); try { if (commandLine.hasOption("G")) { genotypeDataType = RandomAccessGenotypeDataReaderFormats .valueOf(commandLine.getOptionValue("G").toUpperCase()); } else { if (genotypesBasePaths[0].endsWith(".vcf")) { System.err.println( "Only vcf.gz is supported. 
Please see manual on how to do create a vcf.gz file."); System.exit(1); return; } try { genotypeDataType = RandomAccessGenotypeDataReaderFormats .matchFormatToPath(genotypesBasePaths[0]); } catch (GenotypeDataException e) { System.err .println("Unable to determine input 1 type based on specified path. Please specify -G"); System.exit(1); return; } } } catch (IllegalArgumentException e) { System.err.println("Error parsing --genotypesFormat \"" + commandLine.getOptionValue("G") + "\" is not a valid input data format"); System.exit(1); return; } final RandomAccessGenotypeData genotypeData; try { genotypeData = genotypeDataType.createFilteredGenotypeData(genotypesBasePaths, 100, null, null, null, 0.8); } catch (TabixFileNotFoundException e) { LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n" + "Please see README on how to create a tabix file"); System.exit(1); return; } catch (IOException e) { LOGGER.fatal("Error reading input data: " + e.getMessage(), e); System.exit(1); return; } catch (IncompatibleMultiPartGenotypeDataException e) { LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e); System.exit(1); return; } catch (GenotypeDataException e) { LOGGER.fatal("Error reading input data: " + e.getMessage(), e); System.exit(1); return; } ChrPosTreeMap<ArrayList<ReplicationQtl>> replicationQtls = new ChrPosTreeMap<>(); CSVReader replicationQtlReader = new CSVReader(new FileReader(replicationQtlFilePath), '\t'); replicationQtlReader.readNext();//skip header String[] replicationLine; while ((replicationLine = replicationQtlReader.readNext()) != null) { try { GeneticVariant variant = genotypeData.getSnpVariantByPos(replicationLine[REPLICATION_SNP_CHR_COL], Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL])); if (variant == null) { continue; } ReplicationQtl replicationQtl = new ReplicationQtl(replicationLine[REPLICATION_SNP_CHR_COL], Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]), 
replicationLine[REPLICATION_GENE_COL], Double.parseDouble(replicationLine[REPLICATION_BETA_COL]), variant.getAlternativeAlleles().get(0).getAlleleAsString()); ArrayList<ReplicationQtl> posReplicationQtls = replicationQtls.get(replicationQtl.getChr(), replicationQtl.getPos()); if (posReplicationQtls == null) { posReplicationQtls = new ArrayList<>(); replicationQtls.put(replicationQtl.getChr(), replicationQtl.getPos(), posReplicationQtls); } posReplicationQtls.add(replicationQtl); } catch (Exception e) { System.out.println(Arrays.toString(replicationLine)); throw e; } } int interactionSnpNotInGenotypeData = 0; int noReplicationQtlsInWindow = 0; int noReplicationQtlsInLd = 0; int multipleReplicationQtlsInLd = 0; int replicationTopSnpNotInGenotypeData = 0; final CSVWriter outputWriter = new CSVWriter(new FileWriter(new File(outputFilePath)), '\t', '\0'); final String[] outputLine = new String[14]; int c = 0; outputLine[c++] = "Chr"; outputLine[c++] = "Pos"; outputLine[c++] = "SNP"; outputLine[c++] = "Gene"; outputLine[c++] = "Module"; outputLine[c++] = "DiscoveryZ"; outputLine[c++] = "ReplicationZ"; outputLine[c++] = "DiscoveryZCorrected"; outputLine[c++] = "ReplicationZCorrected"; outputLine[c++] = "DiscoveryAlleleAssessed"; outputLine[c++] = "ReplicationAlleleAssessed"; outputLine[c++] = "bestLd"; outputLine[c++] = "bestLd_dist"; outputLine[c++] = "nextLd"; outputWriter.writeNext(outputLine); HashSet<String> notFound = new HashSet<>(); CSVReader interactionQtlReader = new CSVReader(new FileReader(interactionQtlFilePath), '\t'); interactionQtlReader.readNext();//skip header String[] interactionQtlLine; while ((interactionQtlLine = interactionQtlReader.readNext()) != null) { String snp = interactionQtlLine[1]; String chr = interactionQtlLine[2]; int pos = Integer.parseInt(interactionQtlLine[3]); String gene = interactionQtlLine[4]; String alleleAssessed = interactionQtlLine[9]; String module = interactionQtlLine[12]; double discoveryZ = 
Double.parseDouble(interactionQtlLine[10]); GeneticVariant interactionQtlVariant = genotypeData.getSnpVariantByPos(chr, pos); if (interactionQtlVariant == null) { System.err.println("Interaction QTL SNP not found in genotype data: " + chr + ":" + pos); ++interactionSnpNotInGenotypeData; continue; } ReplicationQtl bestMatch = null; double bestMatchR2 = Double.NaN; Ld bestMatchLd = null; double nextBestR2 = Double.NaN; ArrayList<ReplicationQtl> sameSnpQtls = replicationQtls.get(chr, pos); if (sameSnpQtls != null) { for (ReplicationQtl sameSnpQtl : sameSnpQtls) { if (sameSnpQtl.getGene().equals(gene)) { bestMatch = sameSnpQtl; bestMatchR2 = 1; } } } NavigableMap<Integer, ArrayList<ReplicationQtl>> potentionalReplicationQtls = replicationQtls .getChrRange(chr, pos - window, true, pos + window, true); for (ArrayList<ReplicationQtl> potentialReplicationQtls : potentionalReplicationQtls.values()) { for (ReplicationQtl potentialReplicationQtl : potentialReplicationQtls) { if (!potentialReplicationQtl.getGene().equals(gene)) { continue; } GeneticVariant potentialReplicationQtlVariant = genotypeData .getSnpVariantByPos(potentialReplicationQtl.getChr(), potentialReplicationQtl.getPos()); if (potentialReplicationQtlVariant == null) { notFound.add(potentialReplicationQtl.getChr() + ":" + potentialReplicationQtl.getPos()); ++replicationTopSnpNotInGenotypeData; continue; } Ld ld = interactionQtlVariant.calculateLd(potentialReplicationQtlVariant); double r2 = ld.getR2(); if (r2 > 1) { r2 = 1; } if (bestMatch == null) { bestMatch = potentialReplicationQtl; bestMatchR2 = r2; bestMatchLd = ld; } else if (r2 > bestMatchR2) { bestMatch = potentialReplicationQtl; nextBestR2 = bestMatchR2; bestMatchR2 = r2; bestMatchLd = ld; } } } double replicationZ = Double.NaN; double replicationZCorrected = Double.NaN; double discoveryZCorrected = Double.NaN; String replicationAlleleAssessed = null; if (bestMatch != null) { replicationZ = bestMatch.getBeta(); replicationAlleleAssessed = 
bestMatch.getAssessedAllele(); if (pos != bestMatch.getPos()) { String commonHap = null; double commonHapFreq = -1; for (Map.Entry<String, Double> hapFreq : bestMatchLd.getHaplotypesFreq().entrySet()) { double f = hapFreq.getValue(); if (f > commonHapFreq) { commonHapFreq = f; commonHap = hapFreq.getKey(); } } String[] commonHapAlleles = StringUtils.split(commonHap, '/'); discoveryZCorrected = commonHapAlleles[0].equals(alleleAssessed) ? discoveryZ : discoveryZ * -1; replicationZCorrected = commonHapAlleles[1].equals(replicationAlleleAssessed) ? replicationZ : replicationZ * -1; } else { discoveryZCorrected = discoveryZ; replicationZCorrected = alleleAssessed.equals(replicationAlleleAssessed) ? replicationZ : replicationZ * -1; } } c = 0; outputLine[c++] = chr; outputLine[c++] = String.valueOf(pos); outputLine[c++] = snp; outputLine[c++] = gene; outputLine[c++] = module; outputLine[c++] = String.valueOf(discoveryZ); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZ); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(discoveryZCorrected); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZCorrected); outputLine[c++] = alleleAssessed; outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(bestMatch.getAssessedAllele()); outputLine[c++] = String.valueOf(bestMatchR2); outputLine[c++] = bestMatch == null ? 
"NA" : String.valueOf(Math.abs(pos - bestMatch.getPos())); outputLine[c++] = String.valueOf(nextBestR2); outputWriter.writeNext(outputLine); } outputWriter.close(); for (String e : notFound) { System.err.println("Not found: " + e); } System.out.println("interactionSnpNotInGenotypeData: " + interactionSnpNotInGenotypeData); System.out.println("noReplicationQtlsInWindow: " + noReplicationQtlsInWindow); System.out.println("noReplicationQtlsInLd: " + noReplicationQtlsInLd); System.out.println("multipleReplicationQtlsInLd: " + multipleReplicationQtlsInLd); System.out.println("replicationTopSnpNotInGenotypeData: " + replicationTopSnpNotInGenotypeData); }
From source file:discovery.compression.kdd2011.ratio.RatioCompressionReport.java
public static void main(String[] args) throws GraphReadingException, IOException, java.text.ParseException { opts.addOption("r", true, "Goal compression ratio"); // opts.addOption( "a", // true, // "Algorithm used for compression. The default and only currently available option is \"greedy\""); //opts.addOption("cost-output",true,"Output file for costs, default is costs.txt"); //opts.addOption("cost-format",true,"Output format for "); opts.addOption("ctype", true, "Connectivity type: global or local, default is global."); opts.addOption("connectivity", false, "enables output for connectivity. Connectivity info will be written to connectivity.txt"); opts.addOption("output_bmg", true, "Write bmg file with groups to given file."); opts.addOption("algorithm", true, "Algorithm to use, one of: greedy random1 random2 bruteforce slowgreedy"); opts.addOption("hop2", false, "Only try to merge nodes that have common neighbors"); opts.addOption("kmedoids", false, "Enables output for kmedoids clustering"); opts.addOption("kmedoids_k", true, "Number of clusters to be used in kmedoids. Default is 3"); opts.addOption("kmedoids_output", true, "Output file for kmedoid clusters. Default is clusters.txt. This file will be overwritten."); opts.addOption("norefresh", false, "Use old style merging: all connectivities are not refreshed when merging"); opts.addOption("edge_attribute", true, "Attribute from bmgraph used as edge weight"); opts.addOption("only_times", false, "Only write times.txt"); //opts.addOption("no_metrics",false,"Exit after compression, don't calculate any metrics or produce output bmg for the compression."); CommandLineParser parser = new PosixParser(); CommandLine cmd = null;/*from ww w .j ava 2 s. 
c o m*/ try { cmd = parser.parse(opts, args); } catch (ParseException e) { e.printStackTrace(); System.exit(0); } boolean connectivity = false; double ratio = 0; boolean hop2 = cmd.hasOption("hop2"); RatioCompression compression = new GreedyRatioCompression(hop2); if (cmd.hasOption("connectivity")) connectivity = true; ConnectivityType ctype = ConnectivityType.GLOBAL; CompressionMergeModel mergeModel = new PathAverageMergeModel(); if (cmd.hasOption("ctype")) { String ctypeStr = cmd.getOptionValue("ctype"); if (ctypeStr.equals("local")) { ctype = ConnectivityType.LOCAL; mergeModel = new EdgeAverageMergeModel(); } else if (ctypeStr.equals("global")) { ctype = ConnectivityType.GLOBAL; mergeModel = new PathAverageMergeModel(); } else { System.out.println(PROGRAM_NAME + ": unknown connectivity type " + ctypeStr); printHelp(); } } if (cmd.hasOption("norefresh")) mergeModel = new PathAverageMergeModelNorefresh(); if (cmd.hasOption("algorithm")) { String alg = cmd.getOptionValue("algorithm"); if (alg.equals("greedy")) { compression = new GreedyRatioCompression(hop2); } else if (alg.equals("random1")) { compression = new RandomRatioCompression(hop2); } else if (alg.equals("random2")) { compression = new SmartRandomRatioCompression(hop2); } else if (alg.equals("bruteforce")) { compression = new BruteForceCompression(hop2, ctype == ConnectivityType.LOCAL); } else if (alg.equals("slowgreedy")) { compression = new SlowGreedyRatioCompression(hop2); } else { System.out.println("algorithm must be one of: greedy random1 random2 bruteforce slowgreedy"); printHelp(); } } compression.setMergeModel(mergeModel); if (cmd.hasOption("r")) { ratio = Double.parseDouble(cmd.getOptionValue("r")); } else { System.out.println(PROGRAM_NAME + ": compression ratio not defined"); printHelp(); } if (cmd.hasOption("help")) { printHelp(); } String infile = null; if (cmd.getArgs().length != 0) { infile = cmd.getArgs()[0]; } else { printHelp(); } boolean kmedoids = false; int kmedoidsK = 3; String 
kmedoidsOutput = "clusters.txt"; if (cmd.hasOption("kmedoids")) kmedoids = true; if (cmd.hasOption("kmedoids_k")) kmedoidsK = Integer.parseInt(cmd.getOptionValue("kmedoids_k")); if (cmd.hasOption("kmedoids_output")) kmedoidsOutput = cmd.getOptionValue("kmedoids_output"); String edgeAttrib = "goodness"; if (cmd.hasOption("edge_attribute")) edgeAttrib = cmd.getOptionValue("edge_attribute"); // This program should directly use bmgraph-java to read and // DefaultGraph should have a constructor that takes a BMGraph as an // argument. //VisualGraph vg = new VisualGraph(infile, edgeAttrib, false); //System.out.println("vg read"); //SimpleVisualGraph origSG = new SimpleVisualGraph(vg); BMGraph bmg = BMGraphUtils.readBMGraph(infile); int origN = bmg.getNodes().size(); //for(int i=0;i<origN;i++) //System.out.println(i+"="+origSG.getVisualNode(i)); System.out.println("bmgraph read"); BMNode[] i2n = new BMNode[origN]; HashMap<BMNode, Integer> n2i = new HashMap<BMNode, Integer>(); { int pi = 0; for (BMNode nod : bmg.getNodes()) { n2i.put(nod, pi); i2n[pi++] = nod; } } DefaultGraph dg = new DefaultGraph(); for (BMEdge e : bmg.getEdges()) { dg.addEdge(n2i.get(e.getSource()), n2i.get(e.getTarget()), Double.parseDouble(e.get(edgeAttrib))); } DefaultGraph origDG = dg.copy(); System.out.println("inputs read"); RatioCompression nopCompressor = new RatioCompression.DefaultRatioCompression(); ResultGraph nopResult = nopCompressor.compressGraph(dg, 1); long start = System.currentTimeMillis(); ResultGraph result = compression.compressGraph(dg, ratio); long timeSpent = System.currentTimeMillis() - start; double seconds = timeSpent * 0.001; BufferedWriter timesWriter = new BufferedWriter(new FileWriter("times.txt", true)); timesWriter.append("" + seconds + "\n"); timesWriter.close(); if (cmd.hasOption("only_times")) { System.out.println("Compression done, exiting."); System.exit(0); } BufferedWriter costsWriter = new BufferedWriter(new FileWriter("costs.txt", true)); costsWriter.append("" + 
nopResult.getCompressorCosts() + " " + result.getCompressorCosts() + "\n"); costsWriter.close(); double[][] origProb; double[][] compProb; int[] group = new int[origN]; for (int i = 0; i < result.partition.size(); i++) for (int x : result.partition.get(i)) group[x] = i; if (ctype == ConnectivityType.LOCAL) { origProb = new double[origN][origN]; compProb = new double[origN][origN]; DefaultGraph g = result.uncompressedGraph(); for (int i = 0; i < origN; i++) { for (int j = 0; j < origN; j++) { origProb[i][j] = dg.getEdgeWeight(i, j); compProb[i][j] = g.getEdgeWeight(i, j); } } System.out.println("Writing edge-dissimilarity"); } else { origProb = ProbDijkstra.getProbMatrix(origDG); compProb = new double[origN][origN]; System.out.println("nodeCount = " + result.graph.getNodeCount()); double[][] ccProb = ProbDijkstra.getProbMatrix(result.graph); System.out.println("ccProb.length = " + ccProb.length); System.out.println("ccProb[0].length = " + ccProb[0].length); for (int i = 0; i < origN; i++) { for (int j = 0; j < origN; j++) { if (group[i] == group[j]) compProb[i][j] = result.graph.getEdgeWeight(group[i], group[j]); else { int gj = group[j]; int gi = group[i]; compProb[i][j] = ccProb[group[i]][group[j]]; } } } System.out.println("Writing best-path-dissimilarity"); //compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph()); } { BufferedWriter connWr = null;// if (connectivity) { connWr = new BufferedWriter(new FileWriter("connectivity.txt", true)); } double totalDiff = 0; for (int i = 0; i < origN; i++) { for (int j = i + 1; j < origN; j++) { double diff = Math.abs(origProb[i][j] - compProb[i][j]); //VisualNode ni = origSG.getVisualNode(i); //VisualNode nj = origSG.getVisualNode(j); BMNode ni = i2n[i]; BMNode nj = i2n[j]; if (connectivity) connWr.append(ni + "\t" + nj + "\t" + origProb[i][j] + "\t" + compProb[i][j] + "\t" + diff + "\n"); totalDiff += diff * diff; } } if (connectivity) { connWr.append("\n"); connWr.close(); } totalDiff = Math.sqrt(totalDiff); 
BufferedWriter dissWr = new BufferedWriter(new FileWriter("dissimilarity.txt", true)); dissWr.append("" + totalDiff + "\n"); dissWr.close(); } if (cmd.hasOption("output_bmg")) { BMGraph outgraph = new BMGraph(); String outputfile = cmd.getOptionValue("output_bmg"); HashMap<Integer, BMNode> nodes = new HashMap<Integer, BMNode>(); for (int i = 0; i < result.partition.size(); i++) { ArrayList<Integer> g = result.partition.get(i); if (g.size() == 0) continue; BMNode node = new BMNode("Supernode_" + i); HashMap<String, String> attributes = new HashMap<String, String>(); StringBuffer contents = new StringBuffer(); for (int x : g) contents.append(i2n[x] + ","); contents.delete(contents.length() - 1, contents.length()); attributes.put("nodes", contents.toString()); attributes.put("self-edge", "" + result.graph.getEdgeWeight(i, i)); node.setAttributes(attributes); nodes.put(i, node); outgraph.ensureHasNode(node); } for (int i = 0; i < result.partition.size(); i++) { if (result.partition.get(i).size() == 0) continue; for (int x : result.graph.getNeighbors(i)) { if (x < i) continue; BMNode from = nodes.get(i); BMNode to = nodes.get(x); if (from == null || to == null) { System.out.println(from + "->" + to); System.out.println(i + "->" + x); System.out.println(""); } BMEdge e = new BMEdge(nodes.get(i), nodes.get(x), "notype"); e.setAttributes(new HashMap<String, String>()); e.put("goodness", "" + result.graph.getEdgeWeight(i, x)); outgraph.ensureHasEdge(e); } } BMGraphUtils.writeBMGraph(outgraph, outputfile); } // k medoids! 
if (kmedoids) { //KMedoidsResult clustersOrig=KMedoids.runKMedoids(origProb,kmedoidsK); if (ctype == ConnectivityType.LOCAL) { compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph()); } //KMedoidsResult compClusters = KMedoids.runKMedoids(ProbDijkstra.getProbMatrix(result.graph),kmedoidsK); KMedoidsResult clustersComp = KMedoids.runKMedoids(compProb, kmedoidsK); BufferedWriter bw = new BufferedWriter(new FileWriter(kmedoidsOutput)); for (int i = 0; i < origN; i++) { int g = group[i]; //bw.append(origSG.getVisualNode(i).getBMNode()+" "+compClusters.clusters[g]+"\n"); bw.append(i2n[i] + " " + clustersComp.clusters[i] + "\n"); } bw.close(); } System.exit(0); }
From source file:lemontree.modulenetwork.RunCli.java
/** * Parse command line options and run LeMoNe * /* w w w .j a va 2 s .co m*/ * @param args command-line arguments string * */ public static void main(String[] args) { // set dummy values for those parameters, they'll be filled later String task = null; String gene_file = null; String data_file = null; String reg_file = null; String tree_file = null; String output_file = null; String range = null; int num_steps = 0; int burn_in = 0; int sample_steps = 0; String cluster_file = null; String go_annot_file = null; String go_ref_file = null; String top_regulators = null; String map_file = null; String go_ontology_file = null; String draw_experiment_color = null; // set default values for those parameters, users can override them double alpha = 0.1; double beta = 0.1; double mu = 0.0; double lambda = 0.1; double score_gain = 0.0; int init_num_clust = 0; int num_runs = 1; boolean use_bayesian_score = true; int num_reg = 10; double beta_reg = 20; String go_p_value = "0.05"; String go_namespace = "biological_process"; boolean use_global_mean = false; boolean use_regulator_mean = false; int cut_level = 0; double min_weight = 0.25; int min_clust_size = 10; int min_clust_score = 2; Boolean node_clustering = true; boolean draw_experiment_names = true; // create the different options Options opts = new Options(); opts.addOption("task", true, "task to perform"); opts.addOption("gene_file", true, "gene file"); opts.addOption("data_file", true, "data file (genes)"); opts.addOption("reg_file", true, "regulators file"); opts.addOption("tree_file", true, "tree file"); opts.addOption("output_file", true, "output file name"); opts.addOption("num_steps", true, "number of steps (Gibbs sampler)"); opts.addOption("burn_in", true, "number of burn-in steps (Gibbs sampler)"); opts.addOption("sample_steps", true, "sample steps interval (Gibbs sampler)"); opts.addOption("cluster_file", true, "cluster file name"); opts.addOption("num_clust", true, "number of clusters"); opts.addOption("alpha", 
true, "alpha0 parameter value"); opts.addOption("beta", true, "beta0 parameter value"); opts.addOption("mu", true, "mu0 parameter value"); opts.addOption("lambda", true, "lambda0 parameter value"); opts.addOption("score_gain", true, "score gain cutoff value"); opts.addOption("init_num_clust", true, "initial number of clusters (Gibbs sampler)"); opts.addOption("num_runs", true, "number of runs (Gibbs sampler)"); opts.addOption("num_reg", true, "maximum number of regulators assigned for each node"); opts.addOption("beta_reg", true, "beta parameter value for regulators assignment"); opts.addOption("num_split", true, "number of splits for the module set"); opts.addOption("prefix", true, "java command prefix for the split option command line"); opts.addOption("range", true, "module set range for the assignment of the regulators"); opts.addOption("go_annot_file", true, "GO custom annotation file"); opts.addOption("go_ontology_file", true, "GO ontology file name"); opts.addOption("go_ref_file", true, "GO refence gene list file name"); opts.addOption("go_p_value", true, "GO p-value cutoff"); opts.addOption("go_namespace", true, "GO namespace"); opts.addOption("go_annot_def", false, "GO annotation file flag"); opts.addOption("matlab", false, "Matlab format for output files"); opts.addOption("help", false, "help"); opts.addOption("h", false, "help"); opts.addOption("top_regulators", true, "Top regulators file name"); opts.addOption("use_global_mean", false, "Use global mean for the figures"); opts.addOption("use_regulator_mean", false, "Use regulator mean for the figures"); opts.addOption("all_regulators", false, "Print all regulators"); opts.addOption("map_file", true, "Gene names map file"); opts.addOption("cut_level", true, "Regulation tree cut level"); opts.addOption("min_weight", true, "Tight clusters minimum weight"); opts.addOption("min_clust_size", true, "Tight clusters minimum cluster size"); opts.addOption("min_clust_score", true, "Tight clusters minimum cluster 
score"); opts.addOption("node_clustering", true, "Perform node clustering (true) or edge clustering (false)"); opts.addOption("draw_experiment_names", true, "Draw experiment names in the figures"); opts.addOption("draw_experiment_color", true, "Draw experiment color codes in the figures"); // build a parser object and parse the command line (!) CommandLineParser parser = new PosixParser(); try { CommandLine cmd = parser.parse(opts, args); if (cmd.hasOption("min_weight")) min_weight = Double.parseDouble(cmd.getOptionValue("min_weight")); if (cmd.hasOption("min_clust_size")) min_clust_size = Integer.parseInt(cmd.getOptionValue("min_clust_size")); if (cmd.hasOption("min_clust_score")) min_clust_score = Integer.parseInt(cmd.getOptionValue("min_clust_score")); if (cmd.hasOption("task")) task = cmd.getOptionValue("task"); if (cmd.hasOption("data_file")) data_file = cmd.getOptionValue("data_file"); if (cmd.hasOption("tree_file")) tree_file = cmd.getOptionValue("tree_file"); if (cmd.hasOption("gene_file")) gene_file = cmd.getOptionValue("gene_file"); if (cmd.hasOption("reg_file")) reg_file = cmd.getOptionValue("reg_file"); if (cmd.hasOption("output_file")) output_file = cmd.getOptionValue("output_file"); if (cmd.hasOption("cluster_file")) cluster_file = cmd.getOptionValue("cluster_file"); if (cmd.hasOption("alpha")) alpha = Double.parseDouble(cmd.getOptionValue("alpha")); if (cmd.hasOption("beta")) beta = Double.parseDouble(cmd.getOptionValue("beta")); if (cmd.hasOption("lambda")) lambda = Double.parseDouble(cmd.getOptionValue("lambda")); if (cmd.hasOption("mu")) mu = Double.parseDouble(cmd.getOptionValue("mu")); if (cmd.hasOption("score_gain")) score_gain = Double.parseDouble(cmd.getOptionValue("score_gain")); if (cmd.hasOption("num_steps")) num_steps = Integer.parseInt(cmd.getOptionValue("num_steps")); if (cmd.hasOption("burn_in")) burn_in = Integer.parseInt(cmd.getOptionValue("burn_in")); if (cmd.hasOption("sample_steps")) sample_steps = 
Integer.parseInt(cmd.getOptionValue("sample_steps")); if (cmd.hasOption("init_num_clust")) init_num_clust = Integer.parseInt(cmd.getOptionValue("init_num_clust")); if (cmd.hasOption("num_reg")) num_reg = Integer.parseInt(cmd.getOptionValue("num_reg")); if (cmd.hasOption("beta_reg")) beta_reg = Double.parseDouble(cmd.getOptionValue("beta_reg")); if (cmd.hasOption("range")) range = cmd.getOptionValue("range"); if (cmd.hasOption("go_annot_file")) go_annot_file = cmd.getOptionValue("go_annot_file"); if (cmd.hasOption("go_ontology_file")) go_ontology_file = cmd.getOptionValue("go_ontology_file"); if (cmd.hasOption("go_ref_file")) go_ref_file = cmd.getOptionValue("go_ref_file"); if (cmd.hasOption("go_p_value")) go_p_value = cmd.getOptionValue("go_p_value"); if (cmd.hasOption("go_namespace")) go_namespace = cmd.getOptionValue("go_namespace"); if (cmd.hasOption("top_regulators")) top_regulators = cmd.getOptionValue("top_regulators"); if (cmd.hasOption("use_global_mean")) use_global_mean = true; if (cmd.hasOption("use_regulator_mean")) use_regulator_mean = true; if (cmd.hasOption("map_file")) map_file = cmd.getOptionValue("map_file"); if (cmd.hasOption("cut_level")) cut_level = Integer.parseInt(cmd.getOptionValue("cut_level")); if (cmd.hasOption("node_clustering")) if (cmd.getOptionValue("node_clustering").equalsIgnoreCase("false")) node_clustering = false; if (cmd.hasOption("draw_experiment_names")) if (cmd.getOptionValue("draw_experiment_names").equalsIgnoreCase("false")) draw_experiment_names = false; if (cmd.hasOption("draw_experiment_color")) draw_experiment_color = cmd.getOptionValue("draw_experiment_color"); } catch (ParseException exp) { System.out.println("Error while parsing command line:"); System.out.println(); exp.printStackTrace(); System.exit(1); } // print header printBanner(); // something has to be done, we need a task to be set if (task == null) Die("Error: task option must be set."); // --------------------------------------------------------------- // 
ganesh task: 2-way clustering of genes using the gibbs sampler // --------------------------------------------------------------- if (task.equalsIgnoreCase("ganesh")) { // those parameters must be set if (data_file == null) Die("Error: data_file option must be set."); if (output_file == null) Die("Error: output_file option must be set."); // set default values if the user did not change them if (num_runs == 0) num_runs = 1; if (num_steps == 0) num_steps = 100; if (burn_in == 0) burn_in = 50; if (sample_steps == 0) sample_steps = 100; System.out.println("Parameters"); System.out.println("----------"); System.out.println("task: " + task); System.out.println("data_file: " + data_file); System.out.println("gene_file: " + gene_file); System.out.println("output_file: " + output_file); System.out.println("lambda: " + lambda); System.out.println("mu: " + mu); System.out.println("alpha: " + alpha); System.out.println("beta: " + beta); System.out.println("num_steps: " + num_steps); System.out.println("burn_in: " + burn_in); System.out.println("sample_steps: " + sample_steps); System.out.println("score_gain: " + score_gain); // Create ModuleNetwork object ModuleNetwork M = new ModuleNetwork(); M.setNormalGammaPriors(lambda, mu, alpha, beta); M.readExpressionMatrix(data_file, gene_file); M.setNormalGammaPriors(lambda, mu, alpha, beta); // Gibbs sample different module sets with one tree per module M.gibbsSamplerGenes(init_num_clust, num_runs, num_steps, burn_in, sample_steps, score_gain, use_bayesian_score); // write results to text file M.writeClusters(output_file); } //------------------------------------------------------------------------------------- // tight_clusters task: node clustering to produce tight clusters //------------------------------------------------------------------------------------- else if (task.equalsIgnoreCase("tight_clusters")) { if (data_file == null) Die("Error: data_file option must be set."); if (cluster_file == null) Die("Error: cluster_file 
option must be set."); if (output_file == null) Die("Error: output_file option must be set."); System.out.println("Parameters"); System.out.println("----------"); System.out.println("task: " + task); System.out.println("data_file: " + data_file); System.out.println("cluster_file: " + cluster_file); System.out.println("output_file: " + output_file); System.out.println("node_clustering: " + node_clustering); System.out.println("min_weight: " + min_weight); System.out.println("min_clust_size: " + min_clust_size); System.out.println("min_clust_score: " + min_clust_score); System.out.println(); ModuleNetwork M = new ModuleNetwork(); M.readExpressionMatrix(data_file, null); M.readMultipleClusters(cluster_file); // find tight clusters with node clustering algorithm CentroidClustering cc = new CentroidClustering(M, node_clustering, min_weight, min_clust_size, min_clust_score); cc.doCentroidClustering(); cc.printClusters(output_file); } //------------------------------------------------------------------------------------- // regulators task: learn regulation programs (gibbs sampling exp. 
+ assign regulators) //------------------------------------------------------------------------------------- else if (task.equalsIgnoreCase("regulators")) { // those parameters must be set if (data_file == null) Die("Error: data_file option must be set."); if (reg_file == null) Die("Error: reg_file option must be set."); if (cluster_file == null) Die("Error: cluster_file option must be set."); if (output_file == null) Die("Error: output_file option must be set."); // set default values if the user did not change them if (num_steps == 0) num_steps = 1100; if (burn_in == 0) burn_in = 100; if (sample_steps == 0) sample_steps = 100; System.out.println("Parameters"); System.out.println("----------"); System.out.println("task: " + task); System.out.println("data_file: " + data_file); System.out.println("reg_file: " + reg_file); System.out.println("cluster_file: " + cluster_file); System.out.println("output_file: " + output_file); System.out.println("lambda: " + lambda); System.out.println("mu: " + mu); System.out.println("alpha: " + alpha); System.out.println("beta: " + beta); System.out.println("num_runs: " + num_runs); System.out.println("num_steps: " + num_steps); System.out.println("burn_in: " + burn_in); System.out.println("sample_steps: " + sample_steps); System.out.println("score_gain: " + score_gain); System.out.println("num_reg: " + num_reg); // create module network object ModuleNetwork M = new ModuleNetwork(); M.setNormalGammaPriors(lambda, mu, alpha, beta); M.readExpressionMatrix(data_file, null); M.readClusters(cluster_file); M.readRegulators(reg_file); M.initStatisticsAndScore(); M.setDataMeanAndSDFromModuleset(); // cluster experiments using the gibbs sampler M.gibbsSamplerExpts(num_runs, num_steps, burn_in, sample_steps, score_gain, use_bayesian_score); // assign regulators M.assignRegulatorsNoAcyclStoch(beta_reg, num_reg); // write results as text file with all regulators, top 1%, random regulators and regulations trees as xml 
M.printRegulators(output_file + ".allreg.txt", true, false); M.printRegulators(output_file + ".topreg.txt", false, false); M.printRandomRegulators(output_file + ".randomreg.txt", false); M.writeRegTreeXML(output_file + ".xml.gz"); } //---------------------------------------------------------- // experiments task: cluster conditions using gibbs sampling //---------------------------------------------------------- else if (task.equalsIgnoreCase("experiments")) { // those parameters must be set if (data_file == null) Die("Error: data_file option must be set."); if (cluster_file == null) Die("Error: cluster_file option must be set."); if (output_file == null) Die("Error: output_file option must be set."); // set default values if the user did not change them if (num_steps == 0) num_steps = 1100; if (burn_in == 0) burn_in = 100; if (sample_steps == 0) sample_steps = 100; System.out.println("Parameters"); System.out.println("----------"); System.out.println("task: " + task); System.out.println("data_file: " + data_file); System.out.println("cluster_file: " + cluster_file); System.out.println("output_file: " + output_file); System.out.println("lambda: " + lambda); System.out.println("mu: " + mu); System.out.println("alpha: " + alpha); System.out.println("beta: " + beta); System.out.println("num_steps: " + num_steps); System.out.println("burn_in: " + burn_in); System.out.println("sample_steps: " + sample_steps); System.out.println("score_gain: " + score_gain); // read data and clusters ModuleNetwork M = new ModuleNetwork(); M.setNormalGammaPriors(lambda, mu, alpha, beta); M.readExpressionMatrix(data_file, null); M.readClusters(cluster_file); M.initStatisticsAndScore(); // cluster experiments using the gibbs sampler M.gibbsSamplerExpts(num_runs, num_steps, burn_in, sample_steps, score_gain, use_bayesian_score); // write results as xml file M.writeRegTreeXML(output_file); } //--------------------------------------------------------------- // split_reg task: assign regulators 
for a given range of modules //--------------------------------------------------------------- else if (task.equalsIgnoreCase("split_reg")) { // those parameters must be set if (data_file == null) Die("Error: data_file option must be set."); if (reg_file == null) Die("Error: reg_file option must be set."); if (cluster_file == null) Die("Error: cluster_file option must be set."); if (tree_file == null) Die("Error: tree_file option must be set."); if (output_file == null) Die("Error: output_file option must be set."); if (range == null) Die("Error: range option must be set."); System.out.println("Parameters"); System.out.println("----------"); System.out.println("task: " + task); System.out.println("data_file: " + data_file); System.out.println("reg_file: " + reg_file); System.out.println("cluster_file: " + cluster_file); System.out.println("output_file: " + output_file); System.out.println("num_reg: " + num_reg); System.out.println("beta_reg: " + beta_reg); System.out.println("range: " + range); ModuleNetwork M = new ModuleNetwork(); M.setNormalGammaPriors(lambda, mu, alpha, beta); M.readExpressionMatrix(data_file, null); M.readClusters(cluster_file); M.readRegulators(reg_file); M.initStatisticsAndScore(); M.readRegTreeXML(tree_file); String[] val = range.split(":"); int start_module = Integer.parseInt(val[0]); int stop_module = Integer.parseInt(val[1]); // assign regulators M.assignRegulatorsNoAcyclStoch(beta_reg, num_reg, start_module, stop_module); // write results M.printRegulators(output_file + ".allreg.txt", true, false); M.printRandomRegulators(output_file + ".randomreg.txt", false); M.writeRegTreeXML(output_file + ".xml.gz"); } //---------------------------------------------------------------------------- // go_annotation task: GO annotation of a cluster file //---------------------------------------------------------------------------- else if (task.equalsIgnoreCase("go_annotation")) { // those parameters must be set if (cluster_file == null) Die("Error: 
cluster_file parameter must be set."); if (output_file == null) Die("Error: output_file option must be set."); if (go_annot_file == null) Die("Error: go_annot_file option must be set."); if (go_ontology_file == null) Die("Error: go_ontology_file option must be set."); if (go_ref_file == null) Die("Error: go_ref_file option must be set."); System.out.println("Parameters"); System.out.println("----------"); System.out.println("task: " + task); System.out.println("cluster_file: " + cluster_file); System.out.println("output_file: " + output_file); System.out.println("go_annot_file: " + go_annot_file); System.out.println("go_ontology_file: " + go_ontology_file); System.out.println("go_ref_file: " + go_ref_file); System.out.println("go_p_value: " + go_p_value); System.out.println("go_namespace: " + go_namespace); System.out.println("map_file " + map_file); BiNGO b = new BiNGO(go_annot_file, go_ontology_file, go_p_value, go_namespace); try { b.GOstats(cluster_file, go_ref_file, output_file, map_file); } catch (IOException e) { e.printStackTrace(); } } //---------------------------------------------------------------------------- // figures task: create eps figures for each module //---------------------------------------------------------------------------- else if (task.equalsIgnoreCase("figures")) { // those parameters must be set if (top_regulators == null) Die("Error: top_regulators option must be set."); if (data_file == null) Die("Error: data_file option must be set."); if (reg_file == null) Die("Error: reg_file option must be set."); if (cluster_file == null) Die("Error: cluster_file option must be set."); if (tree_file == null) Die("Error: tree_file option must be set."); System.out.println("Parameters"); System.out.println("----------"); System.out.println("task: " + task); System.out.println("data_file: " + data_file); System.out.println("reg_file: " + reg_file); System.out.println("cluster_file: " + cluster_file); System.out.println("tree_file: " + tree_file); 
System.out.println("top_regulators: " + top_regulators); System.out.println("use_regulator_mean: " + use_regulator_mean); System.out.println("use_global_mean: " + use_global_mean); System.out.println("map_file: " + map_file); System.out.println("cut_level: " + cut_level); System.out.println("draw_experiment_names: " + draw_experiment_names); System.out.println("draw_experiment_color: " + draw_experiment_color); ModuleNetwork M = new ModuleNetwork(); //read expression data, genes, clusters and regulators from files M.setNormalGammaPriors(lambda, mu, alpha, beta); M.readExpressionMatrix(data_file, null); M.readClusters(cluster_file); M.readRegulators(reg_file); M.initStatisticsAndScore(); M.setDataMeanAndSDFromModuleset(); // read regulation trees from xml file M.readRegTreeXML(tree_file); M.setTestSplits(); // set top regulators for each module M.setTopRegulatorClasses(top_regulators); // calculate mean and sigma for all modules M.setModuleMeanSigma(); M.checkExperiments(); // use module mean (default) or global mean for figures M.setGlobalMeanForFigures(use_global_mean); // use individual regulators mean for figures (default false) M.setRegulatorlMeanForFigures(use_regulator_mean); if (use_regulator_mean == true) M.setRegulatorMeanSigma(); // change gene names if a map file is given if (map_file != null) M.changeGeneNames(map_file); // cut trees to a certain level if (cut_level > 0) { for (Module mod : M.moduleSet) { for (TreeNode t : mod.hierarchicalTrees) { t.testLevel(cut_level); } } } DrawModules dm = new DrawModules(M); if (draw_experiment_color != null) { M.setExperimentColor(draw_experiment_color); dm.enableExperimentColor(); } if (draw_experiment_names == false) { dm.unsetDrawExperimentNames(); } dm.drawAllModules(); } //---------------------------------------------------------------------------- // topdown task: run "old" heuristic algo //---------------------------------------------------------------------------- else if (task.equalsIgnoreCase("topdown")) 
{ int maxParents = 3; double epsConvergence = 1E-3; // Create ModuleNetwork object ModuleNetwork M = new ModuleNetwork(); M.setNormalGammaPriors(lambda, mu, alpha, beta); M.readExpressionMatrix(data_file, gene_file); M.setNormalGammaPriors(lambda, mu, alpha, beta); M.readRegulators(reg_file); // Top-down search M.heuristicSearchMaxTopDown(maxParents, epsConvergence); // write results as xml file M.writeRegTreeXML(output_file); } else { System.out.println("task option '" + task + "' unknown."); System.out.println(); } }
From source file:com.genentech.chemistry.openEye.apps.SDFMCSSNNFinder.java
public static void main(String... args) throws IOException { CommandLineParser parser = new PosixParser(); CommandLine cmd = null;/*from ww w .j a va 2 s . c om*/ try { cmd = parser.parse(options, args); } catch (Exception e) { System.err.println(e.getMessage()); exitWithHelp(); } args = cmd.getArgs(); if (args.length > 0) { exitWithHelp("Unknown param: " + args[0]); } if (cmd.hasOption("d")) { System.err.println("Start debugger and press return:"); new BufferedReader(new InputStreamReader(System.in)).readLine(); } int nCpu = 1; int maxNeighbors = 1; double minSim = 0D; String idTag = cmd.getOptionValue("idTag"); boolean printAll = cmd.hasOption("printAll"); String d = cmd.getOptionValue("nCpu"); if (d != null) nCpu = Integer.parseInt(d); d = cmd.getOptionValue("maxNeighbors"); if (d != null) maxNeighbors = Integer.parseInt(d); d = cmd.getOptionValue("minSimilarity"); if (d != null) minSim = Double.parseDouble(d); String countAboveSimilarityStr = cmd.getOptionValue("countSimilarAbove"); String inFile = cmd.getOptionValue("in"); String outFile = cmd.getOptionValue("out"); String refFile = cmd.getOptionValue("ref"); String tabOutput = cmd.getOptionValue("tabOutput"); boolean outputDuplicates = cmd.hasOption("outputDuplicates"); if (outputDuplicates && tabOutput != null) exitWithHelp("-outputDuplicates will not work with outputVTab"); if (outputDuplicates && refFile == null) exitWithHelp("-outputDuplicates requires -ref "); if ("tab".equalsIgnoreCase(tabOutput) && refFile != null) exitWithHelp("-tabOutput tab: does not work with reference file"); if ("tab".equalsIgnoreCase(tabOutput) && maxNeighbors == 1) exitWithHelp("-tabOutput tab: does not make sense with -maxNeighbors = 1"); if (cmd.hasOption("countSimilarAbove") && tabOutput != null) exitWithHelp("-countSimilarAbove not supported for tab or vTab output"); if (printAll && !(maxNeighbors > 1 || minSim > 0)) exitWithHelp("printAll only supported if: maxNeighbors > 1 or minSim > 0"); if (printAll && tabOutput != 
null) System.err.println("WARNING: printAll ignored for tab output!\n"); SimComparatorFactory<OEMolBase, OEMolBase, SimComparator<OEMolBase>> compFact; compFact = getComparatorFactory(cmd); if (refFile == null) { // no reference file; run all by all comparison performMatrixNNSearch(inFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu, countAboveSimilarityStr, printAll); } else { // refrence file; compare inFile to refFile performReferenceSearch(inFile, refFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu, countAboveSimilarityStr, outputDuplicates, printAll); } }
From source file:erigo.filepump.FilePump.java
public static void main(String[] argsI) { boolean local_bShowGUI = true; String initial_outputFolder = "."; double initial_filesPerSec = 1.0; int initial_totNumFiles = 1000; String initial_mode_str = "local"; String initial_ftpHost;//from w ww. j av a 2s . c o m String initial_ftpUser; String initial_ftpPassword; // // Parse command line arguments // // We use the Apche Commons CLI library to handle command line // arguments. See https://commons.apache.org/proper/commons-cli/usage.html // for examples, although note that we use the more up-to-date form // (Option.builder) to create Option objects. // // 1. Setup command line options // Options options = new Options(); // Example of a Boolean option (i.e., only the flag, no argument goes with it) options.addOption("h", "help", false, "Print this message."); // The following example is for: -outputfolder <folder> Location of output files Option outputFolderOption = Option.builder("outputfolder").argName("folder").hasArg() .desc("Location of output files; this folder must exist (it will not be created); default = \"" + initial_outputFolder + "\".") .build(); options.addOption(outputFolderOption); Option filesPerSecOption = Option.builder("fps").argName("filespersec").hasArg() .desc("Desired file rate, files/sec; default = " + initial_filesPerSec + ".").build(); options.addOption(filesPerSecOption); Option totNumFilesOption = Option.builder("totnum").argName("num").hasArg().desc( "Total number of output files; use -1 for unlimited number; default = " + initial_totNumFiles + ".") .build(); options.addOption(totNumFilesOption); Option outputModeOption = Option.builder("mode").argName("mode").hasArg() .desc("Specifies output interface, one of <local|ftp|sftp>; default = " + initial_mode_str + ".") .build(); options.addOption(outputModeOption); Option ftpHostOption = Option.builder("ftphost").argName("host").hasArg() .desc("Host name, for FTP or SFTP.").build(); options.addOption(ftpHostOption); Option ftpUsernameOption = 
Option.builder("ftpuser").argName("user").hasArg() .desc("Username, for FTP or SFTP.").build(); options.addOption(ftpUsernameOption); Option ftpPasswordOption = Option.builder("ftppass").argName("password").hasArg() .desc("Password, for FTP or SFTP.").build(); options.addOption(ftpPasswordOption); Option autoRunOption = new Option("x", "Automatically run at startup."); options.addOption(autoRunOption); // // 2. Parse command line options // CommandLineParser parser = new DefaultParser(); CommandLine line = null; try { line = parser.parse(options, argsI); } catch (ParseException exp) { // oops, something went wrong System.err.println("Command line argument parsing failed: " + exp.getMessage()); return; } // // 3. Retrieve the command line values // if (line.hasOption("help")) { // Display help message and quit HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("FilePump", options); return; } if (line.hasOption("x")) { local_bShowGUI = false; } // Where to write the files to initial_outputFolder = line.getOptionValue("outputfolder", initial_outputFolder); // How many files per second the pump should output try { initial_filesPerSec = Double.parseDouble(line.getOptionValue("fps", "" + initial_filesPerSec)); } catch (NumberFormatException nfe) { System.err.println("\nError parsing \"fps\" (it should be a floating point value):\n" + nfe); return; } // Total number of files to write out; -1 indicates unlimited try { initial_totNumFiles = Integer.parseInt(line.getOptionValue("totnum", "" + initial_totNumFiles)); } catch (NumberFormatException nfe) { System.err.println("\nError parsing \"totnum\" (it should be an integer):\n" + nfe); return; } // Specifies how files will be written out initial_mode_str = line.getOptionValue("mode", initial_mode_str); if (!initial_mode_str.equals("local") && !initial_mode_str.equals("ftp") && !initial_mode_str.equals("sftp")) { System.err.println(new String("\nUnrecognized mode, \"" + initial_mode_str + "\"")); return; } // 
FTP hostname initial_ftpHost = line.getOptionValue("ftphost", ""); // FTP username initial_ftpUser = line.getOptionValue("ftpuser", ""); // FTP password initial_ftpPassword = line.getOptionValue("ftppass", ""); // Create the FilePump object new FilePump(local_bShowGUI, initial_outputFolder, initial_filesPerSec, initial_totNumFiles, initial_mode_str, initial_ftpHost, initial_ftpUser, initial_ftpPassword); }
From source file:com.bah.applefox.main.Ingest.java
public static void main(String[] args) throws Exception { if (args.length == 1 && args[0].equals("--help")) { System.out.println("Not enough arguments"); System.out.println("Arguments should be in the format <properties file> <command>"); System.out.println("Valid commands:"); System.out.println("\tpr: Calculates Page Rank"); System.out.println("\timageload: Loads Images from URLs"); System.out.println("\tload: Loads Full Text Data"); System.out.println("\tingest: Ingests URLs from given seed"); System.out.println("\tftsample: Creates a Full Text Index Sample HashMap"); System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap"); }/*from w w w .jav a 2 s . c o m*/ if (args.length > 2) { System.out.println("2 Arguments expected, " + args.length + " given."); } if (args.length < 2) { System.out.println("Not enough arguments"); System.out.println("Arguments should be in the format <properties file> <command>"); System.out.println("Valid commands:"); System.out.println("\tpr: Calculates Page Rank"); System.out.println("\timageload: Loads Images from URLs"); System.out.println("\tload: Loads Full Text Data"); System.out.println("\tingest: Ingests URLs from given seed"); System.out.println("\tftsample: Creates a Full Text Index Sample HashMap"); System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap"); } injector = Guice.createInjector(new IngesterModule()); // The properties object to read from the configuration file Properties properties = new Properties(); try { // Load configuration file from the command line properties.load(new FileInputStream(args[0])); } catch (Exception e) { log.error("ABORT: File not found or could not read from file ->" + e.getMessage()); log.error("Enter the location of the configuration file"); System.exit(1); } // Initialize variables from configuration file // Accumulo Variables INSTANCE_NAME = properties.getProperty("INSTANCE_NAME"); ZK_SERVERS = 
properties.getProperty("ZK_SERVERS"); USERNAME = properties.getProperty("USERNAME"); PASSWORD = properties.getProperty("PASSWORD"); SPLIT_SIZE = properties.getProperty("SPLIT_SIZE"); NUM_ITERATIONS = Integer.parseInt(properties.getProperty("NUM_ITERATIONS")); NUM_NODES = Integer.parseInt(properties.getProperty("NUM_NODES")); // General Search Variables MAX_NGRAMS = Integer.parseInt(properties.getProperty("MAX_NGRAMS")); GENERAL_STOP = properties.getProperty("GENERAL_STOP"); // Full Text Variables FT_DATA_TABLE = properties.getProperty("FT_DATA_TABLE"); FT_SAMPLE = properties.getProperty("FT_SAMPLE"); FT_CHECKED_TABLE = properties.getProperty("FT_CHECKED_TABLE"); FT_DIVS_FILE = properties.getProperty("FT_DIVS_FILE"); FT_SPLIT_SIZE = properties.getProperty("FT_SPLIT_SIZE"); // Web Crawler Variables URL_TABLE = properties.getProperty("URL_TABLE"); SEED = properties.getProperty("SEED"); USER_AGENT = properties.getProperty("USER_AGENT"); URL_SPLIT_SIZE = properties.getProperty("URL_SPLIT_SIZE"); // Page Rank Variables PR_TABLE_PREFIX = properties.getProperty("PR_TABLE_PREFIX"); PR_URL_MAP_TABLE_PREFIX = properties.getProperty("PR_URL_MAP_TABLE_PREFIX"); PR_OUT_LINKS_COUNT_TABLE = properties.getProperty("PR_OUT_LINKS_COUNT_TABLE"); PR_FILE = properties.getProperty("PR_FILE"); PR_DAMPENING_FACTOR = Double.parseDouble(properties.getProperty("PR_DAMPENING_FACTOR")); PR_ITERATIONS = Integer.parseInt(properties.getProperty("PR_ITERATIONS")); PR_SPLIT_SIZE = properties.getProperty("PR_SPLIT_SIZE"); // Image Variables IMG_HASH_TABLE = properties.getProperty("IMG_HASH_TABLE"); IMG_CHECKED_TABLE = properties.getProperty("IMG_CHECKED_TABLE"); IMG_TAG_TABLE = properties.getProperty("IMG_TAG_TABLE"); IMG_HASH_SAMPLE_TABLE = properties.getProperty("IMG_HASH_SAMPLE_TABLE"); IMG_TAG_SAMPLE_TABLE = properties.getProperty("IMG_TAG_SAMPLE_TABLE"); IMG_SPLIT_SIZE = properties.getProperty("IMG_SPLIT_SIZE"); // Future Use: // Work Directory in HDFS WORK_DIR = 
properties.getProperty("WORK_DIR"); // Initialize variable from command line RUN = args[1].toLowerCase(); // Set the instance information for AccumuloUtils AccumuloUtils.setInstanceName(INSTANCE_NAME); AccumuloUtils.setInstancePassword(PASSWORD); AccumuloUtils.setUser(USERNAME); AccumuloUtils.setZooserver(ZK_SERVERS); AccumuloUtils.setSplitSize(SPLIT_SIZE); String[] temp = new String[25]; // Accumulo Variables temp[0] = INSTANCE_NAME; temp[1] = ZK_SERVERS; temp[2] = USERNAME; temp[3] = PASSWORD; // Number of Map Tasks temp[4] = Integer.toString((int) Math.ceil(1.75 * NUM_NODES * 2)); // Web Crawler Variables temp[5] = URL_TABLE; temp[6] = USER_AGENT; // Future Use temp[7] = WORK_DIR; // General Search temp[8] = GENERAL_STOP; temp[9] = Integer.toString(MAX_NGRAMS); // Full Text Variables temp[10] = FT_DATA_TABLE; temp[11] = FT_CHECKED_TABLE; // Page Rank Variables temp[12] = PR_URL_MAP_TABLE_PREFIX; temp[13] = PR_TABLE_PREFIX; temp[14] = Double.toString(PR_DAMPENING_FACTOR); temp[15] = PR_OUT_LINKS_COUNT_TABLE; temp[16] = PR_FILE; // Image Variables temp[17] = IMG_HASH_TABLE; temp[18] = IMG_CHECKED_TABLE; temp[19] = IMG_TAG_TABLE; temp[20] = FT_DIVS_FILE; // Table Split Sizes temp[21] = FT_SPLIT_SIZE; temp[22] = IMG_SPLIT_SIZE; temp[23] = URL_SPLIT_SIZE; temp[24] = PR_SPLIT_SIZE; if (RUN.equals("pr")) { // Run PR_ITERATIONS number of iterations for page ranking PageRank.createPageRank(temp, PR_ITERATIONS, URL_SPLIT_SIZE); } else if (RUN.equals("imageload")) { // Load image index AccumuloUtils.setSplitSize(URL_SPLIT_SIZE); ToolRunner.run(new ImageLoader(), temp); } else if (RUN.equals("ingest")) { // Ingest System.out.println("Ingesting"); // Set table split size AccumuloUtils.setSplitSize(URL_SPLIT_SIZE); // Write the seed value to the table BatchWriter w; Value v = new Value(); v.set("0".getBytes()); Mutation m = new Mutation(SEED); m.put("0", "0", v); w = AccumuloUtils.connectBatchWrite(URL_TABLE); w.addMutation(m); for (int i = 0; i < NUM_ITERATIONS; i++) { // 
Run the ToolRunner for NUM_ITERATIONS iterations ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Ingester.class), temp); } } else if (RUN.equals("load")) { // Parse the URLs and add to the data table AccumuloUtils.setSplitSize(URL_SPLIT_SIZE); BatchWriter w = AccumuloUtils.connectBatchWrite(FT_CHECKED_TABLE); w.close(); AccumuloUtils.setSplitSize(FT_SPLIT_SIZE); w = AccumuloUtils.connectBatchWrite(FT_DATA_TABLE); w.close(); ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Loader.class), temp); } else if (RUN.equals("ftsample")) { // Create a sample table for full text index FTAccumuloSampler ftSampler = new FTAccumuloSampler(FT_SAMPLE, FT_DATA_TABLE, FT_CHECKED_TABLE); ftSampler.createSample(); } else if (RUN.equals("imagesample")) { // Create a sample table for images ImageAccumuloSampler imgHashSampler = new ImageAccumuloSampler(IMG_HASH_SAMPLE_TABLE, IMG_HASH_TABLE, IMG_CHECKED_TABLE); imgHashSampler.createSample(); ImageAccumuloSampler imgTagSampler = new ImageAccumuloSampler(IMG_TAG_SAMPLE_TABLE, IMG_TAG_TABLE, IMG_CHECKED_TABLE); imgTagSampler.createSample(); } else { System.out.println("Invalid argument " + RUN + "."); System.out.println("Valid Arguments:"); System.out.println("\tpr: Calculates Page Rank"); System.out.println("\timageload: Loads Images from URLs"); System.out.println("\tload: Loads Full Text Data"); System.out.println("\tingest: Ingests URLs from given seed"); System.out.println("\tftsample: Creates a Full Text Index Sample HashMap"); System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap"); } }