List of usage examples for java.io PrintStream println
public void println(Object x)
From source file:it.unimi.dsi.webgraph.algo.HyperBall.java
public static void main(String arg[]) throws IOException, JSAPException, IllegalArgumentException, ClassNotFoundException, IllegalAccessException, InvocationTargetException, InstantiationException, NoSuchMethodException { SimpleJSAP jsap = new SimpleJSAP(HyperBall.class.getName(), "Runs HyperBall on the given graph, possibly computing positive geometric centralities.\n\nPlease note that to compute negative centralities on directed graphs (which is usually what you want) you have to compute positive centralities on the transpose.", new Parameter[] { new FlaggedOption("log2m", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', "log2m", "The logarithm of the number of registers."), new FlaggedOption("upperBound", JSAP.LONGSIZE_PARSER, Long.toString(Long.MAX_VALUE), JSAP.NOT_REQUIRED, 'u', "upper-bound", "An upper bound to the number of iterations."), new FlaggedOption("threshold", JSAP.DOUBLE_PARSER, "-1", JSAP.NOT_REQUIRED, 't', "threshold", "A threshold that will be used to stop the computation by relative increment. If it is -1, the iteration will stop only when all registers do not change their value (recommended)."), new FlaggedOption("threads", JSAP.INTSIZE_PARSER, "0", JSAP.NOT_REQUIRED, 'T', "threads", "The number of threads to be used. If 0, the number will be estimated automatically."), new FlaggedOption("granularity", JSAP.INTSIZE_PARSER, Integer.toString(DEFAULT_GRANULARITY), JSAP.NOT_REQUIRED, 'g', "granularity", "The number of node per task in a multicore environment."), new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, Util.formatBinarySize(DEFAULT_BUFFER_SIZE), JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of an I/O buffer in bytes."), new FlaggedOption("neighbourhoodFunction", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'n', "neighbourhood-function", "Store an approximation the neighbourhood function in text format."), new FlaggedOption("sumOfDistances", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "sum-of-distances", "Store an approximation of the sum of distances from each node as a binary list of floats."), new FlaggedOption("harmonicCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'h', "harmonic-centrality", "Store an approximation of the positive harmonic centrality (the sum of the reciprocals of distances from each node) as a binary list of floats."), new FlaggedOption("discountedGainCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'z', "discounted-gain-centrality", "A positive discounted gain centrality to be approximated and stored; it is specified as O:F where O is the spec of an object of type Int2DoubleFunction and F is the name of the file where the binary list of floats will be stored. The spec can be either the name of a public field of HyperBall, or a constructor invocation of a class implementing Int2DoubleFunction.") .setAllowMultipleDeclarations(true), new FlaggedOption("closenessCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c', "closeness-centrality", "Store an approximation of the positive closeness centrality of each node (the reciprocal of sum of the distances from each node) as a binary list of floats. Terminal nodes will have centrality equal to zero."), new FlaggedOption("linCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'L', "lin-centrality", "Store an approximation of the positive Lin centrality of each node (the reciprocal of sum of the distances from each node multiplied by the square of the number of nodes reachable from the node) as a binary list of floats. Terminal nodes will have centrality equal to one."), new FlaggedOption("nieminenCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'N', "nieminen-centrality", "Store an approximation of the positive Nieminen centrality of each node (the square of the number of nodes reachable from each node minus the sum of the distances from the node) as a binary list of floats."), new FlaggedOption("reachable", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'r', "reachable", "Store an approximation of the number of nodes reachable from each node as a binary list of floats."), new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'S', "seed", "The random seed."), new Switch("spec", 's', "spec", "The basename is not a basename but rather a specification of the form <ImmutableGraphImplementation>(arg,arg,...)."), new Switch("offline", 'o', "offline", "Do not load the graph in main memory. If this option is used, the graph will be loaded in offline (for one thread) or mapped (for several threads) mode."), new Switch("external", 'e', "external", "Use an external dump file instead of core memory to store new counter values. Note that the file might be very large: you might need to set suitably the Java temporary directory (-Djava.io.tmpdir=DIR)."), new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."), new UnflaggedOption("basenamet", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The basename of the transpose graph for systolic computations (strongly suggested). If it is equal to <basename>, the graph will be assumed to be symmetric and will be loaded just once."), }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1);//from w w w.j a v a 2s . c o m final boolean spec = jsapResult.getBoolean("spec"); final boolean external = jsapResult.getBoolean("external"); final boolean offline = jsapResult.getBoolean("offline"); final String neighbourhoodFunctionFile = jsapResult.getString("neighbourhoodFunction"); final boolean neighbourhoodFunction = jsapResult.userSpecified("neighbourhoodFunction"); final String sumOfDistancesFile = jsapResult.getString("sumOfDistances"); final boolean sumOfDistances = jsapResult.userSpecified("sumOfDistances"); final String harmonicCentralityFile = jsapResult.getString("harmonicCentrality"); final boolean harmonicCentrality = jsapResult.userSpecified("harmonicCentrality"); final String closenessCentralityFile = jsapResult.getString("closenessCentrality"); final boolean closenessCentrality = jsapResult.userSpecified("closenessCentrality"); final String linCentralityFile = jsapResult.getString("linCentrality"); final boolean linCentrality = jsapResult.userSpecified("linCentrality"); final String nieminenCentralityFile = jsapResult.getString("nieminenCentrality"); final boolean nieminenCentrality = jsapResult.userSpecified("nieminenCentrality"); final String reachableFile = jsapResult.getString("reachable"); final boolean reachable = jsapResult.userSpecified("reachable"); final String basename = jsapResult.getString("basename"); final String basenamet = jsapResult.getString("basenamet"); final ProgressLogger pl = new ProgressLogger(LOGGER); final int log2m = jsapResult.getInt("log2m"); final int threads = jsapResult.getInt("threads"); final int bufferSize = jsapResult.getInt("bufferSize"); final int granularity = jsapResult.getInt("granularity"); final long seed = jsapResult.userSpecified("seed") ? jsapResult.getLong("seed") : Util.randomSeed(); final String[] discountedGainCentralitySpec = jsapResult.getStringArray("discountedGainCentrality"); final Int2DoubleFunction[] discountFunction = new Int2DoubleFunction[discountedGainCentralitySpec.length]; final String[] discountedGainCentralityFile = new String[discountedGainCentralitySpec.length]; for (int i = 0; i < discountedGainCentralitySpec.length; i++) { int pos = discountedGainCentralitySpec[i].indexOf(':'); if (pos < 0) throw new IllegalArgumentException("Wrong spec <" + discountedGainCentralitySpec[i] + ">"); discountedGainCentralityFile[i] = discountedGainCentralitySpec[i].substring(pos + 1); String gainSpec = discountedGainCentralitySpec[i].substring(0, pos); Int2DoubleFunction candidateFunction; try { candidateFunction = (Int2DoubleFunction) HyperBall.class.getField(gainSpec).get(null); } catch (SecurityException e) { throw new IllegalArgumentException("Field " + gainSpec + " exists but cannot be accessed", e); } catch (ClassCastException e) { throw new IllegalArgumentException( "Field " + gainSpec + " exists but it is not of type Int2DoubleFunction", e); } catch (NoSuchFieldException e) { candidateFunction = null; } discountFunction[i] = candidateFunction == null ? ObjectParser.fromSpec(gainSpec, Int2DoubleFunction.class) : candidateFunction; } final ImmutableGraph graph = spec ? ObjectParser.fromSpec(basename, ImmutableGraph.class, GraphClassParser.PACKAGE) : offline ? ((numberOfThreads(threads) == 1 && basenamet == null ? ImmutableGraph.loadOffline(basename) : ImmutableGraph.loadMapped(basename, new ProgressLogger()))) : ImmutableGraph.load(basename, new ProgressLogger()); final ImmutableGraph grapht = basenamet == null ? null : basenamet.equals(basename) ? graph : spec ? ObjectParser.fromSpec(basenamet, ImmutableGraph.class, GraphClassParser.PACKAGE) : offline ? ImmutableGraph.loadMapped(basenamet, new ProgressLogger()) : ImmutableGraph.load(basenamet, new ProgressLogger()); final HyperBall hyperBall = new HyperBall(graph, grapht, log2m, pl, threads, bufferSize, granularity, external, sumOfDistances || closenessCentrality || linCentrality || nieminenCentrality, harmonicCentrality, discountFunction, seed); hyperBall.run(jsapResult.getLong("upperBound"), jsapResult.getDouble("threshold")); hyperBall.close(); if (neighbourhoodFunction) { final PrintStream stream = new PrintStream( new FastBufferedOutputStream(new FileOutputStream(neighbourhoodFunctionFile))); for (DoubleIterator i = hyperBall.neighbourhoodFunction.iterator(); i.hasNext();) stream.println(BigDecimal.valueOf(i.nextDouble()).toPlainString()); stream.close(); } if (sumOfDistances) BinIO.storeFloats(hyperBall.sumOfDistances, sumOfDistancesFile); if (harmonicCentrality) BinIO.storeFloats(hyperBall.sumOfInverseDistances, harmonicCentralityFile); for (int i = 0; i < discountedGainCentralitySpec.length; i++) BinIO.storeFloats(hyperBall.discountedCentrality[i], discountedGainCentralityFile[i]); if (closenessCentrality) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(closenessCentralityFile))); for (int i = 0; i < n; i++) dos.writeFloat(hyperBall.sumOfDistances[i] == 0 ? 0 : 1 / hyperBall.sumOfDistances[i]); dos.close(); } if (linCentrality) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(linCentralityFile))); for (int i = 0; i < n; i++) { // Lin's index for isolated nodes is by (our) definition one (it's smaller than any other node). if (hyperBall.sumOfDistances[i] == 0) dos.writeFloat(1); else { final double count = hyperBall.count(i); dos.writeFloat((float) (count * count / hyperBall.sumOfDistances[i])); } } dos.close(); } if (nieminenCentrality) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(nieminenCentralityFile))); for (int i = 0; i < n; i++) { final double count = hyperBall.count(i); dos.writeFloat((float) (count * count - hyperBall.sumOfDistances[i])); } dos.close(); } if (reachable) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(reachableFile))); for (int i = 0; i < n; i++) dos.writeFloat((float) hyperBall.count(i)); dos.close(); } }
From source file:io.anserini.search.SearchTweets.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RM3_OPTION, "apply relevance feedback with rm3")); options.addOption(/*w w w . j a va2 s . c o m*/ OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(SearchTweets.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; String topicsFile = cmdline.getOptionValue(QUERIES_OPTION); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexLocation.getAbsolutePath()))); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } MicroblogTopicSet topics = MicroblogTopicSet.fromFile(new File(topicsFile)); for (MicroblogTopic topic : topics) { Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); Query query = AnalyzerUtils.buildBagOfWordsQuery(StatusField.TEXT.name, IndexTweets.ANALYZER, topic.getQuery()); TopDocs rs = searcher.search(query, filter, numResults); RerankerContext context = new RerankerContext(searcher, query, topic.getQuery(), filter); RerankerCascade cascade = new RerankerCascade(context); if (cmdline.hasOption(RM3_OPTION)) { cascade.add(new Rm3Reranker(IndexTweets.ANALYZER, StatusField.TEXT.name)); cascade.add(new RemoveRetweetsTemporalTiebreakReranker()); } else { cascade.add(new RemoveRetweetsTemporalTiebreakReranker()); } ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher)); for (int i = 0; i < docs.documents.length; i++) { String qid = topic.getId().replaceFirst("^MB0*", ""); out.println(String.format("%s Q0 %s %d %f %s", qid, docs.documents[i].getField(StatusField.ID.name).numericValue(), (i + 1), docs.scores[i], runtag)); } } reader.close(); out.close(); }
From source file:co.cask.cdap.cli.CLIMain.java
public static void main(String[] args) { final PrintStream output = System.out; Options options = getOptions();/*from w w w.jav a 2 s . co m*/ CLIMainArgs cliMainArgs = CLIMainArgs.parse(args, options); CommandLineParser parser = new BasicParser(); try { CommandLine command = parser.parse(options, cliMainArgs.getOptionTokens()); if (command.hasOption(HELP_OPTION.getOpt())) { usage(); System.exit(0); } LaunchOptions launchOptions = LaunchOptions.builder() .setUri(command.getOptionValue(URI_OPTION.getOpt(), getDefaultURI().toString())) .setDebug(command.hasOption(DEBUG_OPTION.getOpt())) .setVerifySSL(parseBooleanOption(command, VERIFY_SSL_OPTION, DEFAULT_VERIFY_SSL)) .setAutoconnect(parseBooleanOption(command, AUTOCONNECT_OPTION, DEFAULT_AUTOCONNECT)).build(); String scriptFile = command.getOptionValue(SCRIPT_OPTION.getOpt(), ""); boolean hasScriptFile = command.hasOption(SCRIPT_OPTION.getOpt()); String[] commandArgs = cliMainArgs.getCommandTokens(); try { ClientConfig clientConfig = ClientConfig.builder().setConnectionConfig(null).build(); final CLIConfig cliConfig = new CLIConfig(clientConfig, output, new AltStyleTableRenderer()); CLIMain cliMain = new CLIMain(launchOptions, cliConfig); CLI cli = cliMain.getCLI(); cliMain.tryAutoconnect(); CLIConnectionConfig connectionConfig = new CLIConnectionConfig( cliConfig.getClientConfig().getConnectionConfig(), Id.Namespace.DEFAULT, null); cliMain.updateCLIPrompt(connectionConfig); if (hasScriptFile) { File script = cliMain.getFilePathResolver().resolvePathToFile(scriptFile); if (!script.exists()) { output.println("ERROR: Script file '" + script.getAbsolutePath() + "' does not exist"); System.exit(1); } List<String> scriptLines = Files.readLines(script, Charsets.UTF_8); for (String scriptLine : scriptLines) { output.print(cliMain.getPrompt(connectionConfig)); output.println(scriptLine); cli.execute(scriptLine, output); output.println(); } } else if (commandArgs.length == 0) { cli.startInteractiveMode(output); } else { cli.execute(Joiner.on(" ").join(commandArgs), output); } } catch (Exception e) { e.printStackTrace(output); } } catch (ParseException e) { output.println(e.getMessage()); usage(); } }
From source file:edu.msu.cme.rdp.probematch.cli.PrimerMatch.java
public static void main(String[] args) throws Exception { PrintStream out = new PrintStream(System.out); int maxDist = Integer.MAX_VALUE; try {/*from w w w .java 2s . co m*/ CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("outFile")) { out = new PrintStream(new File(line.getOptionValue("outFile"))); } if (line.hasOption("maxDist")) { maxDist = Integer.valueOf(line.getOptionValue("maxDist")); } args = line.getArgs(); if (args.length != 2) { throw new Exception("Unexpected number of command line arguments"); } } catch (Exception e) { System.err.println("Error: " + e.getMessage()); new HelpFormatter().printHelp("PrimerMatch <primer_list | primer_file> <seq_file>", options); return; } List<PatternBitMask64> primers = new ArrayList(); if (new File(args[0]).exists()) { File primerFile = new File(args[0]); SequenceFormat seqformat = SeqUtils.guessFileFormat(primerFile); if (seqformat.equals(SequenceFormat.FASTA)) { SequenceReader reader = new SequenceReader(primerFile); Sequence seq; while ((seq = reader.readNextSequence()) != null) { primers.add(new PatternBitMask64(seq.getSeqString(), true, seq.getSeqName())); } reader.close(); } else { BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line; while ((line = reader.readLine()) != null) { line = line.trim(); if (!line.equals("")) { primers.add(new PatternBitMask64(line, true)); } } reader.close(); } } else { for (String primer : args[0].split(",")) { primers.add(new PatternBitMask64(primer, true)); } } SeqReader seqReader = new SequenceReader(new File(args[1])); Sequence seq; String primerRegion; out.println("#seqname\tdesc\tprimer_index\tprimer_name\tposition\tmismatches\tseq_primer_region"); while ((seq = seqReader.readNextSequence()) != null) { for (int index = 0; index < primers.size(); index++) { PatternBitMask64 primer = primers.get(index); BitVector64Result results = BitVector64.process(seq.getSeqString().toCharArray(), primer, maxDist); for (BitVector64Match result : results.getResults()) { primerRegion = seq.getSeqString().substring( Math.max(0, result.getPosition() - primer.getPatternLength()), result.getPosition()); if (result.getPosition() < primer.getPatternLength()) { for (int pad = result.getPosition(); pad < primer.getPatternLength(); pad++) { primerRegion = "x" + primerRegion; } } out.println(seq.getSeqName() + "\t" + seq.getDesc() + "\t" + (index + 1) + "\t" + primer.getPrimerName() + "\t" + result.getPosition() + "\t" + result.getScore() + "\t" + primerRegion); } } } out.close(); seqReader.close(); }
From source file:nab.detectors.htmjava.HTMModel.java
/** * Launch htm.java NAB detector/*from w ww .j a v a2 s . c o m*/ * * Usage: * As a standalone application (for debug purpose only): * * java -jar htm.java-nab.jar "{\"modelParams\":{....}}" < nab_data.csv > anomalies.out * * For complete list of command line options use: * * java -jar htm.java-nab.jar --help * * As a NAB detector (see 'htmjava_detector.py'): * * python run.py --detect --score --normalize -d htmjava * * Logging options, see "log4j.properties": * * - "LOGLEVEL": Controls log output (default: "OFF") * - "LOGGER": Either "CONSOLE" or "FILE" (default: "CONSOLE") * - "LOGFILE": Log file destination (default: "htmjava.log") * * For example: * * java -DLOGLEVEL=TRACE -DLOGGER=FILE -jar htm.java-nab.jar "{\"modelParams\":{....}}" < nab_data.csv > anomalies.out * */ @SuppressWarnings("resource") public static void main(String[] args) { try { LOGGER.trace("main({})", Arrays.asList(args)); // Parse command line args OptionParser parser = new OptionParser(); parser.nonOptions("OPF parameters object (JSON)"); parser.acceptsAll(Arrays.asList("p", "params"), "OPF parameters file (JSON).\n(default: first non-option argument)").withOptionalArg() .ofType(File.class); parser.acceptsAll(Arrays.asList("i", "input"), "Input data file (csv).\n(default: stdin)") .withOptionalArg().ofType(File.class); parser.acceptsAll(Arrays.asList("o", "output"), "Output results file (csv).\n(default: stdout)") .withOptionalArg().ofType(File.class); parser.acceptsAll(Arrays.asList("s", "skip"), "Header lines to skip").withOptionalArg() .ofType(Integer.class).defaultsTo(0); parser.acceptsAll(Arrays.asList("h", "?", "help"), "Help"); OptionSet options = parser.parse(args); if (args.length == 0 || options.has("h")) { parser.printHelpOn(System.out); return; } // Get in/out files final PrintStream output; final InputStream input; if (options.has("i")) { input = new FileInputStream((File) options.valueOf("i")); } else { input = System.in; } if (options.has("o")) { output = new PrintStream((File) options.valueOf("o")); } else { output = System.out; } // Parse OPF Model Parameters JsonNode params; ObjectMapper mapper = new ObjectMapper(); if (options.has("p")) { params = mapper.readTree((File) options.valueOf("p")); } else if (options.nonOptionArguments().isEmpty()) { try { input.close(); } catch (Exception ignore) { } if (options.has("o")) { try { output.flush(); output.close(); } catch (Exception ignore) { } } throw new IllegalArgumentException("Expecting OPF parameters. See 'help' for more information"); } else { params = mapper.readTree((String) options.nonOptionArguments().get(0)); } // Number of header lines to skip int skip = (int) options.valueOf("s"); // Force timezone to UTC DateTimeZone.setDefault(DateTimeZone.UTC); // Create NAB Network Model HTMModel model = new HTMModel(params); Network network = model.getNetwork(); network.observe().subscribe((inference) -> { double score = inference.getAnomalyScore(); int record = inference.getRecordNum(); LOGGER.trace("record = {}, score = {}", record, score); // Output raw anomaly score output.println(score); }, (error) -> { LOGGER.error("Error processing data", error); }, () -> { LOGGER.trace("Done processing data"); if (LOGGER.isDebugEnabled()) { model.showDebugInfo(); } }); network.start(); // Pipe data to network Publisher publisher = model.getPublisher(); BufferedReader in = new BufferedReader(new InputStreamReader(input)); String line; while ((line = in.readLine()) != null && line.trim().length() > 0) { // Skip header lines if (skip > 0) { skip--; continue; } publisher.onNext(line); } publisher.onComplete(); in.close(); LOGGER.trace("Done publishing data"); } catch (IOException e) { e.printStackTrace(); } }
From source file:edu.msu.cme.rdp.seqmatch.cli.SeqMatchMain.java
public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.println("USAGE: SeqMatchMain [train|seqmatch] <args>"); return;// w w w. j a va 2 s . com } String cmd = args[0]; args = Arrays.copyOfRange(args, 1, args.length); if (cmd.equals("train")) { if (args.length != 2) { System.err.println("USAGE: train <reference sequences> <trainee_out_file_prefix>" + "\nMultiple trainee output files might be created, each containing maximum " + Trainee.MAX_NUM_SEQ + " sequences"); return; } File refSeqs = new File(args[0]); File traineeFileOut = new File(args[1]); //maybe more than 1 trainee files need to be created, depending on the number of seqs CreateMultiMatchFromFile.getMultiTrainee(refSeqs, traineeFileOut); } else if (cmd.equals("seqmatch")) { File refFile = null; File queryFile = null; HashMap<String, String> descMap = new HashMap<String, String>(); PrintStream out = new PrintStream(System.out); int knn = 20; float minSab = .5f; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("knn")) { knn = Integer.parseInt(line.getOptionValue("knn")); } if (line.hasOption("sab")) { minSab = Float.parseFloat(line.getOptionValue("sab")); } if (line.hasOption("desc")) { descMap = readDesc(new File(line.getOptionValue("desc"))); } if (line.hasOption("outFile")) { out = new PrintStream(new File(line.getOptionValue("outFile"))); } args = line.getArgs(); if (args.length != 2) { throw new Exception("Unexpected number of command line arguments"); } refFile = new File(args[0]); queryFile = new File(args[1]); } catch (Exception e) { new HelpFormatter().printHelp("seqmatch <refseqs | trainee_file_or_dir> <query_file>\n" + " trainee_file_or_dir is a single trainee file or a directory containing multiple trainee files", options); System.err.println("Error: " + e.getMessage()); return; } SeqMatch seqmatch = null; if (refFile.isDirectory()) { // a directory of trainee files List<SeqMatch> engineList = new ArrayList<SeqMatch>(); for (File f : refFile.listFiles()) { if (!f.isHidden()) { TwowaySeqMatch match = new TwowaySeqMatch(new SeqMatchEngine(new StorageTrainee(f))); engineList.add(match); } } seqmatch = new MultiTraineeSeqMatch(engineList); } else { // a single fasta file or trainee file if (SeqUtils.guessFileFormat(refFile) == SequenceFormat.UNKNOWN) { seqmatch = CLISeqMatchFactory.trainTwowaySeqMatch(new StorageTrainee(refFile)); } else { seqmatch = CreateMultiMatchFromFile.getMultiMatch(refFile); } } out.println("query name\tmatch seq\torientation\tS_ab score\tunique oligomers\tdescription"); SeqReader reader = new SequenceReader(queryFile); Sequence seq; while ((seq = reader.readNextSequence()) != null) { SeqMatchResultSet resultSet = seqmatch.match(seq, knn); for (SeqMatchResult result : resultSet) { char r = '+'; if (result.isReverse()) { r = '-'; } if (result.getScore() > minSab) { out.println(seq.getSeqName() + "\t" + result.getSeqName() + "\t" + r + "\t" + result.getScore() + "\t" + resultSet.getQueryWordCount() + "\t" + descMap.get(result.getSeqName())); } } } out.close(); } else { throw new IllegalArgumentException("USAGE: SeqMatchMain [train|seqmatch] <args>"); } }
From source file:edu.msu.cme.rdp.probematch.cli.SliceToPrimer.java
public static void main(String[] args) throws Exception { //args = "--fedit-dist 4 --redit-dist=4 -k --max-length=400 --min-length=280 -o java_sliced_edit4.fasta TGCGAYCCSAARGCBGACTC ATSGCCATCATYTCRCCGGA /scratch/fishjord/tae_kwon_primer_match/all_genomes.fasta".split(" "); PatternBitMask64[] fprimers;// w w w . j av a 2s. co m String[] fprimerStrs, rprimerStrs; PatternBitMask64[] rprimers; FastaWriter seqOut; PrintStream statsOut; int fEdit = 3; int rEdit = 3; int minLength = Integer.MIN_VALUE; int maxLength = Integer.MAX_VALUE; boolean allowAmbiguities = true; boolean keepPrimers = false; SequenceReader inSeqs; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("edit-dist")) { fEdit = rEdit = Integer.parseInt(line.getOptionValue("edit-dist")); if (line.hasOption("redit-dist") || line.hasOption("fedit-dist")) { throw new Exception("edit-dist, [fedit-dist, redit-dist] are mutually exclusive"); } } if (line.hasOption("fedit-dist")) { fEdit = Integer.parseInt(line.getOptionValue("fedit-dist")); } if (line.hasOption("no-ambiguities")) { allowAmbiguities = false; } if (line.hasOption("keep-primers")) { keepPrimers = true; } if (line.hasOption("redit-dist")) { rEdit = Integer.parseInt(line.getOptionValue("redit-dist")); } if (line.hasOption("seq-out")) { seqOut = new FastaWriter(new File(line.getOptionValue("seq-out"))); } else { throw new Exception("Must specify seq-out"); } if (line.hasOption("stats-out")) { statsOut = new PrintStream(new File(line.getOptionValue("stats-out"))); } else { statsOut = System.out; } if (line.hasOption("min-length")) { minLength = Integer.parseInt(line.getOptionValue("min-length")); } if (line.hasOption("max-length")) { maxLength = Integer.parseInt(line.getOptionValue("max-length")); } args = line.getArgs(); if (args.length != 3) { throw new Exception("Unexpected number of command line arguments"); } fprimers = translateStringPrimers(args[0].split(","), allowAmbiguities, false); fprimerStrs = args[0].split(","); rprimers = translateStringPrimers(args[1].split(","), allowAmbiguities, true); rprimerStrs = args[1].split(","); inSeqs = new SequenceReader(new File(args[2])); } catch (Exception e) { new HelpFormatter().printHelp("SliceToPrimer [options] <f,p,r,i,m,e,r> <r,p,r,i,m,e,r> <in_seq_file>", options); System.err.println("ERROR: " + e.getMessage()); return; } Sequence seq; statsOut.println( "orig_seqid\tsliced_seqid\tfprimer\tstart\tend\tscore\trprimer\tstart\tend\tscore\tlength"); ScoringMatrix sccoringMatrix = ScoringMatrix.getDefaultNuclMatrix(); DPMAligner[] faligners = new DPMAligner[fprimers.length]; for (int index = 0; index < faligners.length; index++) { faligners[index] = new DPMAligner(fprimerStrs[index], Integer.MAX_VALUE); } try { while ((seq = inSeqs.readNextSequence()) != null) { Set<PrimerMatch> fprimerMatches = new HashSet(); Set<PrimerMatch> rprimerMatches = new HashSet(); for (int index = 0; index < fprimers.length; index++) { PatternBitMask64 primer = fprimers[index]; for (BitVector64Match r : BitVector64.process(seq.getSeqString().toCharArray(), primer, fEdit) .getResults()) { PrimerMatch match = new PrimerMatch(); match.start = r.getPosition() - (primer.getPatternLength() + r.getScore()); match.end = r.getPosition(); match.score = r.getScore(); match.primerIndex = index; fprimerMatches.add(match); } } for (int index = 0; index < rprimers.length; index++) { PatternBitMask64 primer = rprimers[index]; for (BitVector64Match r : BitVector64.process(seq.getSeqString().toCharArray(), primer, rEdit) .getResults()) { PrimerMatch match = new PrimerMatch(); match.start = r.getPosition() - (primer.getPatternLength() + r.getScore()); match.end = r.getPosition(); match.score = r.getScore(); match.primerIndex = index; rprimerMatches.add(match); } } if (fprimerMatches.isEmpty() || rprimerMatches.isEmpty()) { statsOut.println(seq.getSeqName() + "\tEither/or no forward/reverse primer hits"); continue; } for (PrimerMatch fmatch : fprimerMatches) { PrimerMatch bestReverse = null; int bestScore = Integer.MAX_VALUE; for (PrimerMatch rmatch : rprimerMatches) { if (rmatch.start > fmatch.end && rmatch.start - fmatch.end < bestScore) { bestReverse = rmatch; bestScore = rmatch.start - fmatch.end; } } if (bestReverse == null) { statsOut.println(seq.getSeqName() + "\tNo reverse primer before " + fmatch.end); continue; } String slicedSeq = null; if (keepPrimers) { slicedSeq = seq.getSeqString().substring(fmatch.start, bestReverse.end); } else { slicedSeq = seq.getSeqString().substring(fmatch.end, bestReverse.start); } String seqid = seq.getSeqName() + "_" + fmatch.primerIndex + "_" + fmatch.start; if (slicedSeq.length() > minLength && slicedSeq.length() < maxLength) { seqOut.writeSeq(seqid, "", slicedSeq); } DPMAlignment seqs = faligners[fmatch.primerIndex] .align(seq.getSeqString().substring(fmatch.start, fmatch.end)); System.err.println(">" + seqid); System.err.println(fprimerStrs[fmatch.primerIndex]); System.err.println(seq.getSeqString().substring(fmatch.start, fmatch.end)); System.err.println(); System.err.println(seqs.getAlignedMatchFragment()); System.err.println(seqs.getAlignedProbe()); System.err.println(); statsOut.println(seq.getSeqName() + "\t" + seqid + "\t" + fmatch.primerIndex + "\t" + fmatch.start + "\t" + fmatch.end + "\t" + fmatch.score + "\t" + bestReverse.primerIndex + "\t" + bestReverse.start + "\t" + bestReverse.end + "\t" + bestReverse.score + "\t" + slicedSeq.length()); } } } catch (Exception e) { e.printStackTrace(); } finally { statsOut.close(); seqOut.close(); } }
From source file:com.genentech.struchk.oeStruchk.OEStruchk.java
/** * Command line interface to {@link OEStruchk}. *///from w w w . ja va 2 s.c om public static void main(String[] args) throws ParseException, JDOMException, IOException { long start = System.currentTimeMillis(); int nMessages = 0; int nErrors = 0; int nStruct = 0; System.err.printf("OEChem Version: %s\n", oechem.OEChemGetVersion()); // create command line Options object Options options = new Options(); Option opt = new Option("f", true, "specify the configuration file name"); opt.setRequired(false); options.addOption(opt); opt = new Option("noMsg", false, "Do not add any additional sd-tags to the sdf file"); options.addOption(opt); opt = new Option("printRules", true, "Print HTML listing all the rules to filename."); options.addOption(opt); opt = new Option("errorsAsWarnings", false, "Treat errors as warnings."); options.addOption(opt); opt = new Option("stopForDebug", false, "Stop and read from stdin for user tu start debugger."); options.addOption(opt); CommandLineParser parser = new PosixParser(); CommandLine cmd = parser.parse(options, args); args = cmd.getArgs(); if (cmd.hasOption("stopForDebug")) { BufferedReader localRdr = new BufferedReader(new InputStreamReader(System.in)); System.err.print("Please press return:"); localRdr.readLine(); } URL confFile; if (cmd.hasOption("f")) { confFile = new File(cmd.getOptionValue("f")).toURI().toURL(); } else { confFile = getResourceURL(OEStruchk.class, "Struchk.xml"); } boolean errorsAsWarnings = cmd.hasOption("errorsAsWarnings"); if (cmd.hasOption("printRules")) { String fName = cmd.getOptionValue("printRules"); PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(fName))); OEStruchk structFlagAssigner = new OEStruchk(confFile, CHECKConfig.ASSIGNStructFlag, errorsAsWarnings); structFlagAssigner.printRules(out); out.close(); return; } if (args.length < 1) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("oeStruck", options); throw new Error("missing input file\n"); } BufferedReader in = null; try { in = new BufferedReader(new FileReader(args[0])); StringBuilder mol = new StringBuilder(); StringBuilder data = new StringBuilder(); PrintStream out = System.out; if (args.length > 1) out = new PrintStream(args[1]); // create OEStruchk from config file OEStruchk structFlagAssigner = new OEStruchk(confFile, CHECKConfig.ASSIGNStructFlag, errorsAsWarnings); OEStruchk structFlagChecker = new OEStruchk(confFile, CHECKConfig.CHECKStructFlag, errorsAsWarnings); Pattern sFlagPat = Pattern.compile("<StructFlag>\\s*([^\\n\\r]+)"); String line; boolean inMolFile = true; boolean atEnd = false; while (!atEnd) { if ((line = in.readLine()) == null) { if ("".equals(mol.toString().trim())) break; if (!inMolFile) throw new Error("Invalid end of sd file!"); line = "$$$$"; atEnd = true; } if (line.startsWith("$$$$")) { OEStruchk oeStruchk; StructureFlag sFlag = null; Matcher mat = sFlagPat.matcher(data); if (!mat.find()) { oeStruchk = structFlagAssigner; } else { oeStruchk = structFlagChecker; sFlag = StructureFlag.fromString(mat.group(1)); } if (!oeStruchk.applyRules(mol.toString(), null, sFlag)) nErrors++; out.print(oeStruchk.getTransformedMolfile(null)); out.print(data); if (!cmd.hasOption("noMsg")) { List<Message> msgs = oeStruchk.getStructureMessages(null); if (msgs.size() > 0) { nMessages += msgs.size(); out.println("> <errors_oe2>"); for (Message msg : msgs) out.printf("%s: %s\n", msg.getLevel(), msg.getText()); out.println(); } //System.err.println(oeStruchk.getTransformedMolfile("substance")); out.printf("> <outStereo>\n%s\n\n", oeStruchk.getStructureFlag().getName()); out.printf("> <TISM>\n%s\n\n", oeStruchk.getTransformedIsoSmiles(null)); out.printf("> <TSMI>\n%s\n\n", oeStruchk.getTransformedSmiles(null)); out.printf("> <pISM>\n%s\n\n", oeStruchk.getTransformedIsoSmiles("parent")); out.printf("> <salt>\n%s\n\n", oeStruchk.getSaltCode()); out.printf("> <stereoCounts>\n%s.%s\n\n", oeStruchk.countChiralCentersStr(), oeStruchk.countStereoDBondStr()); } out.println(line); nStruct++; mol.setLength(0); data.setLength(0); inMolFile = true; } else if (!inMolFile || line.startsWith(">")) { inMolFile = false; data.append(line).append("\n"); } else { mol.append(line).append("\n"); } } structFlagAssigner.delete(); structFlagChecker.delete(); } catch (Exception e) { throw new Error(e); } finally { System.err.printf("Checked %d structures %d errors, %d messages in %dsec\n", nStruct, nErrors, nMessages, (System.currentTimeMillis() - start) / 1000); if (in != null) in.close(); } if (cmd.hasOption("stopForDebug")) { BufferedReader localRdr = new BufferedReader(new InputStreamReader(System.in)); System.err.print("Please press return:"); localRdr.readLine(); } }
From source file:net.massbank.validator.RecordValidator.java
public static void main(String[] args) { RequestDummy request;//from ww w. ja v a2s . c o m PrintStream out = System.out; Options lvOptions = new Options(); lvOptions.addOption("h", "help", false, "show this help."); lvOptions.addOption("r", "recdata", true, "points to the recdata directory containing massbank records. Reads all *.txt files in there."); CommandLineParser lvParser = new BasicParser(); CommandLine lvCmd = null; try { lvCmd = lvParser.parse(lvOptions, args); if (lvCmd.hasOption('h')) { printHelp(lvOptions); return; } } catch (org.apache.commons.cli.ParseException pvException) { System.out.println(pvException.getMessage()); } String recDataPath = lvCmd.getOptionValue("recdata"); // --------------------------------------------- // ???? // --------------------------------------------- final String baseUrl = MassBankEnv.get(MassBankEnv.KEY_BASE_URL); final String dbRootPath = "./"; final String dbHostName = MassBankEnv.get(MassBankEnv.KEY_DB_HOST_NAME); final String tomcatTmpPath = "."; final String tmpPath = (new File(tomcatTmpPath + sdf.format(new Date()))).getPath() + File.separator; GetConfig conf = new GetConfig(baseUrl); int recVersion = 2; String selDbName = ""; Object up = null; // Was: file Upload boolean isResult = true; String upFileName = ""; boolean upResult = false; DatabaseAccess db = null; try { // ---------------------------------------------------- // ??? // ---------------------------------------------------- // if (FileUpload.isMultipartContent(request)) { // (new File(tmpPath)).mkdir(); // String os = System.getProperty("os.name"); // if (os.indexOf("Windows") == -1) { // isResult = FileUtil.changeMode("777", tmpPath); // if (!isResult) { // out.println(msgErr("[" + tmpPath // + "] chmod failed.")); // return; // } // } // up = new FileUpload(request, tmpPath); // } // ---------------------------------------------------- // ?DB???? // ---------------------------------------------------- List<String> dbNameList = Arrays.asList(conf.getDbName()); ArrayList<String> dbNames = new ArrayList<String>(); dbNames.add(""); File[] dbDirs = (new File(dbRootPath)).listFiles(); if (dbDirs != null) { for (File dbDir : dbDirs) { if (dbDir.isDirectory()) { int pos = dbDir.getName().lastIndexOf("\\"); String dbDirName = dbDir.getName().substring(pos + 1); pos = dbDirName.lastIndexOf("/"); dbDirName = dbDirName.substring(pos + 1); if (dbNameList.contains(dbDirName)) { // DB???massbank.conf???DB???? dbNames.add(dbDirName); } } } } if (dbDirs == null || dbNames.size() == 0) { out.println(msgErr("[" + dbRootPath + "] directory not exist.")); return; } Collections.sort(dbNames); // ---------------------------------------------------- // ? // ---------------------------------------------------- // if (FileUpload.isMultipartContent(request)) { // HashMap<String, String[]> reqParamMap = new HashMap<String, // String[]>(); // reqParamMap = up.getRequestParam(); // if (reqParamMap != null) { // for (Map.Entry<String, String[]> req : reqParamMap // .entrySet()) { // if (req.getKey().equals("ver")) { // try { // recVersion = Integer // .parseInt(req.getValue()[0]); // } catch (NumberFormatException nfe) { // } // } else if (req.getKey().equals("db")) { // selDbName = req.getValue()[0]; // } // } // } // } else { // if (request.getParameter("ver") != null) { // try { // recVersion = Integer.parseInt(request // .getParameter("ver")); // } catch (NumberFormatException nfe) { // } // } // selDbName = request.getParameter("db"); // } // if (selDbName == null || selDbName.equals("") // || !dbNames.contains(selDbName)) { // selDbName = dbNames.get(0); // } // --------------------------------------------- // // --------------------------------------------- out.println("Database: "); for (int i = 0; i < dbNames.size(); i++) { String dbName = dbNames.get(i); out.print("dbName"); if (dbName.equals(selDbName)) { out.print(" selected"); } if (i == 0) { out.println("------------------"); } else { out.println(dbName); } } out.println("Record Version : "); out.println(recVersion); out.println("Record Archive :"); // --------------------------------------------- // // --------------------------------------------- // HashMap<String, Boolean> upFileMap = up.doUpload(); // if (upFileMap != null) { // for (Map.Entry<String, Boolean> e : upFileMap.entrySet()) { // upFileName = e.getKey(); // upResult = e.getValue(); // break; // } // if (upFileName.equals("")) { // out.println(msgErr("please select file.")); // isResult = false; // } else if (!upResult) { // out.println(msgErr("[" + upFileName // + "] upload failed.")); // isResult = false; // } else if (!upFileName.endsWith(ZIP_EXTENSION) // && !upFileName.endsWith(MSBK_EXTENSION)) { // out.println(msgErr("please select [" // + UPLOAD_RECDATA_ZIP // + "] or [" // + UPLOAD_RECDATA_MSBK + "].")); // up.deleteFile(upFileName); // isResult = false; // } // } else { // out.println(msgErr("server error.")); // isResult = false; // } // up.deleteFileItem(); // if (!isResult) { // return; // } // --------------------------------------------- // ??? // --------------------------------------------- // final String upFilePath = (new File(tmpPath + File.separator // + upFileName)).getPath(); // isResult = FileUtil.unZip(upFilePath, tmpPath); // if (!isResult) { // out.println(msgErr("[" // + upFileName // + "] extraction failed. possibility of time-out.")); // return; // } // --------------------------------------------- // ?? // --------------------------------------------- final String recPath = (new File(dbRootPath + File.separator + selDbName)).getPath(); File tmpRecDir = new File(recDataPath); if (!tmpRecDir.isDirectory()) { tmpRecDir.mkdirs(); } // --------------------------------------------- // ??? // --------------------------------------------- // data? // final String recDataPath = (new File(tmpPath + File.separator // + RECDATA_DIR_NAME)).getPath() // + File.separator; // // if (!(new File(recDataPath)).isDirectory()) { // if (upFileName.endsWith(ZIP_EXTENSION)) { // out.println(msgErr("[" // + RECDATA_DIR_NAME // + "] directory is not included in the up-loading file.")); // } else if (upFileName.endsWith(MSBK_EXTENSION)) { // out.println(msgErr("The uploaded file is not record data.")); // } // return; // } // --------------------------------------------- // DB // --------------------------------------------- // db = new DatabaseAccess(dbHostName, selDbName); // isResult = db.open(); // if (!isResult) { // db.close(); // out.println(msgErr("not connect to database.")); // return; // } // --------------------------------------------- // ?? // --------------------------------------------- TreeMap<String, String> resultMap = validationRecord(db, out, recDataPath, recPath, recVersion); if (resultMap.size() == 0) { return; } // --------------------------------------------- // ? // --------------------------------------------- isResult = dispResult(out, resultMap); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (db != null) { db.close(); } File tmpDir = new File(tmpPath); if (tmpDir.exists()) { FileUtil.removeDir(tmpDir.getPath()); } } }
From source file:edu.msu.cme.rdp.graph.utils.ContigMerger.java
public static void main(String[] args) throws IOException { final BufferedReader hmmgsResultReader; final IndexedSeqReader nuclContigReader; final double minBits; final int minProtLength; final Options options = new Options(); final PrintStream out; final ProfileHMM hmm; final FastaWriter protSeqOut; final FastaWriter nuclSeqOut; final boolean prot; final boolean all; final String shortSampleName; options.addOption("a", "all", false, "Generate all combinations for multiple paths, instead of just the best"); options.addOption("b", "min-bits", true, "Minimum bits score"); options.addOption("l", "min-length", true, "Minimum length"); options.addOption("s", "short_samplename", true, "short sample name, to be used as part of contig identifiers. This allow analyzing contigs together from different samples in downstream analysis "); options.addOption("o", "out", true, "Write output to file instead of stdout"); try {/* w ww.ja va 2s .c o m*/ CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("min-bits")) { minBits = Double.valueOf(line.getOptionValue("min-bits")); } else { minBits = Double.NEGATIVE_INFINITY; } if (line.hasOption("min-length")) { minProtLength = Integer.valueOf(line.getOptionValue("min-length")); } else { minProtLength = 0; } if (line.hasOption("short_samplename")) { shortSampleName = line.getOptionValue("short_samplename") + "_"; } else { shortSampleName = ""; } if (line.hasOption("out")) { out = new PrintStream(line.getOptionValue("out")); } else { out = System.err; } all = line.hasOption("all"); args = line.getArgs(); if (args.length != 3) { throw new Exception("Unexpected number of arguments"); } hmmgsResultReader = new BufferedReader(new FileReader(new File(args[1]))); nuclContigReader = new IndexedSeqReader(new File(args[2])); hmm = HMMER3bParser.readModel(new File(args[0])); prot = (hmm.getAlphabet() == SequenceType.Protein); if (prot) { protSeqOut = new FastaWriter(new File("prot_merged.fasta")); } else { protSeqOut = null; } nuclSeqOut = new FastaWriter(new File("nucl_merged.fasta")); } catch (Exception e) { new HelpFormatter().printHelp("USAGE: ContigMerger [options] <hmm> <hmmgs_file> <nucl_contig>", options); System.err.println("Error: " + e.getMessage()); System.exit(1); throw new RuntimeException("I hate you javac"); } String line; SearchDirection lastDir = SearchDirection.left; //So this has an assumption built in //It depends on hmmgs always outputting left fragments, then right //We can't just use the kmer to figure out if we've switched to another starting point //because we allow multiple starting model pos, so two different starting //positions can have the same starting kmer Map<String, Sequence> leftContigs = new HashMap(); Map<String, Sequence> rightContigs = new HashMap(); int contigsMerged = 0; int writtenMerges = 0; long startTime = System.currentTimeMillis(); String kmer = null; String geneName = null; while ((line = hmmgsResultReader.readLine()) != null) { if (line.startsWith("#")) { continue; } String[] lexemes = line.trim().split("\t"); if (lexemes.length != 12 || lexemes[0].equals("-")) { System.err.println("Skipping line: " + line); continue; } //contig_53493 nirk 1500:6:35:16409:3561/1 ADV15048 tcggcgctctacacgttcctgcagcccggg 40 210 70 left -44.692 184 0 int index = 0; String seqid = lexemes[0]; geneName = lexemes[1]; String readid = lexemes[2]; String refid = lexemes[3]; kmer = lexemes[4]; int modelStart = Integer.valueOf(lexemes[5]); int nuclLength = Integer.valueOf(lexemes[6]); int protLength = Integer.valueOf(lexemes[7]); SearchDirection dir = SearchDirection.valueOf(lexemes[8]); if (dir != lastDir) { if (dir == SearchDirection.left) { List<MergedContig> mergedContigs = all ? mergeAllContigs(leftContigs, rightContigs, kmer, geneName, hmm) : mergeContigs(leftContigs, rightContigs, kmer, geneName, hmm); contigsMerged++; for (MergedContig mc : mergedContigs) { String mergedId = shortSampleName + geneName + "_" + mc.leftContig + "_" + mc.rightContig; out.println(mergedId + "\t" + mc.length + "\t" + mc.score); if (mc.score > minBits && mc.length > minProtLength) { if (prot) { protSeqOut.writeSeq(mergedId, mc.protSeq); } nuclSeqOut.writeSeq(mergedId, mc.nuclSeq); writtenMerges++; } } leftContigs.clear(); rightContigs.clear(); } lastDir = dir; } Sequence seq = nuclContigReader.readSeq(seqid); if (dir == SearchDirection.left) { leftContigs.put(seqid, seq); } else if (dir == SearchDirection.right) { rightContigs.put(seqid, seq); } else { throw new IOException("Cannot handle search direction " + dir); } } if (!leftContigs.isEmpty() || !rightContigs.isEmpty()) { List<MergedContig> mergedContigs = all ? mergeAllContigs(leftContigs, rightContigs, kmer, geneName, hmm) : mergeContigs(leftContigs, rightContigs, kmer, geneName, hmm); for (MergedContig mc : mergedContigs) { String mergedId = shortSampleName + mc.gene + "_" + mc.leftContig + "_" + mc.rightContig; out.println(mergedId + "\t" + mc.length + "\t" + mc.score); contigsMerged++; if (mc.score > minBits && mc.length > minProtLength) { if (prot) { protSeqOut.writeSeq(mergedId, mc.protSeq); } nuclSeqOut.writeSeq(mergedId, mc.nuclSeq); writtenMerges++; } } } out.close(); if (prot) { protSeqOut.close(); } nuclSeqOut.close(); System.err.println("Read in " + contigsMerged + " contigs, wrote out " + writtenMerges + " merged contigs in " + ((double) (System.currentTimeMillis() - startTime) / 1000) + "s"); }