List of usage examples for java.io PrintStream close
public void close()
From source file:com.mapr.synth.Synth.java
public static void main(String[] args) throws IOException, CmdLineException, InterruptedException, ExecutionException { final Options opts = new Options(); CmdLineParser parser = new CmdLineParser(opts); try {/*from ww w.ja v a 2s .c o m*/ parser.parseArgument(args); } catch (CmdLineException e) { System.err.println("Usage: " + "[ -count <number>G|M|K ] " + "-schema schema-file " + "[-quote DOUBLE_QUOTE|BACK_SLASH|OPTIMISTIC] " + "[-format JSON|TSV|CSV|XML ] " + "[-threads n] " + "[-output output-directory-name] "); throw e; } Preconditions.checkArgument(opts.threads > 0 && opts.threads <= 2000, "Must have at least one thread and no more than 2000"); if (opts.threads > 1) { Preconditions.checkArgument(!"-".equals(opts.output), "If more than on thread is used, you have to use -output to set the output directory"); } File outputDir = new File(opts.output); if (!"-".equals(opts.output)) { if (!outputDir.exists()) { Preconditions.checkState(outputDir.mkdirs(), String.format("Couldn't create output directory %s", opts.output)); } Preconditions.checkArgument(outputDir.exists() && outputDir.isDirectory(), String.format("Couldn't create directory %s", opts.output)); } if (opts.schema == null) { throw new IllegalArgumentException("Must specify schema file using [-schema filename] option"); } final SchemaSampler sampler = new SchemaSampler(opts.schema); final AtomicLong rowCount = new AtomicLong(); final List<ReportingWorker> tasks = Lists.newArrayList(); int limit = (opts.count + opts.threads - 1) / opts.threads; int remaining = opts.count; for (int i = 0; i < opts.threads; i++) { final int count = Math.min(limit, remaining); remaining -= count; tasks.add(new ReportingWorker(opts, sampler, rowCount, count, i)); } final double t0 = System.nanoTime() * 1e-9; ExecutorService pool = Executors.newFixedThreadPool(opts.threads); ScheduledExecutorService blinker = Executors.newScheduledThreadPool(1); final AtomicBoolean finalRun = new AtomicBoolean(false); final PrintStream sideLog = new PrintStream(new FileOutputStream("side-log")); Runnable blink = new Runnable() { public double oldT; private long oldN; @Override public void run() { double t = System.nanoTime() * 1e-9; long n = rowCount.get(); System.err.printf("%s\t%d\t%.1f\t%d\t%.1f\t%.3f\n", finalRun.get() ? "F" : "R", opts.threads, t - t0, n, n / (t - t0), (n - oldN) / (t - oldT)); for (ReportingWorker task : tasks) { ReportingWorker.ThreadReport r = task.report(); sideLog.printf("\t%d\t%.2f\t%.2f\t%.2f\t%.1f\t%.1f\n", r.fileNumber, r.threadTime, r.userTime, r.wallTime, r.rows / r.threadTime, r.rows / r.wallTime); } oldN = n; oldT = t; } }; if (!"-".equals(opts.output)) { blinker.scheduleAtFixedRate(blink, 0, 10, TimeUnit.SECONDS); } List<Future<Integer>> results = pool.invokeAll(tasks); int total = 0; for (Future<Integer> result : results) { total += result.get(); } Preconditions.checkState(total == opts.count, String .format("Expected to generate %d lines of output, but actually generated %d", opts.count, total)); pool.shutdownNow(); blinker.shutdownNow(); finalRun.set(true); sideLog.close(); blink.run(); }
From source file:it.unimi.dsi.webgraph.algo.HyperBall.java
public static void main(String arg[]) throws IOException, JSAPException, IllegalArgumentException, ClassNotFoundException, IllegalAccessException, InvocationTargetException, InstantiationException, NoSuchMethodException { SimpleJSAP jsap = new SimpleJSAP(HyperBall.class.getName(), "Runs HyperBall on the given graph, possibly computing positive geometric centralities.\n\nPlease note that to compute negative centralities on directed graphs (which is usually what you want) you have to compute positive centralities on the transpose.", new Parameter[] { new FlaggedOption("log2m", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', "log2m", "The logarithm of the number of registers."), new FlaggedOption("upperBound", JSAP.LONGSIZE_PARSER, Long.toString(Long.MAX_VALUE), JSAP.NOT_REQUIRED, 'u', "upper-bound", "An upper bound to the number of iterations."), new FlaggedOption("threshold", JSAP.DOUBLE_PARSER, "-1", JSAP.NOT_REQUIRED, 't', "threshold", "A threshold that will be used to stop the computation by relative increment. If it is -1, the iteration will stop only when all registers do not change their value (recommended)."), new FlaggedOption("threads", JSAP.INTSIZE_PARSER, "0", JSAP.NOT_REQUIRED, 'T', "threads", "The number of threads to be used. If 0, the number will be estimated automatically."), new FlaggedOption("granularity", JSAP.INTSIZE_PARSER, Integer.toString(DEFAULT_GRANULARITY), JSAP.NOT_REQUIRED, 'g', "granularity", "The number of node per task in a multicore environment."), new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, Util.formatBinarySize(DEFAULT_BUFFER_SIZE), JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of an I/O buffer in bytes."), new FlaggedOption("neighbourhoodFunction", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'n', "neighbourhood-function", "Store an approximation the neighbourhood function in text format."), new FlaggedOption("sumOfDistances", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "sum-of-distances", "Store an approximation of the sum of distances from each node as a binary list of floats."), new FlaggedOption("harmonicCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'h', "harmonic-centrality", "Store an approximation of the positive harmonic centrality (the sum of the reciprocals of distances from each node) as a binary list of floats."), new FlaggedOption("discountedGainCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'z', "discounted-gain-centrality", "A positive discounted gain centrality to be approximated and stored; it is specified as O:F where O is the spec of an object of type Int2DoubleFunction and F is the name of the file where the binary list of floats will be stored. The spec can be either the name of a public field of HyperBall, or a constructor invocation of a class implementing Int2DoubleFunction.") .setAllowMultipleDeclarations(true), new FlaggedOption("closenessCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c', "closeness-centrality", "Store an approximation of the positive closeness centrality of each node (the reciprocal of sum of the distances from each node) as a binary list of floats. Terminal nodes will have centrality equal to zero."), new FlaggedOption("linCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'L', "lin-centrality", "Store an approximation of the positive Lin centrality of each node (the reciprocal of sum of the distances from each node multiplied by the square of the number of nodes reachable from the node) as a binary list of floats. Terminal nodes will have centrality equal to one."), new FlaggedOption("nieminenCentrality", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'N', "nieminen-centrality", "Store an approximation of the positive Nieminen centrality of each node (the square of the number of nodes reachable from each node minus the sum of the distances from the node) as a binary list of floats."), new FlaggedOption("reachable", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'r', "reachable", "Store an approximation of the number of nodes reachable from each node as a binary list of floats."), new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'S', "seed", "The random seed."), new Switch("spec", 's', "spec", "The basename is not a basename but rather a specification of the form <ImmutableGraphImplementation>(arg,arg,...)."), new Switch("offline", 'o', "offline", "Do not load the graph in main memory. If this option is used, the graph will be loaded in offline (for one thread) or mapped (for several threads) mode."), new Switch("external", 'e', "external", "Use an external dump file instead of core memory to store new counter values. Note that the file might be very large: you might need to set suitably the Java temporary directory (-Djava.io.tmpdir=DIR)."), new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."), new UnflaggedOption("basenamet", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The basename of the transpose graph for systolic computations (strongly suggested). If it is equal to <basename>, the graph will be assumed to be symmetric and will be loaded just once."), }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1);//from w ww. j a v a 2 s . c om final boolean spec = jsapResult.getBoolean("spec"); final boolean external = jsapResult.getBoolean("external"); final boolean offline = jsapResult.getBoolean("offline"); final String neighbourhoodFunctionFile = jsapResult.getString("neighbourhoodFunction"); final boolean neighbourhoodFunction = jsapResult.userSpecified("neighbourhoodFunction"); final String sumOfDistancesFile = jsapResult.getString("sumOfDistances"); final boolean sumOfDistances = jsapResult.userSpecified("sumOfDistances"); final String harmonicCentralityFile = jsapResult.getString("harmonicCentrality"); final boolean harmonicCentrality = jsapResult.userSpecified("harmonicCentrality"); final String closenessCentralityFile = jsapResult.getString("closenessCentrality"); final boolean closenessCentrality = jsapResult.userSpecified("closenessCentrality"); final String linCentralityFile = jsapResult.getString("linCentrality"); final boolean linCentrality = jsapResult.userSpecified("linCentrality"); final String nieminenCentralityFile = jsapResult.getString("nieminenCentrality"); final boolean nieminenCentrality = jsapResult.userSpecified("nieminenCentrality"); final String reachableFile = jsapResult.getString("reachable"); final boolean reachable = jsapResult.userSpecified("reachable"); final String basename = jsapResult.getString("basename"); final String basenamet = jsapResult.getString("basenamet"); final ProgressLogger pl = new ProgressLogger(LOGGER); final int log2m = jsapResult.getInt("log2m"); final int threads = jsapResult.getInt("threads"); final int bufferSize = jsapResult.getInt("bufferSize"); final int granularity = jsapResult.getInt("granularity"); final long seed = jsapResult.userSpecified("seed") ? jsapResult.getLong("seed") : Util.randomSeed(); final String[] discountedGainCentralitySpec = jsapResult.getStringArray("discountedGainCentrality"); final Int2DoubleFunction[] discountFunction = new Int2DoubleFunction[discountedGainCentralitySpec.length]; final String[] discountedGainCentralityFile = new String[discountedGainCentralitySpec.length]; for (int i = 0; i < discountedGainCentralitySpec.length; i++) { int pos = discountedGainCentralitySpec[i].indexOf(':'); if (pos < 0) throw new IllegalArgumentException("Wrong spec <" + discountedGainCentralitySpec[i] + ">"); discountedGainCentralityFile[i] = discountedGainCentralitySpec[i].substring(pos + 1); String gainSpec = discountedGainCentralitySpec[i].substring(0, pos); Int2DoubleFunction candidateFunction; try { candidateFunction = (Int2DoubleFunction) HyperBall.class.getField(gainSpec).get(null); } catch (SecurityException e) { throw new IllegalArgumentException("Field " + gainSpec + " exists but cannot be accessed", e); } catch (ClassCastException e) { throw new IllegalArgumentException( "Field " + gainSpec + " exists but it is not of type Int2DoubleFunction", e); } catch (NoSuchFieldException e) { candidateFunction = null; } discountFunction[i] = candidateFunction == null ? ObjectParser.fromSpec(gainSpec, Int2DoubleFunction.class) : candidateFunction; } final ImmutableGraph graph = spec ? ObjectParser.fromSpec(basename, ImmutableGraph.class, GraphClassParser.PACKAGE) : offline ? ((numberOfThreads(threads) == 1 && basenamet == null ? ImmutableGraph.loadOffline(basename) : ImmutableGraph.loadMapped(basename, new ProgressLogger()))) : ImmutableGraph.load(basename, new ProgressLogger()); final ImmutableGraph grapht = basenamet == null ? null : basenamet.equals(basename) ? graph : spec ? ObjectParser.fromSpec(basenamet, ImmutableGraph.class, GraphClassParser.PACKAGE) : offline ? ImmutableGraph.loadMapped(basenamet, new ProgressLogger()) : ImmutableGraph.load(basenamet, new ProgressLogger()); final HyperBall hyperBall = new HyperBall(graph, grapht, log2m, pl, threads, bufferSize, granularity, external, sumOfDistances || closenessCentrality || linCentrality || nieminenCentrality, harmonicCentrality, discountFunction, seed); hyperBall.run(jsapResult.getLong("upperBound"), jsapResult.getDouble("threshold")); hyperBall.close(); if (neighbourhoodFunction) { final PrintStream stream = new PrintStream( new FastBufferedOutputStream(new FileOutputStream(neighbourhoodFunctionFile))); for (DoubleIterator i = hyperBall.neighbourhoodFunction.iterator(); i.hasNext();) stream.println(BigDecimal.valueOf(i.nextDouble()).toPlainString()); stream.close(); } if (sumOfDistances) BinIO.storeFloats(hyperBall.sumOfDistances, sumOfDistancesFile); if (harmonicCentrality) BinIO.storeFloats(hyperBall.sumOfInverseDistances, harmonicCentralityFile); for (int i = 0; i < discountedGainCentralitySpec.length; i++) BinIO.storeFloats(hyperBall.discountedCentrality[i], discountedGainCentralityFile[i]); if (closenessCentrality) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(closenessCentralityFile))); for (int i = 0; i < n; i++) dos.writeFloat(hyperBall.sumOfDistances[i] == 0 ? 0 : 1 / hyperBall.sumOfDistances[i]); dos.close(); } if (linCentrality) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(linCentralityFile))); for (int i = 0; i < n; i++) { // Lin's index for isolated nodes is by (our) definition one (it's smaller than any other node). if (hyperBall.sumOfDistances[i] == 0) dos.writeFloat(1); else { final double count = hyperBall.count(i); dos.writeFloat((float) (count * count / hyperBall.sumOfDistances[i])); } } dos.close(); } if (nieminenCentrality) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(nieminenCentralityFile))); for (int i = 0; i < n; i++) { final double count = hyperBall.count(i); dos.writeFloat((float) (count * count - hyperBall.sumOfDistances[i])); } dos.close(); } if (reachable) { final int n = graph.numNodes(); final DataOutputStream dos = new DataOutputStream( new FastBufferedOutputStream(new FileOutputStream(reachableFile))); for (int i = 0; i < n; i++) dos.writeFloat((float) hyperBall.count(i)); dos.close(); } }
From source file:io.anserini.search.SearchTweets.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RM3_OPTION, "apply relevance feedback with rm3")); options.addOption(//from www. ja v a 2 s. c o m OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(SearchTweets.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; String topicsFile = cmdline.getOptionValue(QUERIES_OPTION); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexLocation.getAbsolutePath()))); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } MicroblogTopicSet topics = MicroblogTopicSet.fromFile(new File(topicsFile)); for (MicroblogTopic topic : topics) { Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); Query query = AnalyzerUtils.buildBagOfWordsQuery(StatusField.TEXT.name, IndexTweets.ANALYZER, topic.getQuery()); TopDocs rs = searcher.search(query, filter, numResults); RerankerContext context = new RerankerContext(searcher, query, topic.getQuery(), filter); RerankerCascade cascade = new RerankerCascade(context); if (cmdline.hasOption(RM3_OPTION)) { cascade.add(new Rm3Reranker(IndexTweets.ANALYZER, StatusField.TEXT.name)); cascade.add(new RemoveRetweetsTemporalTiebreakReranker()); } else { cascade.add(new RemoveRetweetsTemporalTiebreakReranker()); } ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher)); for (int i = 0; i < docs.documents.length; i++) { String qid = topic.getId().replaceFirst("^MB0*", ""); out.println(String.format("%s Q0 %s %d %f %s", qid, docs.documents[i].getField(StatusField.ID.name).numericValue(), (i + 1), docs.scores[i], runtag)); } } reader.close(); out.close(); }
From source file:io.bfscan.clueweb12.BuildWarcTrecIdMapping.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file") .create(INPUT_OPTION));/*from w w w . ja va 2s . com*/ options.addOption( OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("maximum number of documents to index").create(MAX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads") .create(THREADS_OPTION)); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(BuildWarcTrecIdMapping.class.getCanonicalName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION)) : Integer.MAX_VALUE; int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION)) : DEFAULT_NUM_THREADS; long startTime = System.currentTimeMillis(); String path = cmdline.getOptionValue(INPUT_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); Directory dir = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); LOG.info("Creating index at " + indexPath); LOG.info("Indexing with " + threads + " threads"); FileInputStream fis = null; BufferedReader br = null; try { fis = new FileInputStream(new File(path)); byte[] ignoreBytes = new byte[2]; fis.read(ignoreBytes); // "B", "Z" bytes from commandline tools br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fis), "UTF8")); ExecutorService executor = Executors.newFixedThreadPool(threads); int cnt = 0; String s; while ((s = br.readLine()) != null) { Runnable worker = new AddDocumentRunnable(writer, s); executor.execute(worker); cnt++; if (cnt % 1000000 == 0) { LOG.info(cnt + " articles added"); } if (cnt >= maxdocs) { break; } } executor.shutdown(); // Wait until all threads are finish while (!executor.isTerminated()) { } LOG.info("Total of " + cnt + " articles indexed."); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); out.close(); br.close(); fis.close(); } }
From source file:org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityServiceImpl.java
@SuppressWarnings("static-access") public static void main(String args[]) throws IOException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("concepts").hasArg().withDescription( "concept pairs or a file containing concept pairs. To specify pairs on command line, separate concepts by comma, concept pairs by semicolon. For file, separate concepts by comma or tab, each concept pair on a new line.") .isRequired(true).create("concepts")); options.addOption(OptionBuilder.withArgName("metrics").hasArg().withDescription( "comma-separated list of metrics. Valid metrics: " + Arrays.asList(SimilarityMetricEnum.values())) .isRequired(true).create("metrics")); options.addOption(OptionBuilder.withArgName("out").hasArg() .withDescription("file to write oputput to. if not specified, output sent to stdout.") .create("out")); options.addOption(OptionBuilder.withArgName("lcs") .withDescription("output lcs and path for each concept pair").create("lcs")); try {/*from ww w .java 2 s . c o m*/ CommandLineParser parser = new GnuParser(); CommandLine line = parser.parse(options, args); String concepts = line.getOptionValue("concepts"); String metrics = line.getOptionValue("metrics"); String out = line.getOptionValue("out"); boolean lcs = line.hasOption("lcs"); PrintStream os = null; try { if (out != null) { os = new PrintStream(new BufferedOutputStream(new FileOutputStream(out))); } else { os = System.out; } List<ConceptPair> conceptPairs = parseConcepts(concepts); List<SimilarityMetricEnum> metricList = parseMetrics(metrics); ConceptSimilarityService simSvc = SimSvcContextHolder.getApplicationContext() .getBean(ConceptSimilarityService.class); List<SimilarityInfo> simInfos = lcs ? new ArrayList<SimilarityInfo>(conceptPairs.size()) : null; List<ConceptPairSimilarity> conceptSimMap = simSvc.similarity(conceptPairs, metricList, null, lcs); printSimilarities(conceptPairs, conceptSimMap, metricList, simInfos, lcs, os); // try { // Thread.sleep(60*1000); // } catch (InterruptedException e) { // e.printStackTrace(); // } } finally { if (out != null) { try { os.close(); } catch (Exception e) { } } } } catch (ParseException pe) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("java " + ConceptSimilarityServiceImpl.class.getName() + " get concept similiarity", options); } }
From source file:com.basistech.lucene.tools.LuceneQueryTool.java
public static void main(String[] args) throws IOException, org.apache.lucene.queryparser.classic.ParseException { String charsetName = Charset.defaultCharset().name(); if (!"UTF-8".equals(charsetName)) { // Really only a problem on mac, where the default charset is MacRoman, // and it cannot be changed via the system Locale. System.err.println(String.format("defaultCharset is %s, but we require UTF-8.", charsetName)); System.err.println("Set -Dfile.encoding=UTF-8 on the Java command line, or"); System.err.println("set JAVA_TOOL_OPTIONS=-Dfile.encoding=UTF-8 in the environment."); System.exit(1);/*from w ww.j av a 2 s . c om*/ } Options options = LuceneQueryTool.createOptions(); CommandLineParser parser = new GnuParser(); CommandLine cmdline = null; try { cmdline = parser.parse(options, args); validateOptions(options, args); } catch (org.apache.commons.cli.ParseException e) { System.err.println(e.getMessage()); usage(options); System.exit(1); } String[] remaining = cmdline.getArgs(); if (remaining != null && remaining.length > 0) { System.err.println("unknown extra args found: " + Lists.newArrayList(remaining)); usage(options); System.exit(1); } String[] indexPaths = cmdline.getOptionValues("index"); IndexReader[] readers = new IndexReader[indexPaths.length]; for (int i = 0; i < indexPaths.length; i++) { readers[i] = DirectoryReader.open(FSDirectory.open(new File(indexPaths[i]))); } IndexReader reader = new MultiReader(readers, true); LuceneQueryTool that = new LuceneQueryTool(reader); String opt; opt = cmdline.getOptionValue("query-limit"); if (opt != null) { that.setQueryLimit(Integer.parseInt(opt)); } opt = cmdline.getOptionValue("output-limit"); if (opt != null) { that.setOutputLimit(Integer.parseInt(opt)); } opt = cmdline.getOptionValue("analyzer"); if (opt != null) { that.setAnalyzer(opt); } opt = cmdline.getOptionValue("query-field"); if (opt != null) { that.setDefaultField(opt); } opt = cmdline.getOptionValue("output"); PrintStream out = null; if (opt != null) { out = new PrintStream(new FileOutputStream(new File(opt)), true); that.setOutputStream(out); } if (cmdline.hasOption("show-id")) { that.setShowId(true); } if (cmdline.hasOption("show-hits")) { that.setShowHits(true); } if (cmdline.hasOption("show-score")) { that.setShowScore(true); } if (cmdline.hasOption("sort-fields")) { that.setSortFields(true); } if (cmdline.hasOption("suppress-names")) { that.setSuppressNames(true); } if (cmdline.hasOption("tabular")) { that.setTabular(true); } String[] opts; opts = cmdline.getOptionValues("fields"); if (opts != null) { that.setFieldNames(Lists.newArrayList(opts)); } opt = cmdline.getOptionValue("regex"); if (opt != null) { Pattern p = Pattern.compile("^(.*?):/(.*)/$"); Matcher m = p.matcher(opt); if (m.matches()) { that.setRegex(m.group(1), Pattern.compile(m.group(2))); } else { System.err.println("Invalid regex, should be field:/regex/"); usage(options); System.exit(1); } } opts = cmdline.getOptionValues("query"); that.run(opts); if (out != null) { out.close(); } reader.close(); }
From source file:cc.wikitools.lucene.IndexWikipediaDump.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file") .create(INPUT_OPTION));//from w ww .j a v a 2 s .co m options.addOption( OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("maximum number of documents to index").create(MAX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads") .create(THREADS_OPTION)); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexWikipediaDump.class.getCanonicalName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION)) : Integer.MAX_VALUE; int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION)) : DEFAULT_NUM_THREADS; long startTime = System.currentTimeMillis(); String path = cmdline.getOptionValue(INPUT_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); WikiClean cleaner = new WikiCleanBuilder().withTitle(true).build(); Directory dir = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); LOG.info("Creating index at " + indexPath); LOG.info("Indexing with " + threads + " threads"); try { WikipediaBz2DumpInputStream stream = new WikipediaBz2DumpInputStream(path); ExecutorService executor = Executors.newFixedThreadPool(threads); int cnt = 0; String page; while ((page = stream.readNext()) != null) { String title = cleaner.getTitle(page); // These are heuristic specifically for filtering out non-articles in enwiki-20120104. if (title.startsWith("Wikipedia:") || title.startsWith("Portal:") || title.startsWith("File:")) { continue; } if (page.contains("#REDIRECT") || page.contains("#redirect") || page.contains("#Redirect")) { continue; } Runnable worker = new AddDocumentRunnable(writer, cleaner, page); executor.execute(worker); cnt++; if (cnt % 10000 == 0) { LOG.info(cnt + " articles added"); } if (cnt >= maxdocs) { break; } } executor.shutdown(); // Wait until all threads are finish while (!executor.isTerminated()) { } LOG.info("Total of " + cnt + " articles indexed."); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); out.close(); } }
From source file:ca.ualberta.exemplar.core.Exemplar.java
public static void main(String[] rawArgs) throws FileNotFoundException, UnsupportedEncodingException { CommandLineParser cli = new BasicParser(); Options options = new Options(); options.addOption("h", "help", false, "shows this message"); options.addOption("b", "benchmark", true, "expects input to be a benchmark file (type = binary | nary)"); options.addOption("p", "parser", true, "defines which parser to use (parser = stanford | malt)"); CommandLine line = null;/* w w w. j ava 2 s . c o m*/ try { line = cli.parse(options, rawArgs); } catch (ParseException exp) { System.err.println(exp.getMessage()); System.exit(1); } String[] args = line.getArgs(); String parserName = line.getOptionValue("parser", "malt"); if (line.hasOption("help")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("sh ./exemplar", options); System.exit(0); } if (args.length != 2) { System.out.println("error: exemplar requires an input file and output file."); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("sh ./exemplar <input> <output>", options); System.exit(0); } File input = new File(args[0]); File output = new File(args[1]); String benchmarkType = line.getOptionValue("benchmark", ""); if (!benchmarkType.isEmpty()) { if (benchmarkType.equals("binary")) { BenchmarkBinary evaluation = new BenchmarkBinary(input, output, parserName); evaluation.runAndTime(); System.exit(0); } else { if (benchmarkType.equals("nary")) { BenchmarkNary evaluation = new BenchmarkNary(input, output, parserName); evaluation.runAndTime(); System.exit(0); } else { System.out.println("error: benchmark option has to be either 'binary' or 'nary'."); System.exit(0); } } } Parser parser = null; if (parserName.equals("stanford")) { parser = new ParserStanford(); } else { if (parserName.equals("malt")) { parser = new ParserMalt(); } else { System.out.println(parserName + " is not a valid parser."); System.exit(0); } } System.out.println("Starting EXEMPLAR..."); RelationExtraction exemplar = null; try { exemplar = new RelationExtraction(parser); } catch (FileNotFoundException e) { e.printStackTrace(); } BlockingQueue<String> inputQueue = new ArrayBlockingQueue<String>(QUEUE_SIZE); PlainTextReader reader = null; reader = new PlainTextReader(inputQueue, input); Thread readerThread = new Thread(reader); readerThread.start(); PrintStream statementsOut = null; try { statementsOut = new PrintStream(output, "UTF-8"); } catch (FileNotFoundException e1) { e1.printStackTrace(); System.exit(0); } catch (UnsupportedEncodingException e1) { e1.printStackTrace(); System.exit(0); } statementsOut.println("Subjects\tRelation\tObjects\tNormalized Relation\tSentence"); while (true) { String doc = null; try { doc = inputQueue.take(); } catch (InterruptedException e) { e.printStackTrace(); } if (doc.isEmpty()) { break; } List<RelationInstance> instances = exemplar.extractRelations(doc); for (RelationInstance instance : instances) { // Output SUBJ arguments in a separate field, for clarity boolean first = true; for (Argument arg : instance.getArguments()) { if (arg.argumentType.equals("SUBJ")) { if (first) { first = false; } else { statementsOut.print(",,"); } statementsOut.print(arg.argumentType + ":" + arg.entityId); } } // Output the original relation statementsOut.print("\t" + instance.getOriginalRelation() + "\t"); // Output the DOBJ arguments, followed by POBJ first = true; for (Argument arg : instance.getArguments()) { if (arg.argumentType.equals("DOBJ")) { if (first) { first = false; } else { statementsOut.print(",,"); } statementsOut.print(arg.argumentType + ":" + arg.entityId); } } for (Argument arg : instance.getArguments()) { if (arg.argumentType.startsWith("POBJ")) { if (first) { first = false; } else { statementsOut.print(",,"); } statementsOut.print(arg.argumentType + ":" + arg.entityId); } } statementsOut.print("\t" + instance.getNormalizedRelation()); statementsOut.print("\t" + instance.getSentence()); statementsOut.println(); } } System.out.println("Done!"); statementsOut.close(); }
From source file:edu.msu.cme.rdp.kmer.cli.KmerCoverage.java
/** * This program maps the kmers from reads to kmers on each contig, * writes the mean, median coverage of each contig to a file * writes the kmer abundance to a file//w ww .ja v a 2 s.c o m * @param args * @throws IOException */ public static void main(String[] args) throws IOException, InterruptedException { int kmerSize = 45; final int maxThreads; final int maxTasks = 1000; final PrintStream match_reads_out; try { CommandLine cmdLine = new PosixParser().parse(options, args); args = cmdLine.getArgs(); if (args.length < 5) { throw new Exception("Unexpected number of arguments"); } kmerSize = Integer.parseInt(args[0]); if (kmerSize > Kmer.max_nucl_kmer_size) { throw new Exception("kmerSize should be less than " + Kmer.max_nucl_kmer_size); } if (cmdLine.hasOption("match_reads_out")) { match_reads_out = new PrintStream(cmdLine.getOptionValue("match_reads_out")); } else { match_reads_out = null; } if (cmdLine.hasOption("threads")) { maxThreads = Integer.valueOf(cmdLine.getOptionValue("threads")); if (maxThreads >= Runtime.getRuntime().availableProcessors()) { System.err.println(" Runtime.getRuntime().availableProcessors() " + Runtime.getRuntime().availableProcessors()); } } else { maxThreads = 1; } final KmerCoverage kmerCoverage = new KmerCoverage(kmerSize, new SequenceReader(new File(args[1]))); final AtomicInteger outstandingTasks = new AtomicInteger(); ExecutorService service = Executors.newFixedThreadPool(maxThreads); Sequence seq; // parse one file at a time for (int index = 4; index < args.length; index++) { SequenceReader reader = new SequenceReader(new File(args[index])); while ((seq = reader.readNextSequence()) != null) { if (seq.getSeqString().length() < kmerSize) { continue; } final Sequence threadSeq = seq; Runnable r = new Runnable() { public void run() { try { kmerCoverage.processReads(threadSeq, match_reads_out); outstandingTasks.decrementAndGet(); } catch (Exception e) { e.printStackTrace(); } } }; outstandingTasks.incrementAndGet(); service.submit(r); while (outstandingTasks.get() >= maxTasks) ; } reader.close(); } service.shutdown(); service.awaitTermination(1, TimeUnit.DAYS); kmerCoverage.printCovereage(new FileOutputStream(new File(args[2])), new FileOutputStream(new File(args[3]))); if (match_reads_out != null) { match_reads_out.close(); } } catch (Exception e) { new HelpFormatter().printHelp( "KmerCoverage <kmerSize> <query_file> <coverage_out> <abundance_out> <reads_file> <reads_file>...\nmaximum kmerSize " + Kmer.max_nucl_kmer_size, options); e.printStackTrace(); System.exit(1); } }
From source file:edu.msu.cme.rdp.seqmatch.cli.SeqMatchMain.java
public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.println("USAGE: SeqMatchMain [train|seqmatch] <args>"); return;//from ww w.j a va2 s.co m } String cmd = args[0]; args = Arrays.copyOfRange(args, 1, args.length); if (cmd.equals("train")) { if (args.length != 2) { System.err.println("USAGE: train <reference sequences> <trainee_out_file_prefix>" + "\nMultiple trainee output files might be created, each containing maximum " + Trainee.MAX_NUM_SEQ + " sequences"); return; } File refSeqs = new File(args[0]); File traineeFileOut = new File(args[1]); //maybe more than 1 trainee files need to be created, depending on the number of seqs CreateMultiMatchFromFile.getMultiTrainee(refSeqs, traineeFileOut); } else if (cmd.equals("seqmatch")) { File refFile = null; File queryFile = null; HashMap<String, String> descMap = new HashMap<String, String>(); PrintStream out = new PrintStream(System.out); int knn = 20; float minSab = .5f; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("knn")) { knn = Integer.parseInt(line.getOptionValue("knn")); } if (line.hasOption("sab")) { minSab = Float.parseFloat(line.getOptionValue("sab")); } if (line.hasOption("desc")) { descMap = readDesc(new File(line.getOptionValue("desc"))); } if (line.hasOption("outFile")) { out = new PrintStream(new File(line.getOptionValue("outFile"))); } args = line.getArgs(); if (args.length != 2) { throw new Exception("Unexpected number of command line arguments"); } refFile = new File(args[0]); queryFile = new File(args[1]); } catch (Exception e) { new HelpFormatter().printHelp("seqmatch <refseqs | trainee_file_or_dir> <query_file>\n" + " trainee_file_or_dir is a single trainee file or a directory containing multiple trainee files", options); System.err.println("Error: " + e.getMessage()); return; } SeqMatch seqmatch = null; if (refFile.isDirectory()) { // a directory of trainee files List<SeqMatch> engineList = new ArrayList<SeqMatch>(); for (File f : refFile.listFiles()) { if (!f.isHidden()) { TwowaySeqMatch match = new TwowaySeqMatch(new SeqMatchEngine(new StorageTrainee(f))); engineList.add(match); } } seqmatch = new MultiTraineeSeqMatch(engineList); } else { // a single fasta file or trainee file if (SeqUtils.guessFileFormat(refFile) == SequenceFormat.UNKNOWN) { seqmatch = CLISeqMatchFactory.trainTwowaySeqMatch(new StorageTrainee(refFile)); } else { seqmatch = CreateMultiMatchFromFile.getMultiMatch(refFile); } } out.println("query name\tmatch seq\torientation\tS_ab score\tunique oligomers\tdescription"); SeqReader reader = new SequenceReader(queryFile); Sequence seq; while ((seq = reader.readNextSequence()) != null) { SeqMatchResultSet resultSet = seqmatch.match(seq, knn); for (SeqMatchResult result : resultSet) { char r = '+'; if (result.isReverse()) { r = '-'; } if (result.getScore() > minSab) { out.println(seq.getSeqName() + "\t" + result.getSeqName() + "\t" + r + "\t" + result.getScore() + "\t" + resultSet.getQueryWordCount() + "\t" + descMap.get(result.getSeqName())); } } } out.close(); } else { throw new IllegalArgumentException("USAGE: SeqMatchMain [train|seqmatch] <args>"); } }