List of usage examples for java.io InputStreamReader InputStreamReader
public InputStreamReader(InputStream in, CharsetDecoder dec)
From source file:io.bfscan.clueweb12.BuildWarcTrecIdMapping.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file") .create(INPUT_OPTION));/* ww w. ja v a 2 s .co m*/ options.addOption( OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("maximum number of documents to index").create(MAX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads") .create(THREADS_OPTION)); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(BuildWarcTrecIdMapping.class.getCanonicalName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION)) : Integer.MAX_VALUE; int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION)) : DEFAULT_NUM_THREADS; long startTime = System.currentTimeMillis(); String path = cmdline.getOptionValue(INPUT_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); Directory dir = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); LOG.info("Creating index at " + indexPath); LOG.info("Indexing with " + threads + " threads"); FileInputStream fis = null; BufferedReader br = null; try { fis = new FileInputStream(new File(path)); byte[] ignoreBytes = new byte[2]; fis.read(ignoreBytes); // "B", "Z" bytes from commandline tools br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fis), "UTF8")); ExecutorService executor = Executors.newFixedThreadPool(threads); int cnt = 0; String s; while ((s = br.readLine()) != null) { Runnable worker = new AddDocumentRunnable(writer, s); executor.execute(worker); cnt++; if (cnt % 1000000 == 0) { LOG.info(cnt + " articles added"); } if (cnt >= maxdocs) { break; } } executor.shutdown(); // Wait until all threads are finish while (!executor.isTerminated()) { } LOG.info("Total of " + cnt + " articles indexed."); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); out.close(); br.close(); fis.close(); } }
From source file:io.druid.server.sql.SQLRunner.java
public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption("h", "help", false, "help"); options.addOption("v", false, "verbose"); options.addOption("e", "host", true, "endpoint [hostname:port]"); CommandLine cmd = new GnuParser().parse(options, args); if (cmd.hasOption("h")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("SQLRunner", options); System.exit(2);/* ww w . ja va 2 s. c om*/ } String hostname = cmd.getOptionValue("e", "localhost:8080"); String sql = cmd.getArgs().length > 0 ? cmd.getArgs()[0] : STATEMENT; ObjectMapper objectMapper = new DefaultObjectMapper(); ObjectWriter jsonWriter = objectMapper.writerWithDefaultPrettyPrinter(); CharStream stream = new ANTLRInputStream(sql); DruidSQLLexer lexer = new DruidSQLLexer(stream); TokenStream tokenStream = new CommonTokenStream(lexer); DruidSQLParser parser = new DruidSQLParser(tokenStream); lexer.removeErrorListeners(); parser.removeErrorListeners(); lexer.addErrorListener(ConsoleErrorListener.INSTANCE); parser.addErrorListener(ConsoleErrorListener.INSTANCE); try { DruidSQLParser.QueryContext queryContext = parser.query(); if (parser.getNumberOfSyntaxErrors() > 0) throw new IllegalStateException(); // parser.setBuildParseTree(true); // System.err.println(q.toStringTree(parser)); } catch (Exception e) { String msg = e.getMessage(); if (msg != null) System.err.println(e); System.exit(1); } final Query query; final TypeReference typeRef; boolean groupBy = false; if (parser.groupByDimensions.isEmpty()) { query = Druids.newTimeseriesQueryBuilder().dataSource(parser.getDataSource()) .aggregators(new ArrayList<AggregatorFactory>(parser.aggregators.values())) .postAggregators(parser.postAggregators).intervals(parser.intervals) .granularity(parser.granularity).filters(parser.filter).build(); typeRef = new TypeReference<List<Result<TimeseriesResultValue>>>() { }; } else { query = GroupByQuery.builder().setDataSource(parser.getDataSource()) .setAggregatorSpecs(new ArrayList<AggregatorFactory>(parser.aggregators.values())) .setPostAggregatorSpecs(parser.postAggregators).setInterval(parser.intervals) .setGranularity(parser.granularity).setDimFilter(parser.filter) .setDimensions(new ArrayList<DimensionSpec>(parser.groupByDimensions.values())).build(); typeRef = new TypeReference<List<Row>>() { }; groupBy = true; } String queryStr = jsonWriter.writeValueAsString(query); if (cmd.hasOption("v")) System.err.println(queryStr); URL url = new URL(String.format("http://%s/druid/v2/?pretty", hostname)); final URLConnection urlConnection = url.openConnection(); urlConnection.addRequestProperty("content-type", MediaType.APPLICATION_JSON); urlConnection.getOutputStream().write(StringUtils.toUtf8(queryStr)); BufferedReader stdInput = new BufferedReader( new InputStreamReader(urlConnection.getInputStream(), Charsets.UTF_8)); Object res = objectMapper.readValue(stdInput, typeRef); Joiner tabJoiner = Joiner.on("\t"); if (groupBy) { List<Row> rows = (List<Row>) res; Iterable<String> dimensions = Iterables.transform(parser.groupByDimensions.values(), new Function<DimensionSpec, String>() { @Override public String apply(@Nullable DimensionSpec input) { return input.getOutputName(); } }); System.out.println( tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), dimensions, parser.fields))); for (final Row r : rows) { System.out.println(tabJoiner.join(Iterables.concat( Lists.newArrayList(parser.granularity.toDateTime(r.getTimestampFromEpoch())), Iterables.transform(parser.groupByDimensions.values(), new Function<DimensionSpec, String>() { @Override public String apply(@Nullable DimensionSpec input) { return Joiner.on(",").join(r.getDimension(input.getOutputName())); } }), Iterables.transform(parser.fields, new Function<String, Object>() { @Override public Object apply(@Nullable String input) { return r.getFloatMetric(input); } })))); } } else { List<Result<TimeseriesResultValue>> rows = (List<Result<TimeseriesResultValue>>) res; System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), parser.fields))); for (final Result<TimeseriesResultValue> r : rows) { System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList(r.getTimestamp()), Lists.transform(parser.fields, new Function<String, Object>() { @Override public Object apply(@Nullable String input) { return r.getValue().getMetric(input); } })))); } } CloseQuietly.close(stdInput); }
From source file:com.tamingtext.classifier.mlt.TestMoreLikeThis.java
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory").withShortName("i").create(); Option modelOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("The directory containing the index model").withShortName("m").create(); Option categoryFieldOpt = obuilder.withLongName("categoryField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing category information").withShortName("catf") .create();/*from ww w . j a v a 2s. com*/ Option contentFieldOpt = obuilder.withLongName("contentField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing content information").withShortName("contf") .create(); Option maxResultsOpt = obuilder.withLongName("maxResults").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Number of results to retrive, default: 10 ").withShortName("r").create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: knn|tfidf. Default: bayes").withShortName("type").create(); Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt) .withOption(inputDirOpt).withOption(modelOpt).withOption(typeOpt).withOption(contentFieldOpt) .withOption(categoryFieldOpt).withOption(maxResultsOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String classifierType = (String) cmdLine.getValue(typeOpt); int gramSize = 1; if (cmdLine.hasOption(gramSizeOpt)) { gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)); } int maxResults = 10; if (cmdLine.hasOption(maxResultsOpt)) { maxResults = Integer.parseInt((String) cmdLine.getValue(maxResultsOpt)); } String inputPath = (String) cmdLine.getValue(inputDirOpt); String modelPath = (String) cmdLine.getValue(modelOpt); String categoryField = (String) cmdLine.getValue(categoryFieldOpt); String contentField = (String) cmdLine.getValue(contentFieldOpt); MatchMode mode; if ("knn".equalsIgnoreCase(classifierType)) { mode = MatchMode.KNN; } else if ("tfidf".equalsIgnoreCase(classifierType)) { mode = MatchMode.TFIDF; } else { throw new IllegalArgumentException("Unkown classifierType: " + classifierType); } Directory directory = FSDirectory.open(new File(modelPath)); IndexReader indexReader = IndexReader.open(directory); Analyzer analyzer //<co id="mlt.analyzersetup"/> = new EnglishAnalyzer(Version.LUCENE_36); MoreLikeThisCategorizer categorizer = new MoreLikeThisCategorizer(indexReader, categoryField); categorizer.setAnalyzer(analyzer); categorizer.setMatchMode(mode); categorizer.setFieldNames(new String[] { contentField }); categorizer.setMaxResults(maxResults); categorizer.setNgramSize(gramSize); File f = new File(inputPath); if (!f.isDirectory()) { throw new IllegalArgumentException(f + " is not a directory or does not exit"); } File[] inputFiles = FileUtil.buildFileList(f); String line = null; //<start id="lucene.examples.mlt.test"/> final ClassifierResult UNKNOWN = new ClassifierResult("unknown", 1.0); ResultAnalyzer resultAnalyzer = //<co id="co.mlt.ra"/> new ResultAnalyzer(categorizer.getCategories(), UNKNOWN.getLabel()); for (File ff : inputFiles) { //<co id="co.mlt.read"/> BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(ff), "UTF-8")); while ((line = in.readLine()) != null) { String[] parts = line.split("\t"); if (parts.length != 2) { continue; } CategoryHits[] hits //<co id="co.mlt.cat"/> = categorizer.categorize(new StringReader(parts[1])); ClassifierResult result = hits.length > 0 ? hits[0] : UNKNOWN; resultAnalyzer.addInstance(parts[0], result); //<co id="co.mlt.an"/> } in.close(); } System.out.println(resultAnalyzer.toString());//<co id="co.mlt.print"/> /* <calloutlist> <callout arearefs="co.mlt.ra">Create <classname>ResultAnalyzer</classname></callout> <callout arearefs="co.mlt.read">Read Test data</callout> <callout arearefs="co.mlt.cat">Categorize</callout> <callout arearefs="co.mlt.an">Collect Results</callout> <callout arearefs="co.mlt.print">Display Results</callout> </calloutlist> */ //<end id="lucene.examples.mlt.test"/> } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:com.dict.crawl.NewsNationalGeographicCrawler.java
public static void main(String[] args) throws Exception { /*string,crawlPath??crawlPath, ????crawlPath/* w w w .jav a2 s .c om*/ */ NewsNationalGeographicCrawler crawler = new NewsNationalGeographicCrawler("data/NewsNationalGeographic"); crawler.setThreads(2); crawler.addSeed("http://ngm.nationalgeographic.com/"); if (BaseCrawler.isNormalTime()) { crawler.addSeed("http://ngm.nationalgeographic.com/archives"); crawler.addSeed("http://ngm.nationalgeographic.com/featurehub"); // //} String jsonUrl = "http://news.nationalgeographic.com/bin/services/news/public/query/content.json?pageSize=20&page=0&contentTypes=news/components/pagetypes/article,news/components/pagetypes/simple-article,news/components/pagetypes/photo-gallery"; URL urls = new URL(jsonUrl); HttpURLConnection urlConnection = (HttpURLConnection) urls.openConnection(); InputStream is = urlConnection.getInputStream(); Reader rd = new InputStreamReader(is, "utf-8"); JsonArray json = new JsonParser().parse(rd).getAsJsonArray(); for (JsonElement jOb : json) { String url = jOb.getAsJsonObject().get("page").getAsJsonObject().get("url").getAsString(); if (url != null && !url.equals("")) crawler.addSeed(url); } } // crawler.addSeed("http://news.nationalgeographic.com/2016/01/160118-mummies-world-bog-egypt-science/"); // List<Map<String, Object>> urls = crawler.jdbcTemplate.queryForList("SELECT id,title,url FROM parser_page where host like '%news.national%' ORDER by id desc;"); // for(int i = 0; i < urls.size(); i++) { // String url = (String) urls.get(i).get("url"); // String title = (String) urls.get(i).get("title"); //// int id = (int) urls.get(i).get("id"); // crawler.addSeed(url); // } // Config Config.WAIT_THREAD_END_TIME = 1000 * 60 * 5;//???kill // Config.TIMEOUT_CONNECT = 1000*10; // Config.TIMEOUT_READ = 1000*30; Config.requestMaxInterval = 1000 * 60 * 20;//??-?>hung //requester??http??requester?http/socks? HttpRequesterImpl requester = (HttpRequesterImpl) crawler.getHttpRequester(); AntiAntiSpiderHelper.defaultUserAgent(requester); // requester.setUserAgent("Mozilla/5.0 (X11; Linux i686; rv:33.0) Gecko/20100101 Firefox/33.0"); // requester.setCookie("CNZZDATA1950488=cnzz_eid%3D739324831-1432460954-null%26ntime%3D1432460954; wdcid=44349d3f2aa96e51; vjuids=-53d395da8.14eca7eed44.0.f17be67e; CNZZDATA3473518=cnzz_eid%3D1882396923-1437965756-%26ntime%3D1440635510; pt_37a49e8b=uid=FuI4KYEfVz5xq7L4nzPd1w&nid=1&vid=r4AhSBmxisCiyeolr3V2Ow&vn=1&pvn=1&sact=1440639037916&to_flag=0&pl=t4NrgYqSK5M357L2nGEQCw*pt*1440639015734; _ga=GA1.3.1121158748.1437970841; __auc=c00a6ac114d85945f01d9c30128; CNZZDATA1975683=cnzz_eid%3D250014133-1432460541-null%26ntime%3D1440733997; CNZZDATA1254041250=2000695407-1442220871-%7C1442306691; pt_7f0a67e8=uid=6lmgYeZ3/jSObRMeK-t27A&nid=0&vid=lEKvEtZyZdd0UC264UyZnQ&vn=2&pvn=1&sact=1442306703728&to_flag=0&pl=7GB3sYS/PJDo1mY0qeu2cA*pt*1442306703728; 7NSx_98ef_saltkey=P05gN8zn; 7NSx_98ef_lastvisit=1444281282; IframeBodyHeight=256; NTVq_98ef_saltkey=j5PydYru; NTVq_98ef_lastvisit=1444282735; NTVq_98ef_atarget=1; NTVq_98ef_lastact=1444286377%09api.php%09js; 7NSx_98ef_sid=hZyDwc; __utmt=1; __utma=155578217.1121158748.1437970841.1443159326.1444285109.23; __utmb=155578217.57.10.1444285109; __utmc=155578217; __utmz=155578217.1439345650.3.2.utmcsr=travel.chinadaily.com.cn|utmccn=(referral)|utmcmd=referral|utmcct=/; CNZZDATA3089622=cnzz_eid%3D1722311508-1437912344-%26ntime%3D1444286009; wdlast=1444287704; vjlast=1437916393.1444285111.11; 7NSx_98ef_lastact=1444287477%09api.php%09chinadaily; pt_s_3bfec6ad=vt=1444287704638&cad=; pt_3bfec6ad=uid=bo87MAT/HC3hy12HDkBg1A&nid=0&vid=erwHQyFKxvwHXYc4-r6n-w&vn=28&pvn=2&sact=1444287708079&to_flag=0&pl=kkgvLoEHXsCD2gs4VJaWQg*pt*1444287704638; pt_t_3bfec6ad=?id=3bfec6ad.bo87MAT/HC3hy12HDkBg1A.erwHQyFKxvwHXYc4-r6n-w.kkgvLoEHXsCD2gs4VJaWQg.nZJ9Aj/bgfNDIKBXI5TwRQ&stat=167.132.1050.1076.1body%20div%3Aeq%288%29%20ul%3Aeq%280%29%20a%3Aeq%282%29.0.0.1595.3441.146.118&ptif=4"); //?? Mozilla/5.0 (X11; Linux i686; rv:34.0) Gecko/20100101 Firefox/34.0 //c requester.setProxy(" /* //?? RandomProxyGenerator proxyGenerator=new RandomProxyGenerator(); proxyGenerator.addProxy("127.0.0.1",8080,Proxy.Type.SOCKS); requester.setProxyGenerator(proxyGenerator); */ /*??*/ // crawler.setResumable(true); crawler.setResumable(false); crawler.start(2); }
From source file:jackrabbit.app.App.java
public static void main(String[] args) { if (args.length == 0 || args.length == 1 && args[0].equals("-h")) { System.out.println("Usage: java -jar ackrabbit-migration-query-tool-" + VERSION + "-jar-with-dependencies.jar " + "--src src --src-conf conf [--src-repo-path path] [--src-user src_user] [--src-passwd src_pw] " + "[--dest-user dest_user] [--dest-passwd dest_pw] [--dest dest] [--dest-conf conf] [--dest-repo-path path] " + "[--cnd cnd] [--node-limit limit] " + "[--query-type type] [--query query]"); System.out.println("\t --src source repository directory"); System.out.println("\t --src-conf source repository configuration file"); System.out.println("\t --src-repo-path path to source node to copy from; default is \"/\""); System.out.println("\t --src-user source repository login"); System.out.println("\t --src-passwd source repository password"); System.out.println("\t --dest destination repository directory"); System.out.println("\t --dest-conf destination repository configuration file"); System.out.println("\t --dest-repo-path path to destination node to copy to; default is \"/\""); System.out.println("\t --dest-user destination repository login"); System.out.println("\t --dest-passwd destination repository password"); System.out.println(/*from www . jav a 2s . c om*/ "\t --node-limit size to partition nodes with before copying. If it is not supplied, no partitioning is performed"); System.out.println("\t --cnd common node type definition file"); System.out.println( "\t --query query to run in src. If --query is specified, then --dest, --dest-conf, --dest-repo-path and --cnd will be ignored."); System.out.println("\t --query-type query type (sql, xpath, JCR-SQL2); default is JCR-SQL2"); return; } for (int i = 0; i < args.length; i = i + 2) { if (args[i].equals("--src") && i + 1 < args.length) { srcRepoDir = args[i + 1]; } else if (args[i].equals("--dest") && i + 1 < args.length) { destRepoDir = args[i + 1]; } else if (args[i].equals("--src-conf") && i + 1 < args.length) { srcConf = args[i + 1]; } else if (args[i].equals("--dest-conf") && i + 1 < args.length) { destConf = args[i + 1]; } else if (args[i].equals("--src-repo-path") && i + 1 < args.length) { srcRepoPath = args[i + 1]; } else if (args[i].equals("--dest-repo-path") && i + 1 < args.length) { destRepoPath = args[i + 1]; } else if (args[i].equals("--src-user") && i + 1 < args.length) { srcUser = args[i + 1]; } else if (args[i].equals("--src-passwd") && i + 1 < args.length) { srcPasswd = args[i + 1]; } else if (args[i].equals("--dest-user") && i + 1 < args.length) { destUser = args[i + 1]; } else if (args[i].equals("--dest-passwd") && i + 1 < args.length) { destPasswd = args[i + 1]; } else if (args[i].equals("--node-limit") && i + 1 < args.length) { nodeLimit = Long.parseLong(args[i + 1]); } else if (args[i].equals("--cnd") && i + 1 < args.length) { cndPath = args[i + 1]; } else if (args[i].equals("--query") && i + 1 < args.length) { query = args[i + 1]; } else if (args[i].equals("--query-type") && i + 1 < args.length) { queryType = args[i + 1]; } } boolean missingArgs = false; if (srcRepoDir.isEmpty()) { missingArgs = true; log.error("Please specify the --src option."); } if (destRepoDir.isEmpty() && !destConf.isEmpty()) { missingArgs = true; log.error("Please specify the --dest option."); } if (srcConf.isEmpty()) { missingArgs = true; log.error("Please specify the --src-conf option."); } if (destConf.isEmpty() && !destRepoDir.isEmpty()) { missingArgs = true; log.error("Please specify the --dest-conf option."); } if (missingArgs) return; SimpleCredentials credentials = new SimpleCredentials(srcUser, srcPasswd.toCharArray()); SimpleCredentials destCredentials = new SimpleCredentials(destUser, destPasswd.toCharArray()); JackrabbitRepository dest = null; RepositoryFactory destRf = null; RepositoryFactory srcRf = new RepositoryFactoryImpl(srcConf, srcRepoDir); if (!destConf.isEmpty()) { destRf = new RepositoryFactoryImpl(destConf, destRepoDir); } try { final JackrabbitRepository src = srcRf.getRepository(); SessionFactory srcSf = new SessionFactoryImpl(src, credentials); final Session srcSession = srcSf.getSession(); Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { srcSession.logout(); src.shutdown(); } }); if (destConf.isEmpty()) {//query mode BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); if (query.isEmpty()) { while (true) { System.out.print(">"); String line = in.readLine(); if (line == null || line.isEmpty() || line.equalsIgnoreCase("quit") || line.equalsIgnoreCase("exit")) { break; } try { runQuery(srcSession, line, queryType); } catch (RepositoryException e) { log.error(e.getMessage(), e); } } } else { try { runQuery(srcSession, query, queryType); } catch (RepositoryException e) { log.error(e.getMessage(), e); } } return; } dest = destRf.getRepository(); SessionFactory destSf = new SessionFactoryImpl(dest, destCredentials); Session destSession = destSf.getSession(); try { RepositoryManager.registerCustomNodeTypes(destSession, cndPath); if (nodeLimit == 0) NodeCopier.copy(srcSession, destSession, srcRepoPath, destRepoPath, true); else NodeCopier.copy(srcSession, destSession, srcRepoPath, destRepoPath, nodeLimit, true); log.info("Copying " + srcSession.getWorkspace().getName() + " to " + destSession.getWorkspace().getName() + " for " + srcRepoDir + " done."); } catch (ParseException e) { log.error(e.getMessage(), e); } catch (IOException e) { log.error(e.getMessage(), e); } catch (PathNotFoundException e) { log.error(e.getMessage(), e); } catch (RepositoryException e) { log.error(e.getMessage(), e); } List<String> destWkspaces = RepositoryManager.getDestinationWorkspaces(srcSession, destSession); for (String workspace : destWkspaces) { Session wsSession = srcSf.getSession(workspace); Session wsDestSession = destSf.getSession(workspace); try { RepositoryManager.registerCustomNodeTypes(wsDestSession, cndPath); if (nodeLimit == 0) NodeCopier.copy(wsSession, wsDestSession, srcRepoPath, destRepoPath, true); else { NodeCopier.copy(wsSession, wsDestSession, srcRepoPath, destRepoPath, nodeLimit, true); } log.info("Copying " + wsSession.getWorkspace().getName() + " to " + wsDestSession.getWorkspace().getName() + " for " + srcRepoDir + " done."); } catch (IOException e) { log.error(e.getMessage(), e); } catch (ParseException e) { log.error(e.getMessage(), e); } catch (PathNotFoundException e) { log.error(e.getMessage(), e); } catch (RepositoryException e) { log.error(e.getMessage(), e); } finally { wsSession.logout(); wsDestSession.logout(); } } } catch (IOException e) { log.error(e.getMessage(), e); } catch (PathNotFoundException e) { log.error(e.getMessage(), e); } catch (RepositoryException e) { log.error(e.getMessage(), e); } finally { if (dest != null) dest.shutdown(); } }
From source file:com.test.demo.ccbpay.XLSX2CSV.java
public static void main(String[] args) throws Exception { File f = new File( "C:\\Users\\dmall\\Desktop\\???\\1493142812All2018-05-15_2018-05-20.csv"); InputStreamReader reader = new InputStreamReader(new FileInputStream(f), "GBK"); CSVReader csvReader = new CSVReader(reader); String[] csvRow = null; // row long csvDataSize = 0; long pay0515 = 0L; long re0515 = 0L; long cost0515 = 0L; while ((csvRow = csvReader.readNext()) != null) { if (csvRow.length != 24) { continue; }//from w w w . j av a 2s. c om csvDataSize += 1; if (csvDataSize > 1) { if (csvRow[0].contains("2018-05-17")) { pay0515 += BigDecimal.valueOf(Double.valueOf(csvRow[12].substring(1, csvRow[12].length()))) .multiply(new BigDecimal(100)).longValue(); re0515 += BigDecimal.valueOf(Double.valueOf(csvRow[16].substring(1, csvRow[16].length()))) .multiply(new BigDecimal(100)).longValue(); cost0515 += BigDecimal.valueOf(Double.valueOf(csvRow[22].substring(1, csvRow[22].length()))) .multiply(new BigDecimal(100)).longValue(); } } } System.out.println(pay0515); System.out.println(re0515); System.out.println(cost0515); System.out.println(pay0515 - re0515 - cost0515); }
From source file:net.java.sen.tools.MkSenDic.java
/** * Build sen dictionary./*from ww w . j av a 2 s . c om*/ * * @param args * custom dictionary files. see dic/build.xml. */ public static void main(String args[]) { ResourceBundle rb = ResourceBundle.getBundle("dictionary"); DictionaryMaker dm1 = new DictionaryMaker(); DictionaryMaker dm2 = new DictionaryMaker(); DictionaryMaker dm3 = new DictionaryMaker(); // 1st field information of connect file. Vector rule1 = new Vector(); // 2nd field information of connect file. Vector rule2 = new Vector(); // 3rd field information of connect file. Vector rule3 = new Vector(); // 4th field information of connect file. // this field shows cost of morpheme connection // [size3*(x3*size2+x2)+x1] // [size3*(Attr1*size2+Attr2)+Attl] short score[] = new short[20131]; long start = System.currentTimeMillis(); // ///////////////////////////////////////// // // Step1. Loading connetion file. // log.info("(1/7): reading connection matrix ... "); try { log.info("connection file = " + rb.getString("text_connection_file")); log.info("charset = " + rb.getString("dic.charset")); CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")), rb.getString("dic.charset")); String t[]; int line = 0; while ((t = csvparser.nextTokens()) != null) { if (t.length < 4) { log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line); log.warn(rb.getString("text_connection_file") + "may be broken."); break; } dm1.add(t[0]); rule1.add(t[0]); dm2.add(t[1]); rule2.add(t[1]); dm3.add(t[2]); rule3.add(t[2]); if (line == score.length) { score = resize(score); } score[line++] = (short) Integer.parseInt(t[3]); } // ///////////////////////////////////////// // // Step2. Building internal dictionary // log.info("(2/7): building type dictionary ... "); dm1.build(); dm2.build(); dm3.build(); // if you want check specified morpheme, you uncomment and modify // following line: /* * System.out.print("22="); dm3.getById(22); * System.out.print("368="); dm3.getById(368); * * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * DictionaryMaker.debug = true; * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?")); */ } catch (IOException e) { e.printStackTrace(); System.exit(0); } // ------------------------------------------------- int size1 = dm1.size(); int size2 = dm2.size(); int size3 = dm3.size(); int ruleSize = rule1.size(); short matrix[] = new short[size1 * size2 * size3]; short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost")); // ///////////////////////////////////////// // // Step3. Writing Connection Matrix // log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = " + size1 * size2 * size3 + ") ..."); for (int i = 0; i < (int) (size1 * size2 * size3); i++) matrix[i] = default_cost; for (int i = 0; i < ruleSize; i++) { Vector r1 = dm1.getRuleIdList((String) rule1.get(i)); Vector r2 = dm2.getRuleIdList((String) rule2.get(i)); Vector r3 = dm3.getRuleIdList((String) rule3.get(i)); for (Iterator i1 = r1.iterator(); i1.hasNext();) { int ii1 = ((Integer) i1.next()).intValue(); for (Iterator i2 = r2.iterator(); i2.hasNext();) { int ii2 = ((Integer) i2.next()).intValue(); for (Iterator i3 = r3.iterator(); i3.hasNext();) { int ii3 = ((Integer) i3.next()).intValue(); int pos = size3 * (size2 * ii1 + ii2) + ii3; matrix[pos] = score[i]; } } } } try { DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file")))); out.writeShort(size1); out.writeShort(size2); out.writeShort(size3); for (int i1 = 0; i1 < size1; i1++) for (int i2 = 0; i2 < size2; i2++) for (int i3 = 0; i3 < size3; i3++) { out.writeShort(matrix[size3 * (size2 * i1 + i2) + i3]); // if (matrix[size3 * (size2 * i1 + i2) + i3] != // default_cost) { // } } out.close(); } catch (IOException e) { e.printStackTrace(); System.exit(0); } matrix = null; score = null; // ------------------------------------------------- int pos_start = Integer.parseInt(rb.getString("pos_start")); int pos_size = Integer.parseInt(rb.getString("pos_size")); int di = 0; int offset = 0; ArrayList dicList = new ArrayList(); // ///////////////////////////////////////// // // Step4. Reading Morpheme Information // log.info("(4/7): reading morpheme information ... "); String t = null; String[] csv = null; try { // writer for feature file. BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset"))); log.info("load dic: " + rb.getString("text_dic_file")); BufferedReader dicStream = null; int custom_dic = -1; if (args.length == 0) { dicStream = new BufferedReader(new InputStreamReader( new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset"))); } else { custom_dic = 0; dicStream = new BufferedReader( new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } int line = 0; CSVData key_b = new CSVData(); CSVData pos_b = new CSVData(); while (true) { t = dicStream.readLine(); if (t == null) { dicStream.close(); custom_dic++; if (args.length == custom_dic) { break; } else { // read custum dictionary log.info("load dic: " + "args[custum_dic]"); dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } continue; } CSVParser parser = new CSVParser(t); csv = parser.nextTokens(); if (csv.length < (pos_size + pos_start)) { throw new RuntimeException("format error:" + t); } key_b.clear(); pos_b.clear(); for (int i = pos_start; i < (pos_start + pos_size - 1); i++) { key_b.append(csv[i]); pos_b.append(csv[i]); } key_b.append(csv[pos_start + pos_size - 1]); pos_b.append(csv[pos_start + pos_size - 1]); for (int i = pos_start + pos_size; i < (csv.length - 1); i++) { pos_b.append(csv[i]); } pos_b.append(csv[csv.length - 1]); CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(key_b.toString()); token.rcAttr1 = (short) dm2.getDicId(key_b.toString()); token.lcAttr = (short) dm3.getDicId(key_b.toString()); token.posid = 0; token.posID = offset; token.length = (short) csv[0].length(); token.cost = (short) Integer.parseInt(csv[1]); dicList.add(new PairObject(csv[0], token)); byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset")); offset += (b.length + 1); String pos_b_str = pos_b.toString(); bw.write(pos_b_str, 0, pos_b_str.length()); // bw.write(b, 0, b.length); bw.write(0); if (++di % 50000 == 0) log.info("" + di + "... "); } bw.close(); // ----end of writing feature.cha ---- } catch (Exception e) { log.error("Error: " + t); e.printStackTrace(); System.exit(1); } rule1 = null; rule2 = null; rule3 = null; // ///////////////////////////////////////// // // Step5. Sort lexs and write to file // log.info("(5/7): sorting lex... "); int value[] = new int[dicList.size()]; char key[][] = new char[dicList.size()][]; int spos = 0; int dsize = 0; int bsize = 0; String prev = ""; Collections.sort(dicList); // ///////////////////////////////////////// // // Step6. Writing Token Information // log.info("(6/7): writing token... "); try { // writer for token file. DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("token_file")))); // writing 'bos' and 'eos' and 'unknown' token. CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos")); token.posID = -1; token.write(out); log.info("key size = " + key.length); for (int i = 0; i < key.length; i++) { String k = (String) ((PairObject) dicList.get(i)).key; if (!prev.equals(k) && i != 0) { key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; bsize = 1; spos = i; } else { bsize++; } prev = (String) ((PairObject) dicList.get(i)).key; ((CToken) (((PairObject) dicList.get(i)).value)).write(out); } out.flush(); out.close(); } catch (Exception e) { e.printStackTrace(); System.exit(1); } key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; dm1 = null; dm2 = null; dm3 = null; dicList = null; // ///////////////////////////////////////// // // Step7. Build Double Array // log.info("(7/7): building Double-Array (size = " + dsize + ") ..."); DoubleArrayTrie da = new DoubleArrayTrie(); da.build(key, null, value, dsize); try { da.save(rb.getString("double_array_file")); } catch (Exception e) { e.printStackTrace(); } log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[ms]"); }
From source file:it.unimi.di.big.mg4j.tool.URLMPHVirtualDocumentResolver.java
public static void main(final String[] arg) throws JSAPException, IOException { final SimpleJSAP jsap = new SimpleJSAP(URLMPHVirtualDocumentResolver.class.getName(), "Builds a URL document resolver from a sequence of URIs, extracted typically using ScanMetadata, using a suitable function. You can specify that the list is sorted, in which case it is possible to generate a resolver that occupies less space.", new Parameter[] { new Switch("sorted", 's', "sorted", "URIs are sorted: use a monotone minimal perfect hash function."), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms."), new FlaggedOption("class", MG4JClassParser.getParser(), JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c', "class", "A class used to create the function from URIs to their ranks; defaults to it.unimi.dsi.sux4j.mph.MHWCFunction for non-sorted inputs, and to it.unimi.dsi.sux4j.mph.TwoStepsLcpMonotoneMinimalPerfectHashFunction for sorted inputs."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new FlaggedOption("termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "offline", "Read terms from this file (without loading them into core memory) instead of standard input."), new FlaggedOption("uniqueUris", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'U', "unique-uris", "Force URIs to be unique by adding random garbage at the end of duplicates; the argument is an upper bound for the number of URIs that will be read, and will be used to create a Bloom filter."), new UnflaggedOption("resolver", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;/*from www . j a v a 2s .co m*/ final int bufferSize = jsapResult.getInt("bufferSize"); final String resolverName = jsapResult.getString("resolver"); //final Class<?> tableClass = jsapResult.getClass( "class" ); final boolean iso = jsapResult.getBoolean("iso"); String termFile = jsapResult.getString("termFile"); BloomFilter<Void> filter = null; final boolean uniqueURIs = jsapResult.userSpecified("uniqueUris"); if (uniqueURIs) filter = BloomFilter.create(jsapResult.getInt("uniqueUris")); final Collection<? extends CharSequence> collection; if (termFile == null) { ArrayList<MutableString> termList = new ArrayList<MutableString>(); final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "URIs"; final LineIterator termIterator = new LineIterator( new FastBufferedReader(new InputStreamReader(System.in, "UTF-8"), bufferSize), pl); pl.start("Reading URIs..."); MutableString uri; while (termIterator.hasNext()) { uri = termIterator.next(); if (uniqueURIs) makeUnique(filter, uri); termList.add(uri.copy()); } pl.done(); collection = termList; } else { if (uniqueURIs) { // Create temporary file with unique URIs final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "URIs"; pl.start("Copying URIs..."); final LineIterator termIterator = new LineIterator( new FastBufferedReader(new InputStreamReader(new FileInputStream(termFile)), bufferSize), pl); File temp = File.createTempFile(URLMPHVirtualDocumentResolver.class.getName(), ".uniqueuris"); temp.deleteOnExit(); termFile = temp.toString(); final FastBufferedOutputStream outputStream = new FastBufferedOutputStream( new FileOutputStream(termFile), bufferSize); MutableString uri; while (termIterator.hasNext()) { uri = termIterator.next(); makeUnique(filter, uri); uri.writeUTF8(outputStream); outputStream.write('\n'); } pl.done(); outputStream.close(); } collection = new FileLinesCollection(termFile, "UTF-8"); } LOGGER.debug("Building function..."); final int width = jsapResult.getInt("width"); if (jsapResult.getBoolean("sorted")) BinIO.storeObject( new URLMPHVirtualDocumentResolver( new ShiftAddXorSignedStringMap(collection.iterator(), new TwoStepsLcpMonotoneMinimalPerfectHashFunction<CharSequence>(collection, iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16()), width)), resolverName); else BinIO.storeObject( new URLMPHVirtualDocumentResolver(new ShiftAddXorSignedStringMap(collection.iterator(), new MWHCFunction<CharSequence>(collection, iso ? TransformationStrategies.iso() : TransformationStrategies.utf16()), width)), resolverName); LOGGER.debug(" done."); }
From source file:net.sf.extjwnl.cli.ewn.java
public static void main(String[] args) throws IOException, JWNLException { if (args.length < 1) { System.out.println(USAGE); System.exit(0);/*from w ww . j a v a 2 s. c om*/ } //find dictionary Dictionary d = null; File config = new File(defaultConfig); if (!config.exists()) { if (System.getenv().containsKey("WNHOME")) { String wnHomePath = System.getenv().get("WNHOME"); File wnHome = new File(wnHomePath); if (wnHome.exists()) { d = Dictionary.getFileBackedInstance(wnHomePath); } else { log.error("Cannot find dictionary. Make sure " + defaultConfig + " is available or WNHOME variable is set."); } } } else { d = Dictionary.getInstance(new FileInputStream(config)); } if (null != d) { //parse and execute command line if ((-1 < args[0].indexOf('%') && -1 < args[0].indexOf(':')) || "-script".equals(args[0]) || (-1 < args[0].indexOf('#'))) { d.edit(); //edit if ("-script".equals(args[0])) { if (args.length < 2) { log.error("Filename missing for -script command"); System.exit(1); } else { File script = new File(args[1]); if (script.exists()) { //load into args ArrayList<String> newArgs = new ArrayList<String>(); BufferedReader in = new BufferedReader( new InputStreamReader(new FileInputStream(script), "UTF-8")); try { String str; while ((str = in.readLine()) != null) { String[] bits = str.split(" "); StringBuilder tempArg = null; for (String bit : bits) { int quoteCnt = 0; for (int j = 0; j < bit.length(); j++) { if ('"' == bit.charAt(j)) { quoteCnt++; } } if (null != tempArg) { if (0 == quoteCnt) { tempArg.append(" ").append(bit); } else { tempArg.append(" ").append(bit.replaceAll("\"\"", "\"")); if (1 == (quoteCnt % 2)) { newArgs.add( tempArg.toString().substring(1, tempArg.length() - 1)); tempArg = null; } } } else { if (0 == quoteCnt) { newArgs.add(bit); } else { if (1 == (quoteCnt % 2)) { tempArg = new StringBuilder(bit.replaceAll("\"\"", "\"")); } else { newArgs.add(bit.replaceAll("\"\"", "\"")); } } } } if (null != tempArg) { newArgs.add(tempArg.toString()); } } } finally { try { in.close(); } catch (IOException e) { //nop } } args = newArgs.toArray(args); } } } Word workWord = null; String key = null; String lemma = null; int lexFileNum = -1; int lexId = -1; // String headLemma = null; // int headLexId = -1; POS pos = null; String derivation = null; for (int i = 0; i < args.length; i++) { if (null == key && '-' != args[i].charAt(0) && ((-1 < args[i].indexOf('%') && -1 < args[i].indexOf(':')))) { key = args[i]; log.info("Searching " + key + "..."); if (null != key) { workWord = d.getWordBySenseKey(key); } if (null == workWord) { //parse sensekey lemma = key.substring(0, key.indexOf('%')).replace('_', ' '); String posId = key.substring(key.indexOf('%') + 1, key.indexOf(':')); if ("1".equals(posId) || "2".equals(posId) || "3".equals(posId) || "4".equals(posId) || "5".equals(posId)) { pos = POS.getPOSForId(Integer.parseInt(posId)); String lexFileString = key.substring(key.indexOf(':') + 1); if (-1 < lexFileString.indexOf(':')) { lexFileNum = Integer .parseInt(lexFileString.substring(0, lexFileString.indexOf(':'))); if (lexFileString.indexOf(':') + 1 < lexFileString.length()) { String lexIdString = lexFileString .substring(lexFileString.indexOf(':') + 1); if (-1 < lexIdString.indexOf(':')) { lexId = Integer .parseInt(lexIdString.substring(0, lexIdString.indexOf(':'))); // if (lexIdString.indexOf(':') + 1 < lexIdString.length()) { // headLemma = lexIdString.substring(lexIdString.indexOf(':') + 1); // if (-1 < headLemma.indexOf(':')) { // headLemma = headLemma.substring(0, headLemma.indexOf(':')); // if (null != headLemma && !"".equals(headLemma) && lexIdString.lastIndexOf(':') + 1 < lexIdString.length()) { // headLexId = Integer.parseInt(lexIdString.substring(lexIdString.lastIndexOf(':') + 1)); // } // } else { // log.error("Malformed sensekey " + key); // System.exit(1); // } // } } else { log.error("Malformed sensekey " + key); System.exit(1); } } else { log.error("Malformed sensekey " + key); System.exit(1); } } else { log.error("Malformed sensekey " + key); System.exit(1); } } else { log.error("Malformed sensekey " + key); System.exit(1); } } } else if (-1 < args[i].indexOf('#')) { if (2 < args[i].length()) { derivation = args[i].substring(2); if (null == derivation) { log.error("Missing derivation"); System.exit(1); } else { pos = POS.getPOSForKey(args[i].substring(0, 1)); if (null == pos) { log.error("POS " + args[i] + " is not recognized for derivation " + derivation); System.exit(1); } } } } if ("-add".equals(args[i])) { if (null == key) { log.error("Missing sensekey"); System.exit(1); } if (null != workWord) { log.error("Duplicate sensekey " + workWord.getSenseKey()); System.exit(1); } log.info("Creating " + pos.getLabel() + " synset..."); Synset tempSynset = d.createSynset(pos); log.info("Creating word " + lemma + "..."); workWord = new Word(d, tempSynset, 1, lemma); workWord.setLexId(lexId); tempSynset.getWords().add(workWord); tempSynset.setLexFileNum(lexFileNum); key = null; } if ("-remove".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { d.removeSynset(workWord.getSynset()); workWord = null; key = null; } } if ("-addword".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length && '-' != args[i].charAt(0)) { Word tempWord = new Word(d, workWord.getSynset(), workWord.getSynset().getWords().size() + 1, args[i]); workWord.getSynset().getWords().add(tempWord); key = null; } else { log.error( "Missing word for addword command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-removeword".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { workWord.getSynset().getWords().remove(workWord); key = null; } } if ("-setgloss".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length && '-' != args[i].charAt(0)) { workWord.getSynset().setGloss(args[i]); key = null; } else { log.error("Missing gloss for setgloss command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-setadjclus".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length && '-' != args[i].charAt(0)) { workWord.getSynset().setIsAdjectiveCluster(Boolean.parseBoolean(args[i])); key = null; } else { log.error("Missing flag for setadjclus command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-setverbframe".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length) { if (workWord instanceof Verb) { Verb verb = (Verb) workWord; if ('-' == args[i].charAt(0)) { verb.getVerbFrameFlags().clear(Integer.parseInt(args[i].substring(1))); } else { verb.getVerbFrameFlags().set(Integer.parseInt(args[i])); } } else { log.error("Word at " + workWord.getSenseKey() + " should be verb"); System.exit(1); } key = null; } else { log.error("Missing index for setverbframe command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-setverbframeall".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length) { if (workWord.getSynset() instanceof VerbSynset) { if ('-' == args[i].charAt(0)) { workWord.getSynset().getVerbFrameFlags() .clear(Integer.parseInt(args[i].substring(1))); } else { workWord.getSynset().getVerbFrameFlags().set(Integer.parseInt(args[i])); } } else { log.error("Synset at " + workWord.getSenseKey() + " should be verb"); System.exit(1); } key = null; } else { log.error("Missing index for setverbframeall command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-setlexfile".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length && '-' != args[i].charAt(0)) { if (-1 < args[i].indexOf('.')) { workWord.getSynset() .setLexFileNum(LexFileNameLexFileIdMap.getMap().get(args[i])); } else { workWord.getSynset().setLexFileNum(Integer.parseInt(args[i])); } } else { log.error("Missing file number or name for setlexfile command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-addptr".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length) { Word targetWord = d.getWordBySenseKey(args[i]); if (null != targetWord) { i++; if (i < args.length) { PointerType pt = PointerType.getPointerTypeForKey(args[i]); if (null != pt) { Pointer p; if (pt.isLexical()) { p = new Pointer(pt, workWord, targetWord); } else { p = new Pointer(pt, workWord.getSynset(), targetWord.getSynset()); } if (!workWord.getSynset().getPointers().contains(p)) { workWord.getSynset().getPointers().add(p); } else { log.error("Duplicate pointer of type " + pt + " to " + targetWord.getSenseKey() + " in addptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } else { log.error("Invalid pointer type at " + args[i] + " in addptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } else { log.error("Missing pointer type at " + args[i] + " in addptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } else { log.error("Missing target at " + args[i] + " in addptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } key = null; } else { log.error("Missing sensekey for addptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-removeptr".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length) { Word targetWord = d.getWordBySenseKey(args[i]); if (null != targetWord) { i++; if (i < args.length) { PointerType pt = PointerType.getPointerTypeForKey(args[i]); if (null != pt) { Pointer p; if (pt.isLexical()) { p = new Pointer(pt, workWord, targetWord); } else { p = new Pointer(pt, workWord.getSynset(), targetWord.getSynset()); } if (workWord.getSynset().getPointers().contains(p)) { workWord.getSynset().getPointers().remove(p); } else { log.error("Missing pointer of type " + pt + " to " + targetWord.getSenseKey() + " in removeptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } else { log.error("Invalid pointer type at " + args[i] + " in removeptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } else { log.error("Missing pointer type at " + args[i] + " in removeptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } else { log.error("Missing target at " + args[i] + " in removeptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } key = null; } else { log.error("Missing sensekey for removeptr command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-setlexid".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length && '-' != args[i].charAt(0)) { workWord.setLexId(Integer.parseInt(args[i])); key = null; } else { log.error("Missing lexid for setlexid command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-setusecount".equals(args[i])) { if (null == workWord) { log.error("Missing sensekey"); System.exit(1); } else { i++; if (i < args.length && '-' != args[i].charAt(0)) { workWord.setUseCount(Integer.parseInt(args[i])); key = null; } else { log.error("Missing count for setusecount command for sensekey " + workWord.getSenseKey()); System.exit(1); } } } if ("-addexc".equals(args[i])) { i++; if (i < args.length && '-' != args[i].charAt(0)) { String baseform = args[i]; Exc e = d.getException(pos, derivation); if (null != e) { if (null != e.getExceptions()) { if (!e.getExceptions().contains(baseform)) { e.getExceptions().add(baseform); } } } else { ArrayList<String> list = new ArrayList<String>(1); list.add(baseform); d.createException(pos, derivation, list); } derivation = null; } else { log.error("Missing baseform for addexc command for derivation " + derivation); System.exit(1); } } if ("-removeexc".equals(args[i])) { Exc e = d.getException(pos, derivation); if (null != e) { i++; if (i < args.length && '-' != args[i].charAt(0)) { String baseform = args[i]; if (null != e.getExceptions()) { if (e.getExceptions().contains(baseform)) { e.getExceptions().remove(baseform); } if (0 == e.getExceptions().size()) { d.removeException(e); } } } else { d.removeException(e); } } else { log.error("Missing derivation " + derivation); System.exit(1); } derivation = null; } } d.save(); } else { //browse String key = args[0]; if (1 == args.length) { for (POS pos : POS.getAllPOS()) { IndexWord iw = d.getIndexWord(pos, key); if (null == iw) { System.out.println("\nNo information available for " + pos.getLabel() + " " + key); } else { System.out.println( "\nInformation available for " + iw.getPOS().getLabel() + " " + iw.getLemma()); printAvailableInfo(iw); } if (null != d.getMorphologicalProcessor()) { List<String> forms = d.getMorphologicalProcessor().lookupAllBaseForms(pos, key); if (null != forms) { for (String form : forms) { if (!key.equals(form)) { iw = d.getIndexWord(pos, form); if (null != iw) { System.out.println("\nInformation available for " + iw.getPOS().getLabel() + " " + iw.getLemma()); printAvailableInfo(iw); } } } } } } } else { boolean needHelp = false; boolean needGloss = false; boolean needLex = false; boolean needOffset = false; boolean needSenseNum = false; boolean needSenseKeys = false; int needSense = 0; for (String arg : args) { if ("-h".equals(arg)) { needHelp = true; } if ("-g".equals(arg)) { needGloss = true; } if ("-a".equals(arg)) { needLex = true; } if ("-o".equals(arg)) { needOffset = true; } if ("-s".equals(arg)) { needSenseNum = true; } if ("-k".equals(arg)) { needSenseKeys = true; } if (arg.startsWith("-n") && 2 < arg.length()) { needSense = Integer.parseInt(arg.substring(2)); } } for (String arg : args) { if (arg.startsWith("-ants") && 6 == arg.length()) { if (needHelp) { System.out.println( "Display synsets containing direct antonyms of the search string.\n" + "\n" + "Direct antonyms are a pair of words between which there is an\n" + "associative bond built up by co-occurrences.\n" + "\n" + "Antonym synsets are preceded by \"=>\"."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nAntonyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.ANTONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //ants if (arg.startsWith("-hype") && 6 == arg.length()) { if (needHelp) { System.out.println( "Recursively display hypernym (superordinate) tree for the search\n" + "string.\n" + "\n" + "Hypernym is the generic term used to designate a whole class of\n" + "specific instances. Y is a hypernym of X if X is a (kind of) Y.\n" + "\n" + "Hypernym synsets are preceded by \"=>\", and are indented from\n" + "the left according to their level in the hierarchy."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nHypernyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.HYPERNYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //hype if (arg.startsWith("-hypo") && 6 == arg.length()) { if (needHelp) { System.out.println( "Display immediate hyponyms (subordinates) for the search string.\n" + "\n" + "Hyponym is the generic term used to designate a member of a class.\n" + "X is a hyponym of Y if X is a (kind of) Y.\n" + "\n" + "Hyponym synsets are preceded by \"=>\"."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nHyponyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.HYPONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //hypo if (arg.startsWith("-tree") && 6 == arg.length()) { if (needHelp) { System.out.println( "Display hyponym (subordinate) tree for the search string. This is\n" + "a recursive search that finds the hyponyms of each hyponym. \n" + "\n" + "Hyponym is the generic term used to designate a member of a class.\n" + "X is a hyponym of Y if X is a (kind of) Y. \n" + "\n" + "Hyponym synsets are preceded by \"=>\", and are indented from the left\n" + "according to their level in the hierarchy."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nHyponyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.HYPONYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //tree if (arg.startsWith("-enta") && 6 == arg.length()) { if (needHelp) { System.out.println( "Recursively display entailment relations of the search string.\n" + "\n" + "The action represented by the verb X entails Y if X cannot be done\n" + "unless Y is, or has been, done.\n" + "\n" + "Entailment synsets are preceded by \"=>\", and are indented from the left\n" + "according to their level in the hierarchy."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nEntailment of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.ENTAILMENT, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //enta if (arg.startsWith("-syns") && 6 == arg.length()) { POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nSynonyms of " + p.getLabel() + " " + iw.getLemma()); if (POS.ADJECTIVE == p) { if (needHelp) { System.out.println( "Display synonyms and synsets related to synsets containing\n" + "the search string. If the search string is in a head synset\n" + "the 'cluster's' satellite synsets are displayed. If the search\n" + "string is in a satellite synset, its head synset is displayed.\n" + "If the search string is a pertainym the word or synset that it\n" + "pertains to is displayed.\n" + "\n" + "A cluster is a group of adjective synsets that are organized around\n" + "antonymous pairs or triplets. An adjective cluster contains two or more\n" + "head synsets that contan antonyms. Each head synset has one or more\n" + "satellite synsets.\n" + "\n" + "A head synset contains at least one word that has a direct antonym\n" + "in another head synset of the same cluster.\n" + "\n" + "A satellite synset represents a concept that is similar in meaning to\n" + "the concept represented by its head synset.\n" + "\n" + "Direct antonyms are a pair of words between which there is an\n" + "associative bond built up by co-occurrences.\n" + "\n" + "Direct antonyms are printed in parentheses following the adjective.\n" + "The position of an adjective in relation to the noun may be restricted\n" + "to the prenominal, postnominal or predicative position. Where present\n" + "these restrictions are noted in parentheses.\n" + "\n" + "A pertainym is a relational adjective, usually defined by such phrases\n" + "as \"of or pertaining to\" and that does not have an antonym. It pertains\n" + "to a noun or another pertainym.\n" + "\n" + "Senses contained in head synsets are displayed above the satellites,\n" + "which are indented and preceded by \"=>\". Senses contained in\n" + "satellite synsets are displayed with the head synset below. The head\n" + "synset is preceded by \"=>\".\n" + "\n" + "Pertainym senses display the word or synsets that the search string\n" + "pertains to."); } tracePointers(iw, PointerType.SIMILAR_TO, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.PARTICIPLE_OF, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } if (POS.ADVERB == p) { if (needHelp) { System.out.println( "Display synonyms and synsets related to synsets containing\n" + "the search string. If the search string is a pertainym the word\n" + "or synset that it pertains to is displayed.\n" + "\n" + "A pertainym is a relational adverb that is derived from an adjective.\n" + "\n" + "Pertainym senses display the word that the search string is derived from\n" + "and the adjective synset that contains the word. If the adjective synset\n" + "is a satellite synset, its head synset is also displayed."); } tracePointers(iw, PointerType.PERTAINYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } if (POS.NOUN == p || POS.VERB == p) { if (needHelp) { System.out.println( "Recursively display hypernym (superordinate) tree for the search\n" + "string.\n" + "\n" + "Hypernym is the generic term used to designate a whole class of\n" + "specific instances. Y is a hypernym of X if X is a (kind of) Y.\n" + "\n" + "Hypernym synsets are preceded by \"=>\", and are indented from\n" + "the left according to their level in the hierarchy."); } tracePointers(iw, PointerType.HYPERNYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } } //syns if (arg.startsWith("-smem") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all holonyms of the search string.\n" + "\n" + "A holonym is the name of the whole of which the 'meronym' names a part.\n" + "Y is a holonym of X if X is a part of Y.\n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nMember Holonyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.MEMBER_HOLONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //smem if (arg.startsWith("-ssub") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all holonyms of the search string.\n" + "\n" + "A holonym is the name of the whole of which the 'meronym' names a part.\n" + "Y is a holonym of X if X is a part of Y.\n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nSubstance Holonyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.SUBSTANCE_HOLONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //ssub if (arg.startsWith("-sprt") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all holonyms of the search string.\n" + "\n" + "A holonym is the name of the whole of which the 'meronym' names a part.\n" + "Y is a holonym of X if X is a part of Y.\n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nPart Holonyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.PART_HOLONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //sprt if (arg.startsWith("-memb") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all meronyms of the search string. \n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y.\n" + "\n" + "A holonym is the name of the whole of which the meronym names a part.\n" + "Y is a holonym of X if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nMember Meronyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.MEMBER_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //memb if (arg.startsWith("-subs") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all meronyms of the search string. \n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y.\n" + "\n" + "A holonym is the name of the whole of which the meronym names a part.\n" + "Y is a holonym of X if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nSubstance Meronyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.SUBSTANCE_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //subs if (arg.startsWith("-part") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all meronyms of the search string. \n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y.\n" + "\n" + "A holonym is the name of the whole of which the meronym names a part.\n" + "Y is a holonym of X if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nPart Meronyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.PART_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //part if (arg.startsWith("-mero") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all meronyms of the search string. \n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y.\n" + "\n" + "A holonym is the name of the whole of which the meronym names a part.\n" + "Y is a holonym of X if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nMeronyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.MEMBER_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.SUBSTANCE_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.PART_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //mero if (arg.startsWith("-holo") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all holonyms of the search string.\n" + "\n" + "A holonym is the name of the whole of which the 'meronym' names a part.\n" + "Y is a holonym of X if X is a part of Y.\n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nHolonyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.MEMBER_HOLONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.SUBSTANCE_HOLONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.PART_HOLONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //holo if (arg.startsWith("-caus") && 6 == arg.length()) { if (needHelp) { System.out.println("Recursively display CAUSE TO relations of the search string.\n" + "\n" + "The action represented by the verb X causes the action represented by\n" + "the verb Y.\n" + "\n" + "CAUSE TO synsets are preceded by \"=>\", and are indented from the left\n" + "according to their level in the hierarchy."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\n'Cause to' of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.CAUSE, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //caus if (arg.startsWith("-pert") && 6 == arg.length()) { POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nPertainyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.PERTAINYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //pert if (arg.startsWith("-attr") && 6 == arg.length()) { POS p = POS.getPOSForKey(arg.substring(5)); if (needHelp) { if (POS.NOUN == p) { System.out .println("Display adjectives for which search string is an attribute."); } if (POS.ADJECTIVE == p) { System.out.println("Display nouns that are attributes of search string."); } } IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nAttributes of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.ATTRIBUTE, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //attr if (arg.startsWith("-deri") && 6 == arg.length()) { if (needHelp) { System.out.println( "Display derived forms - nouns and verbs that are related morphologically.\n" + "Each related synset is preceeded by its part of speech. Each word in the\n" + "synset is followed by its sense number."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nDerived forms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.NOMINALIZATION, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //deri if (arg.startsWith("-domn") && 6 == arg.length()) { if (needHelp) { System.out.println("Display domain to which this synset belongs."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nDomain of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.CATEGORY, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.USAGE, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.REGION, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //domn if (arg.startsWith("-domt") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all synsets belonging to the domain."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nDomain of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.CATEGORY_MEMBER, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.USAGE_MEMBER, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.REGION_MEMBER, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //domt if (arg.startsWith("-faml") && 6 == arg.length()) { if (needHelp) { System.out.println( "Display familiarity and polysemy information for the search string.\n" + "The polysemy count is the number of senses in WordNet."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { String[] freqs = { "extremely rare", "very rare", "rare", "uncommon", "common", "familiar", "very familiar", "extremely familiar" }; String[] pos = { "a noun", "a verb", "an adjective", "an adverb" }; int cnt = iw.getSenses().size(); int familiar = 0; if (cnt == 0) { familiar = 0; } if (cnt == 1) { familiar = 1; } if (cnt == 2) { familiar = 2; } if (cnt >= 3 && cnt <= 4) { familiar = 3; } if (cnt >= 5 && cnt <= 8) { familiar = 4; } if (cnt >= 9 && cnt <= 16) { familiar = 5; } if (cnt >= 17 && cnt <= 32) { familiar = 6; } if (cnt > 32) { familiar = 7; } System.out.println("\n" + iw.getLemma() + " used as " + pos[p.getId() - 1] + " is " + freqs[familiar] + " (polysemy count = " + cnt + ")"); } } //faml if (arg.startsWith("-fram") && 6 == arg.length()) { if (needHelp) { System.out.println( "Display applicable verb sentence frames for the search string.\n" + "\n" + "A frame is a sentence template illustrating the usage of a verb.\n" + "\n" + "Verb sentence frames are preceded with the string \"*>\" if a sentence\n" + "frame is acceptable for all of the words in the synset, and with \"=>\"\n" + "if a sentence frame is acceptable for the search string only."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nVerb frames of " + p.getLabel() + " " + iw.getLemma()); for (int i = 0; i < iw.getSenses().size(); i++) { Synset synset = iw.getSenses().get(i); for (String vf : synset.getVerbFrames()) { System.out.println("\t*> " + vf); } for (Word word : synset.getWords()) { if (iw.getLemma().equalsIgnoreCase(word.getLemma())) { if (word instanceof Verb) { Verb verb = (Verb) word; for (String vf : verb.getVerbFrames()) { System.out.println("\t=> " + vf); } } } } } } } //fram if (arg.startsWith("-hmer") && 6 == arg.length()) { if (needHelp) { System.out.println( "Display meronyms for search string tree. This is a recursive search\n" + "the prints all the meronyms of the search string and all of its\n" + "hypernyms. \n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nMeronyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.MEMBER_MERONYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.SUBSTANCE_MERONYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.PART_MERONYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //hmer if (arg.startsWith("-hhol") && 6 == arg.length()) { if (needHelp) { System.out.println( "\"Display holonyms for search string tree. This is a recursive search\n" + "that prints all the holonyms of the search string and all of the\n" + "holonym's holonyms.\n" + "\n" + "A holonym is the name of the whole of which the meronym names a part.\n" + "Y is a holonym of X if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nHolonyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.MEMBER_HOLONYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.SUBSTANCE_HOLONYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.PART_HOLONYM, PointerUtils.INFINITY, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //hhol if (arg.startsWith("-mero") && 6 == arg.length()) { if (needHelp) { System.out.println("Display all meronyms of the search string. \n" + "\n" + "A meronym is the name of a constituent part, the substance of, or a\n" + "member of something. X is a meronym of Y if X is a part of Y.\n" + "\n" + "A holonym is the name of the whole of which the meronym names a part.\n" + "Y is a holonym of X if X is a part of Y."); } POS p = POS.getPOSForKey(arg.substring(5)); IndexWord iw = d.lookupIndexWord(p, key); if (null != iw) { System.out.println("\nMeronyms of " + p.getLabel() + " " + iw.getLemma()); tracePointers(iw, PointerType.MEMBER_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.SUBSTANCE_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); tracePointers(iw, PointerType.PART_MERONYM, 1, needSense, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } //mero if (arg.startsWith("-grep") && 6 == arg.length()) { if (needHelp) { System.out .println("Print all strings in the database containing the search string\n" + "as an individual word, or as the first or last string in a word or\n" + "collocation."); } POS p = POS.getPOSForKey(arg.substring(5)); System.out.println("\nGrep of " + p.getLabel() + " " + key); Iterator<IndexWord> ii = d.getIndexWordIterator(p, key); while (ii.hasNext()) { System.out.println(ii.next().getLemma()); } } //grep if ("-over".equals(arg)) { for (POS pos : POS.getAllPOS()) { if (null != d.getMorphologicalProcessor()) { IndexWord iw = d.getIndexWord(pos, key); //for plurals like species, glasses if (null != iw && key.equals(iw.getLemma())) { printOverview(pos, iw, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } List<String> forms = d.getMorphologicalProcessor().lookupAllBaseForms(pos, key); if (null != forms) { for (String form : forms) { if (!form.equals(key)) { iw = d.getIndexWord(pos, form); if (null != iw) { printOverview(pos, iw, needGloss, needLex, needOffset, needSenseNum, needSenseKeys); } } } } } } } //over } } } } }
From source file:Main.java
public static String readFromFile(File file) { try (Reader in = new InputStreamReader(new FileInputStream(file), "UTF-8")) { char[] arr = new char[(int) file.length()]; int len = in.read(arr); return new String(arr, 0, len); } catch (IOException ex) { throw new RuntimeException("Unable to read file " + file.getName(), ex); }//from w ww . j a v a2 s. c o m }