List of usage examples for java.nio.charset StandardCharsets UTF_8
Charset UTF_8
To view the source code for java.nio.charset StandardCharsets UTF_8, click the Source Link below.
From source file:com.adobe.aem.demo.Analytics.java
public static void main(String[] args) { String hostname = null;/*ww w . j a va2 s .c o m*/ String url = null; String eventfile = null; // Command line options for this tool Options options = new Options(); options.addOption("h", true, "Hostname"); options.addOption("u", true, "Url"); options.addOption("f", true, "Event data file"); CommandLineParser parser = new BasicParser(); try { CommandLine cmd = parser.parse(options, args); if (cmd.hasOption("u")) { url = cmd.getOptionValue("u"); } if (cmd.hasOption("f")) { eventfile = cmd.getOptionValue("f"); } if (cmd.hasOption("h")) { hostname = cmd.getOptionValue("h"); } if (eventfile == null || hostname == null || url == null) { System.out.println("Command line parameters: -h hostname -u url -f path_to_XML_file"); System.exit(-1); } } catch (ParseException ex) { logger.error(ex.getMessage()); } URLConnection urlConn = null; DataOutputStream printout = null; BufferedReader input = null; String u = "http://" + hostname + "/" + url; String tmp = null; try { URL myurl = new URL(u); urlConn = myurl.openConnection(); urlConn.setDoInput(true); urlConn.setDoOutput(true); urlConn.setUseCaches(false); urlConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); printout = new DataOutputStream(urlConn.getOutputStream()); String xml = readFile(eventfile, StandardCharsets.UTF_8); printout.writeBytes(xml); printout.flush(); printout.close(); input = new BufferedReader(new InputStreamReader(urlConn.getInputStream())); logger.debug(xml); while (null != ((tmp = input.readLine()))) { logger.debug(tmp); } printout.close(); input.close(); } catch (Exception ex) { logger.error(ex.getMessage()); } }
From source file:io.mindmaps.migration.csv.Main.java
public static void main(String[] args) { String csvFileName = null;/*from w w w . ja v a 2 s .c o m*/ String csvEntityType = null; String engineURL = null; String graphName = null; for (int i = 0; i < args.length; i++) { if ("-file".equals(args[i])) csvFileName = args[++i]; else if ("-graph".equals(args[i])) graphName = args[++i]; else if ("-engine".equals(args[i])) engineURL = args[++i]; else if ("-as".equals(args[i])) { csvEntityType = args[++i]; } else if ("csv".equals(args[0])) { continue; } else die("Unknown option " + args[i]); } if (csvFileName == null) { die("Please specify CSV file using the -csv option"); } File csvFile = new File(csvFileName); if (!csvFile.exists()) { die("Cannot find file: " + csvFileName); } if (graphName == null) { die("Please provide the name of the graph using -graph"); } if (csvEntityType == null) { csvEntityType = csvFile.getName().replaceAll("[^A-Za-z0-9]", "_"); } System.out.println("Migrating " + csvFileName + " using MM Engine " + (engineURL == null ? "local" : engineURL) + " into graph " + graphName); // perform migration CSVSchemaMigrator schemaMigrator = new CSVSchemaMigrator(); CSVDataMigrator dataMigrator = new CSVDataMigrator(); // try { MindmapsGraph graph = engineURL == null ? MindmapsClient.getGraph(graphName) : MindmapsClient.getGraph(graphName, engineURL); Loader loader = engineURL == null ? new BlockingLoader(graphName) : new DistributedLoader(graphName, Lists.newArrayList(engineURL)); CSVParser csvParser = CSVParser.parse(csvFile.toURI().toURL(), StandardCharsets.UTF_8, CSVFormat.DEFAULT.withHeader()); schemaMigrator.graph(graph).configure(csvEntityType, csvParser).migrate(loader); System.out.println("Schema migration successful"); dataMigrator.graph(graph).configure(csvEntityType, csvParser).migrate(loader); System.out.println("DataType migration successful"); } catch (Throwable throwable) { throwable.printStackTrace(System.err); } System.exit(0); }
From source file:Main.java
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*from w w w. ja va2s . c o m*/ } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), StandardCharsets.UTF_8)); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } // :Post-Release-Update-Version.LUCENE_XY: QueryParser parser = new QueryParser(Version.LUCENE_4_10_0, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = 
queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:herddb.server.ServerMain.java
public static void main(String... args) { try {//from w w w . j av a 2 s .c o m LOG.log(Level.INFO, "Starting HerdDB version {0}", herddb.utils.Version.getVERSION()); Properties configuration = new Properties(); boolean configFileFromParameter = false; for (int i = 0; i < args.length; i++) { String arg = args[i]; if (!arg.startsWith("-")) { File configFile = new File(args[i]).getAbsoluteFile(); LOG.log(Level.INFO, "Reading configuration from {0}", configFile); try (InputStreamReader reader = new InputStreamReader(new FileInputStream(configFile), StandardCharsets.UTF_8)) { configuration.load(reader); } configFileFromParameter = true; } else if (arg.equals("--use-env")) { System.getenv().forEach((key, value) -> { System.out.println("Considering env as system property " + key + " -> " + value); System.setProperty(key, value); }); } else if (arg.startsWith("-D")) { int equals = arg.indexOf('='); if (equals > 0) { String key = arg.substring(2, equals); String value = arg.substring(equals + 1); System.setProperty(key, value); } } } if (!configFileFromParameter) { File configFile = new File("conf/server.properties").getAbsoluteFile(); LOG.log(Level.INFO, "Reading configuration from {0}", configFile); if (configFile.isFile()) { try (InputStreamReader reader = new InputStreamReader(new FileInputStream(configFile), StandardCharsets.UTF_8)) { configuration.load(reader); } } } System.getProperties().forEach((k, v) -> { String key = k + ""; if (!key.startsWith("java") && !key.startsWith("user")) { configuration.put(k, v); } }); LogManager.getLogManager().readConfiguration(); Runtime.getRuntime().addShutdownHook(new Thread("ctrlc-hook") { @Override public void run() { System.out.println("Ctrl-C trapped. Shutting down"); ServerMain _brokerMain = runningInstance; if (_brokerMain != null) { _brokerMain.close(); } } }); runningInstance = new ServerMain(configuration); runningInstance.start(); runningInstance.join(); } catch (Throwable t) { t.printStackTrace(); System.exit(1); } }
From source file:edu.ehu.galan.lite.Example.java
public static void main(String[] args) { //initizalize ehcache system System.setProperty("net.sf.ehcache.enableShutdownHook", "true"); if (CacheManager.getCacheManager("ehcacheLitet.xml") == null) { CacheManager.create("ehcacheLitet.xml"); }//from www . j a v a 2 s.c om cache = CacheManager.getInstance().getCache("LiteCache"); //load the corpus to process Corpus corpus = new Corpus("en"); //we spedify the directory and the database mapping (wikipedia in this case) corpus.loadCorpus("testCorpus", Document.SourceType.wikipedia); //will read the document using Illinois NLP utilities PlainTextDocumentReaderLBJEn parser = new PlainTextDocumentReaderLBJEn(); AlgorithmRunner runner = new AlgorithmRunner(); String resources = System.getProperty("user.dir") + "/resources/"; //algorithms initializacion CValueAlgortithm cvalue = new CValueAlgortithm(); cvalue.addNewProcessingFilter(new AdjPrepNounFilter()); TFIDFAlgorithm tf = new TFIDFAlgorithm(new CaseStemmer(CaseStemmer.CaseType.lowercase), "en"); ShallowParsingGrammarAlgortithm sha = new ShallowParsingGrammarAlgortithm( System.getProperty("user.dir") + "/resources/lite/" + "grammars/Cg2EnGrammar.grammar", "cg3/"); KPMinerAlgorithm kp = new KPMinerAlgorithm(); RakeAlgorithm ex = new RakeAlgorithm(); ex.loadStopWordsList("resources/lite/stopWordLists/RakeStopLists/SmartStopListEn"); ex.loadPunctStopWord("resources/lite/stopWordLists/RakeStopLists/RakePunctDefaultStopList"); //algorithm submitting to execute them in parallel runner.submitAlgorithm(kp); runner.submitAlgorithm(cvalue); runner.submitAlgorithm(tf); runner.submitAlgorithm(ex); runner.submitAlgorithm(sha); //load stop list List<String> standardStop = null; try { standardStop = Files.readAllLines(Paths.get(resources + "lite/stopWordLists/standardStopList"), StandardCharsets.UTF_8); } catch (IOException e1x) { Logger.getLogger(Example.class.getName()).log(Level.SEVERE, null, e1x); } //initialize Wikiminer helper (class that interacts with Wikiminer services) 
WikiminnerHelper helper = WikiminnerHelper.getInstance(resources); helper.setLanguage("en"); //we may operate in local mode (using Wikiminer as API instead of interacting via REST api // helper.setLocalMode(false,"/home/angel/nfs/wikiminer/configs/wikipedia"); WikiMinerMap wikimapping = new WikiMinerMap(resources, helper); CValueWikiDisambiguator disambiguator = new CValueWikiDisambiguator(resources, helper); CValueWikiRelationship relate = new CValueWikiRelationship(resources, helper); WikipediaData data = new WikipediaData(resources, helper); helper.openConnection(); //process all the documents in the corpus while (!corpus.getDocQueue().isEmpty()) { Document doc = corpus.getDocQueue().poll(); doc.setSource(Document.SourceType.wikipedia); parser.readSource(doc.getPath()); doc.setSentenceList(parser.getSentenceList()); doc.setTokenList(parser.getTokenizedSentenceList()); System.out.println(doc.getName()); runner.runAlgorihms(doc, resources); doc.applyGlobalStopWordList(standardStop); doc.mapThreshold(1.9f, new String[] { "CValue" }); doc.mapThreshold(0.00034554f, new String[] { "TFIDF" }); doc.removeAndMixTerms(); //map document wikimapping.mapCorpus(doc); disambiguator.disambiguateTopics(doc); //we may disambiguate topics that do not disambiguated correctly DuplicateRemoval.disambiguationRemoval(doc); DuplicateRemoval.topicDuplicateRemoval(doc); //obtain the wiki links,labels, etc data.processDocument(doc); //measure domain relatedness relate.relate(doc); //save the results Document.saveJsonToDir("", doc); } //close wikiminer connection and caches helper.closeConnection(); cache.dispose(); CacheManager.getInstance().shutdown(); System.exit(0); }
From source file:org.nmdp.b12s.mac.client.http.X509Config.java
public static void main(String[] args) throws KeyStoreException, NoSuchAlgorithmException, CertificateException, IOException, KeyManagementException, UnrecoverableKeyException { URL trustKeyStoreUrl = X509Config.class.getResource("/trusted.jks"); URL clientKeyStoreUri = X509Config.class.getResource("/test-client.jks"); SSLContext sslContext = SSLContexts.custom() // Configure trusted certs .loadTrustMaterial(trustKeyStoreUrl, "changeit".toCharArray()) // Configure client certificate .loadKeyMaterial(clientKeyStoreUri, "changeit".toCharArray(), "changeit".toCharArray()).build(); try (TextHttpClient httpClient = new TextHttpClient("https://macbeta.b12x.org/mac/api", sslContext)) { }/*from w w w .j av a 2s . c om*/ // Allow TLSv1 protocol only SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext, new String[] { "TLSv1" }, null, SSLConnectionSocketFactory.getDefaultHostnameVerifier()); try (CloseableHttpClient httpclient = HttpClients.custom().setSSLSocketFactory(sslsf).build()) { HttpGet httpget = new HttpGet("https://macbeta.b12x.org/mac/api/codes/AA"); System.out.println("executing request " + httpget.getRequestLine()); try (CloseableHttpResponse response = httpclient.execute(httpget)) { HttpEntity entity = response.getEntity(); System.out.println("----------------------------------------"); System.out.println(response.getStatusLine()); if (entity != null) { Charset charset = StandardCharsets.UTF_8; for (Header contentType : response.getHeaders("Content-Type")) { System.out.println("Content-Type: " + contentType); for (String part : contentType.getValue().split(";")) { if (part.startsWith("charset=")) { String charsetName = part.split("=")[1]; charset = Charset.forName(charsetName); } } } System.out.println("Response content length: " + entity.getContentLength()); String content = EntityUtils.toString(entity, charset); System.out.println(content); } EntityUtils.consume(entity); } } }
From source file:io.fabric8.apiman.gateway.ApimanGatewayStarter.java
/** * Main entry point for the Apiman Gateway micro service. * @param args the arguments/*from ww w .j a va 2 s . c om*/ * @throws Exception when any unhandled exception occurs */ public static final void main(String[] args) throws Exception { String isTestModeString = Systems.getEnvVarOrSystemProperty(APIMAN_GATEWAY_TESTMODE, "false"); boolean isTestMode = "true".equalsIgnoreCase(isTestModeString); if (isTestMode) log.info("Apiman Gateway Running in TestMode"); String isSslString = Systems.getEnvVarOrSystemProperty(APIMAN_GATEWAY_SSL, "false"); isSsl = "true".equalsIgnoreCase(isSslString); log.info("Apiman Gateway running in SSL: " + isSsl); String protocol = "http"; if (isSsl) protocol = "https"; URL elasticEndpoint = null; File gatewayConfigFile = new File(APIMAN_GATEWAY_PROPERTIES); String esUsername = null; String esPassword = null; if (gatewayConfigFile.exists()) { PropertiesConfiguration config = new PropertiesConfiguration(gatewayConfigFile); esUsername = config.getString("es.username"); esPassword = config.getString("es.password"); if (Utils.isNotNullOrEmpty(esPassword)) esPassword = new String(Base64.getDecoder().decode(esPassword), StandardCharsets.UTF_8).trim(); setConfigProp(APIMAN_GATEWAY_ES_USERNAME, esUsername); setConfigProp(APIMAN_GATEWAY_ES_PASSWORD, esPassword); } log.info(esUsername + esPassword); // Require ElasticSearch and the Gateway Services to to be up before proceeding if (isTestMode) { URL url = new URL(protocol + "://localhost:9200"); elasticEndpoint = waitForDependency(url, "elasticsearch-v1", "status", "200", esUsername, esPassword); } else { String defaultEsUrl = protocol + "://elasticsearch-v1:9200"; String esURL = Systems.getEnvVarOrSystemProperty(APIMAN_GATEWAY_ELASTICSEARCH_URL, defaultEsUrl); URL url = new URL(esURL); elasticEndpoint = waitForDependency(url, "elasticsearch-v1", "status", "200", esUsername, esPassword); log.info("Found " + elasticEndpoint); } File usersFile = new File(APIMAN_GATEWAY_USER_PATH); if 
(usersFile.exists()) { setConfigProp(Users.USERS_FILE_PROP, APIMAN_GATEWAY_USER_PATH); } log.info("** ******************************************** **"); Fabric8GatewayMicroService microService = new Fabric8GatewayMicroService(elasticEndpoint); if (isSsl) { microService.startSsl(); microService.joinSsl(); } else { microService.start(); microService.join(); } }
From source file:cu.uci.gws.sdlcrawler.PdfCrawlController.java
/**
 * Boots the PDF crawler: loads crawler settings from the sdlcrawler config
 * file, builds the crawl configuration (proxy, limits, headers), reads the
 * seed URLs, and runs the crawl, printing start/end timestamps and the total
 * elapsed time.
 *
 * @param args unused
 * @throws Exception on configuration or crawl errors
 */
public static void main(String[] args) throws Exception {
    Properties cm = PdfCrawlerConfigManager.getInstance().loadConfigFile();
    long startTime = System.currentTimeMillis();

    DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
    Date date = new Date();
    System.out.println(dateFormat.format(date));

    int numberOfCrawlers = Integer.parseInt(cm.getProperty("sdlcrawler.NumberOfCrawlers"));
    String pdfFolder = cm.getProperty("sdlcrawler.CrawlPdfFolder");

    // Transfer every crawler option from the properties file onto the config.
    CrawlConfig config = new CrawlConfig();
    config.setCrawlStorageFolder(cm.getProperty("sdlcrawler.CrawlStorageFolder"));
    config.setProxyHost(cm.getProperty("sdlcrawler.ProxyHost"));
    // Only set the proxy port when one is actually configured.
    if (!"".equals(cm.getProperty("sdlcrawler.ProxyPort"))) {
        config.setProxyPort(Integer.parseInt(cm.getProperty("sdlcrawler.ProxyPort")));
    }
    config.setProxyUsername(cm.getProperty("sdlcrawler.ProxyUser"));
    config.setProxyPassword(cm.getProperty("sdlcrawler.ProxyPass"));
    config.setMaxDownloadSize(Integer.parseInt(cm.getProperty("sdlcrawler.MaxDownloadSize")));
    config.setIncludeBinaryContentInCrawling(
            Boolean.parseBoolean(cm.getProperty("sdlcrawler.IncludeBinaryContent")));
    config.setFollowRedirects(Boolean.parseBoolean(cm.getProperty("sdlcrawler.Redirects")));
    config.setUserAgentString(cm.getProperty("sdlcrawler.UserAgent"));
    config.setMaxDepthOfCrawling(Integer.parseInt(cm.getProperty("sdlcrawler.MaxDepthCrawl")));
    config.setMaxConnectionsPerHost(Integer.parseInt(cm.getProperty("sdlcrawler.MaxConnectionsPerHost")));
    config.setSocketTimeout(Integer.parseInt(cm.getProperty("sdlcrawler.SocketTimeout")));
    config.setMaxOutgoingLinksToFollow(Integer.parseInt(cm.getProperty("sdlcrawler.MaxOutgoingLinks")));
    config.setResumableCrawling(Boolean.parseBoolean(cm.getProperty("sdlcrawler.ResumableCrawling")));
    config.setIncludeHttpsPages(Boolean.parseBoolean(cm.getProperty("sdlcrawler.IncludeHttpsPages")));
    config.setMaxTotalConnections(Integer.parseInt(cm.getProperty("sdlcrawler.MaxTotalConnections")));
    config.setMaxPagesToFetch(Integer.parseInt(cm.getProperty("sdlcrawler.MaxPagesToFetch")));
    config.setPolitenessDelay(Integer.parseInt(cm.getProperty("sdlcrawler.PolitenessDelay")));
    config.setConnectionTimeout(Integer.parseInt(cm.getProperty("sdlcrawler.ConnectionTimeout")));
    System.out.println(config.toString());

    // Default request headers sent with every fetch.
    Collection<BasicHeader> defaultHeaders = new HashSet<>();
    defaultHeaders
            .add(new BasicHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
    defaultHeaders.add(new BasicHeader("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
    defaultHeaders.add(new BasicHeader("Accept-Language", "en-US,en,es-ES,es;q=0.8"));
    defaultHeaders.add(new BasicHeader("Connection", "keep-alive"));
    config.setDefaultHeaders(defaultHeaders);

    // Seed URLs, one per line in the configured seed file.
    List<String> list = Files.readAllLines(Paths.get("config/" + cm.getProperty("sdlcrawler.SeedFile")),
            StandardCharsets.UTF_8);
    String[] crawlDomains = list.toArray(new String[list.size()]);

    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
    for (String domain : crawlDomains) {
        controller.addSeed(domain);
    }

    PdfCrawler.configure(crawlDomains, pdfFolder);
    controller.start(PdfCrawler.class, numberOfCrawlers);

    DateFormat dateFormat1 = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
    Date date1 = new Date();
    System.out.println(dateFormat1.format(date1));
    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    System.out.println("Total time:" + totalTime);
}
From source file:io.fabric8.apiman.ApimanStarter.java
/** * Main entry point for the API Manager micro service. * @param args the arguments/*from ww w .j a v a 2 s. c o m*/ * @throws Exception when any unhandled exception occurs */ public static final void main(String[] args) throws Exception { Fabric8ManagerApiMicroService microService = new Fabric8ManagerApiMicroService(); boolean isTestMode = getSystemPropertyOrEnvVar(APIMAN_TESTMODE, false); if (isTestMode) log.info("Apiman running in TestMode"); boolean isSsl = getSystemPropertyOrEnvVar(APIMAN_SSL, false); log.info("Apiman running in SSL: " + isSsl); String protocol = "http"; if (isSsl) protocol = "https"; File apimanConfigFile = new File(APIMAN_PROPERTIES); String esUsername = null; String esPassword = null; if (apimanConfigFile.exists()) { PropertiesConfiguration config = new PropertiesConfiguration(apimanConfigFile); esUsername = config.getString("es.username"); esPassword = config.getString("es.password"); if (Utils.isNotNullOrEmpty(esPassword)) esPassword = new String(Base64.getDecoder().decode(esPassword), StandardCharsets.UTF_8).trim(); setConfigProp(APIMAN_ELASTICSEARCH_USERNAME, esUsername); setConfigProp(APIMAN_ELASTICSEARCH_PASSWORD, esPassword); } URL elasticEndpoint = null; // Require ElasticSearch and the Gateway Services to to be up before proceeding if (isTestMode) { URL url = new URL("https://localhost:9200"); elasticEndpoint = waitForDependency(url, "", "elasticsearch-v1", "status", "200", esUsername, esPassword); } else { String defaultEsUrl = protocol + "://elasticsearch-v1:9200"; String esURL = getSystemPropertyOrEnvVar(APIMAN_ELASTICSEARCH_URL, defaultEsUrl); URL url = new URL(esURL); elasticEndpoint = waitForDependency(url, "", "elasticsearch-v1", "status", "200", esUsername, esPassword); log.info("Found " + elasticEndpoint); String defaultGatewayUrl = protocol + "://apiman-gateway:7777"; gatewayUrl = getSystemPropertyOrEnvVar(APIMAN_GATEWAY_URL, defaultGatewayUrl); URL gatewayEndpoint = waitForDependency(new URL(gatewayUrl), 
"/api/system/status", "apiman-gateway", "up", "true", null, null); log.info("Found " + gatewayEndpoint); } setConfigProp("apiman.plugins.repositories", "http://repo1.maven.org/maven2/"); setConfigProp("apiman-manager.plugins.registries", "http://cdn.rawgit.com/apiman/apiman-plugin-registry/1.2.6.Final/registry.json"); setFabric8Props(elasticEndpoint); if (isSsl) { microService.startSsl(); microService.joinSsl(); } else { microService.start(); microService.join(); } }
From source file:eu.crydee.stanfordcorenlp.Tokenizer.java
/** * Wrapper around Stanford CoreNLP to tokenize text. * * Give it an input dir of text files with --input-dir and it'll ouput * tokenized versions, one sentence per line with space separated words to * --output-dir (defaults to out/)./* w ww. j a v a2 s .c o m*/ * * @param args CLI args. Example: --input-dir my-input --output-dir * my-output. */ public static void main(String[] args) { ArgumentParser parser = ArgumentParsers.newArgumentParser("stanford-corenlp-tokenizer-wrapper") .description("Converts Mediawiki dumps to text."); parser.addArgument("-i", "--input-dir").required(true).help("Path of the input text files directory."); parser.addArgument("-o", "--output-dir").help("Path of the output text files directory.").setDefault("out"); Params params = new Params(); try { parser.parseArgs(args, params); } catch (ArgumentParserException ex) { System.err.println("Could not parse arguments: " + ex.getMessage()); System.exit(1); } Tokenizer tokenizer = new Tokenizer(); try { Files.list(Paths.get(params.inDirPath)).filter(Files::isRegularFile).map(Path::toFile).map(f -> { try { return Pair.of(f.getName(), FileUtils.readFileToString(f, StandardCharsets.UTF_8)); } catch (IOException ex) { System.err.println("Could not read input text file: " + ex.getLocalizedMessage()); throw new UncheckedIOException(ex); } }).forEach(p -> { String text = tokenizer.tokenizeAndSentenceSplit(p.getRight()); try { FileUtils.writeStringToFile(Paths.get(params.outDirpath, p.getLeft()).toFile(), text, StandardCharsets.UTF_8); } catch (IOException ex) { System.err.println("Could not write output text file: " + ex.getLocalizedMessage()); } }); } catch (IOException ex) { System.err.println("Could not read from input directory: " + ex.getLocalizedMessage()); } }