List of usage examples for java.nio.file.Files.readAllLines
public static List<String> readAllLines(Path path, Charset cs) throws IOException
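Before the collected real-world examples below, here is a minimal, self-contained sketch of the call; the file name lines.txt is illustrative:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

public class ReadAllLinesDemo {
    public static void main(String[] args) throws IOException {
        Path path = Paths.get("lines.txt"); // illustrative file name
        // Reads the entire file into memory and splits it into lines.
        List<String> lines = Files.readAllLines(path, StandardCharsets.UTF_8);
        for (String line : lines) {
            System.out.println(line);
        }
    }
}

Note that readAllLines loads the whole file into memory, so it is best suited to small files. Since Java 8 there is also a single-argument overload, Files.readAllLines(Path), which assumes UTF-8.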
From source file:edu.ehu.galan.lite.Example.java
public static void main(String[] args) {
    // initialize the Ehcache system
    System.setProperty("net.sf.ehcache.enableShutdownHook", "true");
    if (CacheManager.getCacheManager("ehcacheLitet.xml") == null) {
        CacheManager.create("ehcacheLitet.xml");
    }
    cache = CacheManager.getInstance().getCache("LiteCache");
    // load the corpus to process
    Corpus corpus = new Corpus("en");
    // we specify the directory and the database mapping (Wikipedia in this case)
    corpus.loadCorpus("testCorpus", Document.SourceType.wikipedia);
    // will read the document using Illinois NLP utilities
    PlainTextDocumentReaderLBJEn parser = new PlainTextDocumentReaderLBJEn();
    AlgorithmRunner runner = new AlgorithmRunner();
    String resources = System.getProperty("user.dir") + "/resources/";
    // algorithm initialization
    CValueAlgortithm cvalue = new CValueAlgortithm();
    cvalue.addNewProcessingFilter(new AdjPrepNounFilter());
    TFIDFAlgorithm tf = new TFIDFAlgorithm(new CaseStemmer(CaseStemmer.CaseType.lowercase), "en");
    ShallowParsingGrammarAlgortithm sha = new ShallowParsingGrammarAlgortithm(
            System.getProperty("user.dir") + "/resources/lite/" + "grammars/Cg2EnGrammar.grammar", "cg3/");
    KPMinerAlgorithm kp = new KPMinerAlgorithm();
    RakeAlgorithm ex = new RakeAlgorithm();
    ex.loadStopWordsList("resources/lite/stopWordLists/RakeStopLists/SmartStopListEn");
    ex.loadPunctStopWord("resources/lite/stopWordLists/RakeStopLists/RakePunctDefaultStopList");
    // submit the algorithms so they are executed in parallel
    runner.submitAlgorithm(kp);
    runner.submitAlgorithm(cvalue);
    runner.submitAlgorithm(tf);
    runner.submitAlgorithm(ex);
    runner.submitAlgorithm(sha);
    // load the stop list
    List<String> standardStop = null;
    try {
        standardStop = Files.readAllLines(Paths.get(resources + "lite/stopWordLists/standardStopList"),
                StandardCharsets.UTF_8);
    } catch (IOException e1x) {
        Logger.getLogger(Example.class.getName()).log(Level.SEVERE, null, e1x);
    }
    // initialize the Wikiminer helper (the class that interacts with Wikiminer services)
    WikiminnerHelper helper = WikiminnerHelper.getInstance(resources);
    helper.setLanguage("en");
    // we may operate in local mode (using Wikiminer as an API instead of interacting via the REST API)
    // helper.setLocalMode(false, "/home/angel/nfs/wikiminer/configs/wikipedia");
    WikiMinerMap wikimapping = new WikiMinerMap(resources, helper);
    CValueWikiDisambiguator disambiguator = new CValueWikiDisambiguator(resources, helper);
    CValueWikiRelationship relate = new CValueWikiRelationship(resources, helper);
    WikipediaData data = new WikipediaData(resources, helper);
    helper.openConnection();
    // process all the documents in the corpus
    while (!corpus.getDocQueue().isEmpty()) {
        Document doc = corpus.getDocQueue().poll();
        doc.setSource(Document.SourceType.wikipedia);
        parser.readSource(doc.getPath());
        doc.setSentenceList(parser.getSentenceList());
        doc.setTokenList(parser.getTokenizedSentenceList());
        System.out.println(doc.getName());
        runner.runAlgorihms(doc, resources);
        doc.applyGlobalStopWordList(standardStop);
        doc.mapThreshold(1.9f, new String[] { "CValue" });
        doc.mapThreshold(0.00034554f, new String[] { "TFIDF" });
        doc.removeAndMixTerms();
        // map the document
        wikimapping.mapCorpus(doc);
        disambiguator.disambiguateTopics(doc);
        // we may re-process topics that were not disambiguated correctly
        DuplicateRemoval.disambiguationRemoval(doc);
        DuplicateRemoval.topicDuplicateRemoval(doc);
        // obtain the wiki links, labels, etc.
        data.processDocument(doc);
        // measure domain relatedness
        relate.relate(doc);
        // save the results
        Document.saveJsonToDir("", doc);
    }
    // close the Wikiminer connection and caches
    helper.closeConnection();
    cache.dispose();
    CacheManager.getInstance().shutdown();
    System.exit(0);
}
From source file:cu.uci.gws.sdlcrawler.PdfCrawlController.java
public static void main(String[] args) throws Exception {
    Properties cm = PdfCrawlerConfigManager.getInstance().loadConfigFile();
    long startTime = System.currentTimeMillis();
    DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
    Date date = new Date();
    System.out.println(dateFormat.format(date));
    int numberOfCrawlers = Integer.parseInt(cm.getProperty("sdlcrawler.NumberOfCrawlers"));
    String pdfFolder = cm.getProperty("sdlcrawler.CrawlPdfFolder");
    CrawlConfig config = new CrawlConfig();
    config.setCrawlStorageFolder(cm.getProperty("sdlcrawler.CrawlStorageFolder"));
    config.setProxyHost(cm.getProperty("sdlcrawler.ProxyHost"));
    if (!"".equals(cm.getProperty("sdlcrawler.ProxyPort"))) {
        config.setProxyPort(Integer.parseInt(cm.getProperty("sdlcrawler.ProxyPort")));
    }
    config.setProxyUsername(cm.getProperty("sdlcrawler.ProxyUser"));
    config.setProxyPassword(cm.getProperty("sdlcrawler.ProxyPass"));
    config.setMaxDownloadSize(Integer.parseInt(cm.getProperty("sdlcrawler.MaxDownloadSize")));
    config.setIncludeBinaryContentInCrawling(
            Boolean.parseBoolean(cm.getProperty("sdlcrawler.IncludeBinaryContent")));
    config.setFollowRedirects(Boolean.parseBoolean(cm.getProperty("sdlcrawler.Redirects")));
    config.setUserAgentString(cm.getProperty("sdlcrawler.UserAgent"));
    config.setMaxDepthOfCrawling(Integer.parseInt(cm.getProperty("sdlcrawler.MaxDepthCrawl")));
    config.setMaxConnectionsPerHost(Integer.parseInt(cm.getProperty("sdlcrawler.MaxConnectionsPerHost")));
    config.setSocketTimeout(Integer.parseInt(cm.getProperty("sdlcrawler.SocketTimeout")));
    config.setMaxOutgoingLinksToFollow(Integer.parseInt(cm.getProperty("sdlcrawler.MaxOutgoingLinks")));
    config.setResumableCrawling(Boolean.parseBoolean(cm.getProperty("sdlcrawler.ResumableCrawling")));
    config.setIncludeHttpsPages(Boolean.parseBoolean(cm.getProperty("sdlcrawler.IncludeHttpsPages")));
    config.setMaxTotalConnections(Integer.parseInt(cm.getProperty("sdlcrawler.MaxTotalConnections")));
    config.setMaxPagesToFetch(Integer.parseInt(cm.getProperty("sdlcrawler.MaxPagesToFetch")));
    config.setPolitenessDelay(Integer.parseInt(cm.getProperty("sdlcrawler.PolitenessDelay")));
    config.setConnectionTimeout(Integer.parseInt(cm.getProperty("sdlcrawler.ConnectionTimeout")));
    System.out.println(config.toString());
    Collection<BasicHeader> defaultHeaders = new HashSet<>();
    defaultHeaders
            .add(new BasicHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
    defaultHeaders.add(new BasicHeader("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
    defaultHeaders.add(new BasicHeader("Accept-Language", "en-US,en,es-ES,es;q=0.8"));
    defaultHeaders.add(new BasicHeader("Connection", "keep-alive"));
    config.setDefaultHeaders(defaultHeaders);
    // read the seed URLs, one per line
    List<String> list = Files.readAllLines(Paths.get("config/" + cm.getProperty("sdlcrawler.SeedFile")),
            StandardCharsets.UTF_8);
    String[] crawlDomains = list.toArray(new String[list.size()]);
    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
    for (String domain : crawlDomains) {
        controller.addSeed(domain);
    }
    PdfCrawler.configure(crawlDomains, pdfFolder);
    controller.start(PdfCrawler.class, numberOfCrawlers);
    DateFormat dateFormat1 = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
    Date date1 = new Date();
    System.out.println(dateFormat1.format(date1));
    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    System.out.println("Total time:" + totalTime);
}
From source file:es.upm.oeg.tools.quality.ldsniffer.cmd.LDSnifferApp.java
public static void main(String[] args) {
    HelpFormatter help = new HelpFormatter();
    String header = "Assess a list of Linked Data resources using the Linked Data Quality Model.";
    String footer = "Please report issues at https://github.com/nandana/ld-sniffer";
    try {
        CommandLine line = parseArguments(args);
        if (line.hasOption("help")) {
            help.printHelp("LDSnifferApp", header, OPTIONS, footer, true);
            System.exit(0);
        }
        evaluationTimeout = Integer.parseInt(line.getOptionValue("t", "10"));
        if (line.hasOption("md")) {
            includeMetricDefinitions = true;
        }
        if (line.hasOption("rdf")) {
            rdfOutput = true;
        }
        logger.info("URL List: " + line.getOptionValue("ul"));
        logger.info("TDB Path: " + line.getOptionValue("tdb"));
        logger.info("Metrics Path: " + line.getOptionValue("ml"));
        logger.info("Include Metric definitions: " + includeMetricDefinitions);
        logger.info("RDF output: " + rdfOutput);
        logger.info("Timeout (mins): " + evaluationTimeout);
        if (line.hasOption("ml")) {
            Path path = Paths.get(line.getOptionValue("ml"));
            if (!Files.exists(path)) {
                throw new IOException(path.toAbsolutePath().toString() + " : File doesn't exist.");
            }
        }
        // Set the TDB path
        String tdbDirectory;
        if (line.hasOption("tdb")) {
            tdbDirectory = line.getOptionValue("tdb");
        } else {
            Path tempPath = Files.createTempDirectory("tdb_");
            tdbDirectory = tempPath.toAbsolutePath().toString();
        }
        // Create the URL list for the evaluation
        if (!line.hasOption("ul") && !line.hasOption("url")) {
            System.out.println("One of the following parameters is required: url or urlList");
            help.printHelp("LDSnifferApp", header, OPTIONS, footer, true);
            System.exit(0);
        } else if (line.hasOption("ul") && line.hasOption("url")) {
            System.out.println("You have to specify either url or urlList, not both.");
            help.printHelp("LDSnifferApp", header, OPTIONS, footer, true);
            System.exit(0);
        }
        List<String> urlList = null;
        if (line.hasOption("ul")) {
            Path path = Paths.get(line.getOptionValue("ul"));
            logger.info("Path : " + path.toAbsolutePath().toString());
            logger.info("Path exists : " + Files.exists(path));
            urlList = Files.readAllLines(path, Charset.defaultCharset());
        } else if (line.hasOption("url")) {
            urlList = new ArrayList<>();
            urlList.add(line.getOptionValue("url"));
        }
        Executor executor = new Executor(tdbDirectory, urlList);
        executor.execute();
    } catch (MissingOptionException e) {
        help.printHelp("LDSnifferApp", header, OPTIONS, footer, true);
        logger.error("Missing arguments. Reason: " + e.getMessage(), e);
        System.exit(1);
    } catch (ParseException e) {
        logger.error("Parsing failed. Reason: " + e.getMessage(), e);
        System.exit(1);
    } catch (IOException e) {
        logger.error("Execution failed. Reason: " + e.getMessage(), e);
        System.exit(1);
    }
}
From source file:revisaoswing.RevisaoSwing.java
public static void carregarArray() throws IOException {
    if (!Files.exists(arquivo))
        return;
    // read the whole file and parse its contents as a JSON array
    String texto = "";
    List<String> linhas = Files.readAllLines(arquivo, Charset.defaultCharset());
    for (String item : linhas) {
        texto += item;
    }
    array = (JSONArray) JSONValue.parse(texto);
}
From source file:its.tools.SonarlintDaemon.java
private static String artifactVersion() {
    if (artifactVersion == null) {
        try {
            for (String l : Files.readAllLines(Paths.get("pom.xml"), StandardCharsets.UTF_8)) {
                String lineTrimmed = l.trim();
                if (lineTrimmed.startsWith("<version>")) {
                    artifactVersion = lineTrimmed.substring("<version>".length(),
                            lineTrimmed.length() - "</version>".length());
                    break;
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }
    return artifactVersion;
}
From source file:org.openbaton.vnfm.generic.utils.LogDispatcher.java
private static List<String> readFile(String path, Charset encoding) throws IOException {
    try {
        return Files.readAllLines(Paths.get(path), encoding);
    } catch (java.nio.file.NoSuchFileException e) {
        return new ArrayList<>();
    }
}
From source file:com.sonar.maven.it.suite.AbstractMavenTest.java
protected static Version mojoVersion() {
    if (mojoVersion == null) {
        try {
            for (String line : Files.readAllLines(Paths.get("../pom.xml"), StandardCharsets.UTF_8)) {
                if (line.startsWith(" <version>")) {
                    String version = StringUtils.substringAfter(line, "<version>");
                    version = StringUtils.substringBefore(version, "</version>");
                    mojoVersion = Version.create(version);
                    return mojoVersion;
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        throw new IllegalStateException("Unable to find version of the Maven plugin to be used by ITs");
    }
    return mojoVersion;
}
From source file:org.openstreetmap.gui.persistence.JSONPersistence.java
/**
 * Loads the marker data from file.
 *
 * @param pFineName The name of the source file.
 * @return An array of MarkerData loaded from file.
 * @throws PersistenceException If there's any problem with the load.
 */
public static MarkerData[] loadMarkers(String pFineName) {
    try {
        List<String> lines = Files.readAllLines(Paths.get(pFineName), StandardCharsets.ISO_8859_1);
        StringBuffer buffer = new StringBuffer();
        for (String string : lines) {
            buffer.append(string.trim());
        }
        List<MarkerData> markers = new Vector<>();
        JSONObject geometryCollection = new JSONObject(buffer.toString());
        if (geometryCollection.has("geometries")) {
            JSONArray points = geometryCollection.getJSONArray("geometries");
            for (int i = 0; i < points.length(); i++) {
                markers.add(extractMarker(points.getJSONObject(i)));
            }
        }
        if (geometryCollection.has("features")) {
            JSONArray features = geometryCollection.getJSONArray("features");
            for (int i = 0; i < features.length(); i++) {
                // currently disabled
                markers.add(extractFeature(features.getJSONObject(i)));
            }
        }
        return markers.toArray(new MarkerData[0]);
    } catch (IOException exception) {
        throw new PersistenceException(exception);
    }
}
From source file:jobs.EvaluateRetrieval.java
@Override
public void doJob() throws Exception {
    Path filePath = VirtualFile.fromRelativePath("/data/mesh_disease_terms.txt").getRealFile().toPath();
    Charset charset = Charset.defaultCharset();
    List<String> lines = Files.readAllLines(filePath, charset);
    Stopwatch stopwatch = Stopwatch.createUnstarted();
    stopwatch.start();
    int total = lines.size();
    int counter = 0;
    // TODO just to store the results and print the file
    StringBuilder sb = new StringBuilder();
    sb.append("PMID\tMESH_ID\tMESH_TERM\n");
    for (String line : lines) {
        String[] splits = line.split("\t");
        String id = splits[0];
        String term = splits[1];
        String originalTerm = splits[1];
        counter++;
        Logger.info("Term: " + term + "(" + counter + "/" + total + ")");
        if (term.contains(",")) {
            Pattern p = Pattern.compile("(.*), (.*)");
            Matcher m = p.matcher(term);
            if (m.find()) {
                String post = m.group(1);
                String pre = m.group(2);
                term = pre + " " + post;
                Logger.info("Term modified: " + term);
            }
        }
        Directory directory = FSDirectory.open(VirtualFile.fromRelativePath("/index").getRealFile());
        DirectoryReader ireader = DirectoryReader.open(directory);
        // TODO query analyzer - can be changed and switched
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        IndexSearcher isearcher = new IndexSearcher(ireader);
        // Maybe a different type of parser?
        QueryParser parser = new QueryParser(Version.LUCENE_47, "contents", analyzer);
        // Logger.info("Query: " + term);
        if (!term.contains("(")) {
            // TODO different syntax and operators
            Query query = parser.parse("\"" + term.replace("/", "\\/") + "\"");
            // Logger.info("query: " + query.toString());
            ScoreDoc[] hits = isearcher.search(query, null, 10000).scoreDocs;
            // Logger.info("results: " + hits.length);
            int freq = hits.length;
            if (freq > 0) {
                for (int i = 0; i < hits.length; i++) {
                    Document hitDoc = isearcher.doc(hits[i].doc);
                    // Logger.info(hitDoc.get("pmid") + " - " + hits[i].score);
                    sb.append(hitDoc.get("pmid")).append("\t").append(id).append("\t").append(originalTerm)
                            .append("\n");
                }
            }
        }
        ireader.close();
        directory.close();
    }
    stopwatch.stop();
    Logger.info("Time to index the documents: " + stopwatch.elapsed(TimeUnit.SECONDS));
    File file = VirtualFile.fromRelativePath("/data/annotatedArticles.txt").getRealFile();
    FileUtils.writeStringToFile(file, sb.toString());
    Logger.info("File saved: " + file.getAbsolutePath());
}
From source file:com.datumbox.framework.applications.nlp.CETRTest.java
/**
 * Test of extract method, of class CETR.
 */
@Test
public void testExtract() {
    logger.info("extract");
    Configuration conf = Configuration.getConfiguration();
    String dbName = this.getClass().getSimpleName();
    String text;
    try {
        List<String> lines = Files.readAllLines(
                Paths.get(this.getClass().getClassLoader().getResource("datasets/example.com.html").toURI()),
                StandardCharsets.UTF_8);
        text = StringUtils.join(lines, "\r\n");
    } catch (IOException | URISyntaxException ex) {
        throw new RuntimeException(ex);
    }
    CETR.Parameters parameters = new CETR.Parameters();
    parameters.setNumberOfClusters(2);
    parameters.setAlphaWindowSizeFor2DModel(3);
    parameters.setSmoothingAverageRadius(2);
    CETR instance = new CETR(dbName, conf);
    String expResult = "This domain is established to be used for illustrative examples in documents. You may use this domain in examples without prior coordination or asking for permission.";
    String result = instance.extract(text, parameters);
    assertEquals(expResult, result);
}