List of usage examples for java.util.Map.entrySet()
Set<Map.Entry<K, V>> entrySet();
From source file:com.beyondb.geocoding.BaiduAPI.java
public static void main(String[] args) throws IOException { // Map<String, String> json = BaiduAPI.testPost("29.542938", "114.064022"); // System.out.println("address :" + json.get("address")); // String str = BaiduAPI.getPointByAddress("??2400????", ""); // System.out.println(str); // Map<String, String> map = BaiduAPI.getLongLatCoordinate("?310????",""); // if (null != map) { // System.out.println(map.get("lng")); // System.out.println(map.get("lat")); // }// w w w . ja v a2 s . co m //Coordinate coord =BaiduAPI.getInstance().getCoordinate("?310????",""); Coordinate coord = BaiduAPI.getInstance().getCoordinate( "?310????", ""); if (null != coord) { System.out.println("lat:" + coord.Latitude); System.out.println("lng:" + coord.Longitude); } Map<String, Coordinate> places = BaiduAPI.getInstance().getPlaces("?", ""); if (places != null) { for (Map.Entry<String, Coordinate> entry : places.entrySet()) { String placeName = entry.getKey(); Coordinate coordinate = entry.getValue(); System.out.println("placeName:" + placeName); System.out.println("lat:" + coordinate.Latitude); System.out.println("lng:" + coordinate.Longitude); } } }
From source file:edu.cmu.lti.oaqa.annographix.apps.SolrSimpleIndexApp.java
public static void main(String[] args) { Options options = new Options(); options.addOption("i", null, true, "Input File"); options.addOption("u", null, true, "Solr URI"); options.addOption("n", null, true, "Batch size"); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); try {//from w w w . jav a 2 s . c o m CommandLine cmd = parser.parse(options, args); if (cmd.hasOption("i")) { inputFile = cmd.getOptionValue("i"); } else { Usage("Specify Input File"); } if (cmd.hasOption("u")) { solrURI = cmd.getOptionValue("u"); } else { Usage("Specify Solr URI"); } if (cmd.hasOption("n")) { batchQty = Integer.parseInt(cmd.getOptionValue("n")); } SolrServerWrapper solrServer = new SolrServerWrapper(solrURI); BufferedReader inpText = new BufferedReader( new InputStreamReader(CompressUtils.createInputStream(inputFile))); XmlHelper xmlHlp = new XmlHelper(); String docText = XmlHelper.readNextXMLIndexEntry(inpText); for (int docNum = 1; docText != null; ++docNum, docText = XmlHelper.readNextXMLIndexEntry(inpText)) { // 1. Read document text Map<String, String> docFields = null; HashMap<String, Object> objDocFields = new HashMap<String, Object>(); try { docFields = xmlHlp.parseXMLIndexEntry(docText); } catch (SAXException e) { System.err.println("Parsing error, offending DOC:" + NL + docText); throw new Exception("Parsing error."); } for (Map.Entry<String, String> e : docFields.entrySet()) { //System.out.println(e.getKey() + " " + e.getValue()); objDocFields.put(e.getKey(), e.getValue()); } solrServer.indexDocument(objDocFields); if ((docNum - 1) % batchQty == 0) solrServer.indexCommit(); } solrServer.indexCommit(); } catch (ParseException e) { Usage("Cannot parse arguments"); } catch (Exception e) { System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:de.zib.vold.userInterface.ABI.java
/**
 * Interactive console loop: prompts with {@code "#: "}, reads one command per
 * line from standard input and dispatches {@code lookup}/{@code l},
 * {@code insert}/{@code i} and {@code exit}/{@code x}. Ends on {@code exit} or
 * end of input, then exits the JVM with status 0.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    ABI abi = new ABI();

    // One reader for the whole session. Creating a new BufferedReader per
    // iteration throws away whatever the previous one had buffered ahead,
    // which loses commands when stdin is piped or redirected.
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

    while (true) {
        try {
            System.out.print("#: ");

            String s = br.readLine();
            if (null == s) {
                break;
            }

            // trim() so that leading whitespace does not produce an empty first token.
            String[] a = s.trim().split("\\s+");

            if (a.length < 1 || a[0].isEmpty()) {
                printValidCommands();
                continue;
            } else if (a[0].equals("lookup") || a[0].equals("l")) {
                if (a.length < 4) {
                    System.out.println("ERROR: Syntax for lookup is:");
                    System.out.println("ERROR: lookup <scope> <type> <keyname>");
                    continue;
                }

                Map<Key, Set<String>> result;
                try {
                    result = abi.frontend.lookup(new Key(a[1], a[2], a[3]));
                } catch (VoldException e) {
                    System.out.println(
                            "An internal error occured: " + e.getClass().getName() + ": " + e.getMessage());
                    continue;
                }

                for (Map.Entry<Key, Set<String>> entry : result.entrySet()) {
                    Key k = entry.getKey();

                    System.out.println("+Found ('" + k.get_scope() + "', '" + k.get_type() + "', '"
                            + k.get_keyname() + "')");

                    for (String v : entry.getValue()) {
                        System.out.println("-" + v);
                    }
                }
            } else if (a[0].equals("insert") || a[0].equals("i")) {
                if (a.length < 5) {
                    System.out.println("ERROR: Syntax for insert is:");
                    System.out.println("ERROR: insert <source> <scope> <type> <keyname> {<value> }*");
                    continue;
                }

                Key k = new Key(a[2], a[3], a[4]);

                // Everything after the key name is a value.
                Set<String> values = new HashSet<String>();
                for (int i = 5; i < a.length; ++i) {
                    values.add(a[i]);
                }

                try {
                    abi.frontend.insert(a[1], k, values, DateTime.now().getMillis());
                } catch (VoldException e) {
                    System.out.println(
                            "An internal error occured: " + e.getClass().getName() + ": " + e.getMessage());
                    continue;
                }
            } else if (a[0].equals("exit") || a[0].equals("x")) {
                break;
            } else {
                System.out.println("ERROR: Unknown command!");
                printValidCommands();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    System.exit(0);
}

/** Prints the list of commands understood by the console. */
private static void printValidCommands() {
    System.out.println("ERROR: The following commands are valid:");
    System.out.println("ERROR: insert <source> <scope> <type> <keyname> {<value> }*");
    System.out.println("ERROR: lookup <scope> <type> <keyname>");
    System.out.println("ERROR: exit");
}
From source file:io.rodeo.chute.ChuteMain.java
public static void main(String[] args) throws SQLException, JsonParseException, JsonMappingException, IOException { InputStream is = new FileInputStream(new File(CONFIG_FILENAME)); ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); mapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); ChuteConfiguration config = mapper.readValue(is, ChuteConfiguration.class); Map<String, Importer> importManagers = new HashMap<String, Importer>(config.importerConfigurations.size()); for (Entry<String, ImporterConfiguration> importerConfig : config.importerConfigurations.entrySet()) { importManagers.put(importerConfig.getKey(), importerConfig.getValue().createImporter()); }//w w w . ja v a2s . c o m Map<String, Exporter> exportManagers = new HashMap<String, Exporter>(config.exporterConfigurations.size()); for (Entry<String, ExporterConfiguration> exporterConfig : config.exporterConfigurations.entrySet()) { exportManagers.put(exporterConfig.getKey(), exporterConfig.getValue().createExporter()); } for (Entry<String, ConnectionConfiguration> connectionConfig : config.connectionConfigurations.entrySet()) { importManagers.get(connectionConfig.getValue().in) .addProcessor(exportManagers.get(connectionConfig.getValue().out)); } for (Entry<String, Exporter> exportManager : exportManagers.entrySet()) { exportManager.getValue().start(); } for (Entry<String, Importer> importManager : importManagers.entrySet()) { importManager.getValue().start(); } }
From source file:com.joliciel.talismane.terminology.Main.java
public static void main(String[] args) throws Exception { String termFilePath = null;// ww w . j a va2 s. c om String outFilePath = null; Command command = Command.extract; int depth = -1; String databasePropertiesPath = null; String projectCode = null; Map<String, String> argMap = TalismaneConfig.convertArgs(args); String logConfigPath = argMap.get("logConfigFile"); if (logConfigPath != null) { argMap.remove("logConfigFile"); Properties props = new Properties(); props.load(new FileInputStream(logConfigPath)); PropertyConfigurator.configure(props); } Map<String, String> innerArgs = new HashMap<String, String>(); for (Entry<String, String> argEntry : argMap.entrySet()) { String argName = argEntry.getKey(); String argValue = argEntry.getValue(); if (argName.equals("command")) command = Command.valueOf(argValue); else if (argName.equals("termFile")) termFilePath = argValue; else if (argName.equals("outFile")) outFilePath = argValue; else if (argName.equals("depth")) depth = Integer.parseInt(argValue); else if (argName.equals("databaseProperties")) databasePropertiesPath = argValue; else if (argName.equals("projectCode")) projectCode = argValue; else innerArgs.put(argName, argValue); } if (termFilePath == null && databasePropertiesPath == null) throw new TalismaneException("Required argument: termFile or databasePropertiesPath"); if (termFilePath != null) { String currentDirPath = System.getProperty("user.dir"); File termFileDir = new File(currentDirPath); if (termFilePath.lastIndexOf("/") >= 0) { String termFileDirPath = termFilePath.substring(0, termFilePath.lastIndexOf("/")); termFileDir = new File(termFileDirPath); termFileDir.mkdirs(); } } long startTime = new Date().getTime(); try { TerminologyServiceLocator terminologyServiceLocator = TerminologyServiceLocator.getInstance(); TerminologyService terminologyService = terminologyServiceLocator.getTerminologyService(); TerminologyBase terminologyBase = null; if (projectCode == null) throw new 
TalismaneException("Required argument: projectCode"); File file = new File(databasePropertiesPath); FileInputStream fis = new FileInputStream(file); Properties dataSourceProperties = new Properties(); dataSourceProperties.load(fis); terminologyBase = terminologyService.getPostGresTerminologyBase(projectCode, dataSourceProperties); if (command.equals(Command.analyse) || command.equals(Command.extract)) { if (depth < 0) throw new TalismaneException("Required argument: depth"); if (command.equals(Command.analyse)) { innerArgs.put("command", "analyse"); } else { innerArgs.put("command", "process"); } TalismaneFrench talismaneFrench = new TalismaneFrench(); TalismaneConfig config = new TalismaneConfig(innerArgs, talismaneFrench); PosTagSet tagSet = TalismaneSession.getPosTagSet(); Charset outputCharset = config.getOutputCharset(); TermExtractor termExtractor = terminologyService.getTermExtractor(terminologyBase); termExtractor.setMaxDepth(depth); termExtractor.setOutFilePath(termFilePath); termExtractor.getIncludeChildren().add(tagSet.getPosTag("P")); termExtractor.getIncludeChildren().add(tagSet.getPosTag("P+D")); termExtractor.getIncludeChildren().add(tagSet.getPosTag("CC")); termExtractor.getIncludeWithParent().add(tagSet.getPosTag("DET")); if (outFilePath != null) { if (outFilePath.lastIndexOf("/") >= 0) { String outFileDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/")); File outFileDir = new File(outFileDirPath); outFileDir.mkdirs(); } File outFile = new File(outFilePath); outFile.delete(); outFile.createNewFile(); Writer writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outFilePath), outputCharset)); TermAnalysisWriter termAnalysisWriter = new TermAnalysisWriter(writer); termExtractor.addTermObserver(termAnalysisWriter); } Talismane talismane = config.getTalismane(); talismane.setParseConfigurationProcessor(termExtractor); talismane.process(); } else if (command.equals(Command.list)) { List<Term> terms = 
terminologyBase.getTermsByFrequency(2); for (Term term : terms) { LOG.debug("Term: " + term.getText()); LOG.debug("Frequency: " + term.getFrequency()); LOG.debug("Heads: " + term.getHeads()); LOG.debug("Expansions: " + term.getExpansions()); LOG.debug("Contexts: " + term.getContexts()); } } } finally { long endTime = new Date().getTime(); long totalTime = endTime - startTime; LOG.info("Total time: " + totalTime); } }
From source file:au.org.ala.names.search.DwcaNameIndexer.java
/** * Example run/* w ww . j ava2 s . c o m*/ * * java cp .:names.jar au.org.ala.checklist.lucene.DwcaNameIndexer * -all * -dwca /data/bie-staging/names-lists/dwca-col * -target /data/lucene/testdwc-namematching * -irmng /data/bie-staging/irmng/IRMNG_DWC_HOMONYMS * -common /data/bie-staging/ala-names/col_vernacular.txt * * @param args */ public static void main(String[] args) { final String DEFAULT_DWCA = "/data/lucene/sources/dwca-col"; final String DEFAULT_IRMNG = "/data/lucene/sources/IRMNG_DWC_HOMONYMS"; final String DEFAULT_COMMON_NAME = "/data/lucene/sources/col_vernacular.txt"; final String DEFAULT_TARGET_DIR = "/data/lucene/namematching"; final String DEFAULT_TMP_DIR = "/data/lucene/nmload-tmp"; Options options = new Options(); options.addOption("v", "version", false, "Retrieve version information"); options.addOption("h", "help", false, "Retrieve options"); options.addOption("all", false, "Generates the load index and search index"); options.addOption("load", false, "Generate the load index only. " + "The load index is a temporary index generated from the raw data files" + " used to load the main search index"); options.addOption("search", false, "Generates the search index. A load index must already be created for this to run."); options.addOption("irmng", true, "The absolute path to the unzipped irmng DwCA. IRMNG is used to detect homonyms. Defaults to " + DEFAULT_IRMNG); options.addOption("dwca", true, "The absolute path to the unzipped DwCA for the scientific names. Defaults to " + DEFAULT_DWCA); options.addOption("target", true, "The target directory to write the new name index to. Defaults to " + DEFAULT_TARGET_DIR); options.addOption("tmp", true, "The tmp directory for the load index. Defaults to " + DEFAULT_TMP_DIR); options.addOption("common", true, "The common (vernacular) name file. Defaults to " + DEFAULT_COMMON_NAME); options.addOption("testSearch", true, "Debug a name search. 
This uses the target directory to search against."); CommandLineParser parser = new BasicParser(); try { // parse the command line arguments CommandLine line = parser.parse(options, args); if (line.hasOption("v")) { //only load the properties file if it exists otherwise default to the biocache-test-config.properties on the classpath InputStream stream = DwcaNameIndexer.class.getResourceAsStream("/git.properties"); Properties properties = new Properties(); if (stream != null) { properties.load(stream); properties.list(System.out); } else { System.err.println("Unable to retrieve versioning information"); } System.exit(-1); } if (line.hasOption("help")) { //only load the properties file if it exists otherwise default to the biocache-test-config.properties on the classpath new HelpFormatter().printHelp("nameindexer", options); System.exit(-1); } if (line.hasOption("testSearch")) { boolean indexExists = (new File(DEFAULT_TARGET_DIR).exists()); if (indexExists) { //do a name search - with option flag pointing to index location System.out.println("Search for name"); ALANameSearcher searcher = new ALANameSearcher( line.getOptionValue("target", DEFAULT_TARGET_DIR)); NameSearchResult nsr = searcher.searchForRecord(line.getOptionValue("testSearch")); if (nsr != null) { Map<String, String> props = nsr.toMap(); for (Map.Entry<String, String> entry : props.entrySet()) { System.out.println(entry.getKey() + ": " + entry.getValue()); } } else { System.err.println("No match for " + line.getOptionValue("testSearch")); } } else { System.err.println("Index unreadable. 
Check " + DEFAULT_TARGET_DIR); } System.exit(-1); } boolean load = line.hasOption("load") || line.hasOption("all"); boolean search = line.hasOption("search") || line.hasOption("all"); if (!line.hasOption("load") && !line.hasOption("search") && !line.hasOption("all")) { load = true; search = true; } log.info("Generating loading index: " + load); log.info("Generating searching index: " + search); boolean defaultIrmngReadable = (new File(DEFAULT_IRMNG).exists()); boolean defaultCommonReadable = (new File(DEFAULT_COMMON_NAME).exists()); boolean defaultDwcaReadable = (new File(DEFAULT_DWCA).exists()); if (line.getOptionValue("dwca") != null) { log.info("Using the DwCA name file: " + line.getOptionValue("dwca")); } else if (defaultDwcaReadable) { log.info("Using the default DwCA name file: " + DEFAULT_DWCA); } else { log.error( "No DwC Archive specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_DWCA); System.exit(-1); } if (line.getOptionValue("irmng") == null && !defaultIrmngReadable) { log.warn( "No IRMNG export specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_IRMNG); } else { log.info("Using the default IRMNG name file: " + DEFAULT_IRMNG); } if (line.getOptionValue("common") == null && !defaultCommonReadable) { log.warn( "No common name export specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_COMMON_NAME); } else { log.info("Using the default common name file: " + DEFAULT_COMMON_NAME); } File targetDirectory = new File(line.getOptionValue("target", DEFAULT_TARGET_DIR)); if (targetDirectory.exists()) { String newPath = targetDirectory.getAbsolutePath() + "_" + DateFormatUtils.format(new Date(), "yyyy-MM-dd_hh-mm-ss"); log.info("Target directory already exists. 
Backing up to : " + newPath); File newTargetDirectory = new File(newPath); FileUtils.moveDirectory(targetDirectory, newTargetDirectory); FileUtils.forceMkdir(targetDirectory); } DwcaNameIndexer indexer = new DwcaNameIndexer(); indexer.create(load, search, line.getOptionValue("target", DEFAULT_TARGET_DIR), line.getOptionValue("tmp", DEFAULT_TMP_DIR), line.getOptionValue("dwca", DEFAULT_DWCA), line.getOptionValue("irmng", DEFAULT_IRMNG), line.getOptionValue("common", DEFAULT_COMMON_NAME)); } catch (Exception e) { e.printStackTrace(); } }
From source file:net.cyllene.hackerrank.downloader.HackerrankDownloader.java
public static void main(String[] args) { // Parse arguments and set up the defaults DownloaderSettings.cmd = parseArguments(args); if (DownloaderSettings.cmd.hasOption("help")) { printHelp();// w w w .j av a 2 s.c o m System.exit(0); } if (DownloaderSettings.cmd.hasOption("verbose")) { DownloaderSettings.beVerbose = true; } /** * Output directory logic: * 1) if directory exists, ask for -f option to overwrite, quit with message * 2) if -f flag is set, check if user has access to a parent directory * 3) if no access, quit with error * 4) if everything is OK, remember the path */ String sDesiredPath = DownloaderSettings.outputDir; if (DownloaderSettings.cmd.hasOption("directory")) { sDesiredPath = DownloaderSettings.cmd.getOptionValue("d", DownloaderSettings.outputDir); } if (DownloaderSettings.beVerbose) { System.out.println("Checking output dir: " + sDesiredPath); } Path desiredPath = Paths.get(sDesiredPath); if (Files.exists(desiredPath) && Files.isDirectory(desiredPath)) { if (!DownloaderSettings.cmd.hasOption("f")) { System.out.println("I wouldn't like to overwrite existing directory: " + sDesiredPath + ", set the --force flag if you are sure. May lead to data loss, be careful."); System.exit(0); } else { System.out.println( "WARNING!" + System.lineSeparator() + "--force flag is set. Overwriting directory: " + sDesiredPath + System.lineSeparator() + "WARNING!"); } } if ((Files.exists(desiredPath) && !Files.isWritable(desiredPath)) || !Files.isWritable(desiredPath.getParent())) { System.err .println("Fatal error: " + sDesiredPath + " cannot be created or modified. 
Check permissions."); // TODO: use Exceptions instead of system.exit System.exit(1); } DownloaderSettings.outputDir = sDesiredPath; Integer limit = DownloaderSettings.ITEMS_TO_DOWNLOAD; if (DownloaderSettings.cmd.hasOption("limit")) { try { limit = ((Number) DownloaderSettings.cmd.getParsedOptionValue("l")).intValue(); } catch (ParseException e) { System.out.println("Incorrect limit: " + e.getMessage() + System.lineSeparator() + "Using default value: " + limit); } } Integer offset = DownloaderSettings.ITEMS_TO_SKIP; if (DownloaderSettings.cmd.hasOption("offset")) { try { offset = ((Number) DownloaderSettings.cmd.getParsedOptionValue("o")).intValue(); } catch (ParseException e) { System.out.println("Incorrect offset: " + e.getMessage() + " Using default value: " + offset); } } DownloaderCore dc = DownloaderCore.INSTANCE; List<HRChallenge> challenges = new LinkedList<>(); // Download everything first Map<String, List<Integer>> structure = null; try { structure = dc.getStructure(offset, limit); } catch (IOException e) { System.err.println("Fatal Error: could not get data structure."); e.printStackTrace(); System.exit(1); } challengesLoop: for (Map.Entry<String, List<Integer>> entry : structure.entrySet()) { String challengeSlug = entry.getKey(); HRChallenge currentChallenge = null; try { currentChallenge = dc.getChallengeDetails(challengeSlug); } catch (IOException e) { System.err.println("Error: could not get challenge info for: " + challengeSlug); if (DownloaderSettings.beVerbose) { e.printStackTrace(); } continue challengesLoop; } submissionsLoop: for (Integer submissionId : entry.getValue()) { HRSubmission submission = null; try { submission = dc.getSubmissionDetails(submissionId); } catch (IOException e) { System.err.println("Error: could not get submission info for: " + submissionId); if (DownloaderSettings.beVerbose) { e.printStackTrace(); } continue submissionsLoop; } // TODO: probably should move filtering logic elsewhere(getStructure, maybe) if 
(submission.getStatus().equalsIgnoreCase("Accepted")) { currentChallenge.getSubmissions().add(submission); } } challenges.add(currentChallenge); } // Now dump all data to disk try { for (HRChallenge currentChallenge : challenges) { if (currentChallenge.getSubmissions().isEmpty()) continue; final String sChallengePath = DownloaderSettings.outputDir + "/" + currentChallenge.getSlug(); final String sSolutionPath = sChallengePath + "/accepted_solutions"; final String sDescriptionPath = sChallengePath + "/problem_description"; Files.createDirectories(Paths.get(sDescriptionPath)); Files.createDirectories(Paths.get(sSolutionPath)); // FIXME: this should be done the other way String plainBody = currentChallenge.getDescriptions().get(0).getBody(); String sFname; if (!plainBody.equals("null")) { sFname = sDescriptionPath + "/english.txt"; if (DownloaderSettings.beVerbose) { System.out.println("Writing to: " + sFname); } Files.write(Paths.get(sFname), plainBody.getBytes(StandardCharsets.UTF_8.name())); } String htmlBody = currentChallenge.getDescriptions().get(0).getBodyHTML(); String temporaryHtmlTemplate = "<html></body>" + htmlBody + "</body></html>"; sFname = sDescriptionPath + "/english.html"; if (DownloaderSettings.beVerbose) { System.out.println("Writing to: " + sFname); } Files.write(Paths.get(sFname), temporaryHtmlTemplate.getBytes(StandardCharsets.UTF_8.name())); for (HRSubmission submission : currentChallenge.getSubmissions()) { sFname = String.format("%s/%d.%s", sSolutionPath, submission.getId(), submission.getLanguage()); if (DownloaderSettings.beVerbose) { System.out.println("Writing to: " + sFname); } Files.write(Paths.get(sFname), submission.getSourceCode().getBytes(StandardCharsets.UTF_8.name())); } } } catch (IOException e) { System.err.println("Fatal Error: couldn't dump data to disk."); System.exit(1); } }
From source file:edu.jhu.hlt.concrete.gigaword.expt.ConvertGigawordDocuments.java
/** * @param args/* w w w . ja v a2 s . co m*/ */ public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler() { @Override public void uncaughtException(Thread t, Throwable e) { logger.error("Thread {} caught unhandled exception.", t.getName()); logger.error("Unhandled exception.", e); } }); if (args.length != 2) { logger.info("Usage: {} {} {}", GigawordConcreteConverter.class.getName(), "path/to/expt/file", "path/to/out/folder"); System.exit(1); } String exptPathStr = args[0]; String outPathStr = args[1]; // Verify path points to something. Path exptPath = Paths.get(exptPathStr); if (!Files.exists(exptPath)) { logger.error("File: {} does not exist. Re-run with the correct path to " + " the experiment 2 column file. See README.md."); System.exit(1); } logger.info("Experiment map located at: {}", exptPathStr); // Create output dir if not yet created. Path outPath = Paths.get(outPathStr); if (!Files.exists(outPath)) { logger.info("Creating directory: {}", outPath.toString()); try { Files.createDirectories(outPath); } catch (IOException e) { logger.error("Caught an IOException when creating output dir.", e); System.exit(1); } } logger.info("Output directory located at: {}", outPathStr); // Read in expt map. See README.md. Map<String, Set<String>> exptMap = null; try (Reader r = ExperimentUtils.createReader(exptPath); BufferedReader br = new BufferedReader(r)) { exptMap = ExperimentUtils.createFilenameToIdMap(br); } catch (IOException e) { logger.error("Caught an IOException when creating expt map.", e); System.exit(1); } // Start a timer. logger.info("Gigaword -> Concrete beginning."); StopWatch sw = new StopWatch(); sw.start(); // Iterate over expt map. exptMap.entrySet() // .parallelStream() .forEach(p -> { final String pathStr = p.getKey(); final Set<String> ids = p.getValue(); final Path lp = Paths.get(pathStr); logger.info("Converting path: {}", pathStr); // Get the file name and immediate folder it is under. 
int nElements = lp.getNameCount(); Path fileName = lp.getName(nElements - 1); Path subFolder = lp.getName(nElements - 2); String newFnStr = fileName.toString().split("\\.")[0] + ".tar"; // Mirror folders in output dir. Path localOutFolder = outPath.resolve(subFolder); Path localOutPath = localOutFolder.resolve(newFnStr); // Create output subfolders. if (!Files.exists(localOutFolder) && !Files.isDirectory(localOutFolder)) { logger.info("Creating out file: {}", localOutFolder.toString()); try { Files.createDirectories(localOutFolder); } catch (IOException e) { throw new RuntimeException("Caught an IOException when creating output dir.", e); } } // Iterate over communications. Iterator<Communication> citer; try (OutputStream os = Files.newOutputStream(localOutPath); BufferedOutputStream bos = new BufferedOutputStream(os); Archiver archiver = new TarArchiver(bos);) { citer = new ConcreteGigawordDocumentFactory().iterator(lp); while (citer.hasNext()) { Communication c = citer.next(); String cId = c.getId(); // Document ID must be in the set. Remove. boolean wasInSet = ids.remove(cId); if (!wasInSet) { // Some IDs are duplicated in Gigaword. // See ERRATA. logger.debug( "ID: {} was parsed from path: {}, but was not in the experiment map. Attempting to remove dupe.", cId, pathStr); // Attempt to create a duplicate id (append .duplicate to the id). // Then, try to remove again. String newId = RepairDuplicateIDs.repairDuplicate(cId); boolean dupeRemoved = ids.remove(newId); // There are not nested duplicates, so this should never fire. if (!dupeRemoved) { logger.info("Failed to remove dupe."); return; } else // Modify the communication ID to the unique version. 
c.setId(newId); } archiver.addEntry(new ArchivableCommunication(c)); } logger.info("Finished path: {}", pathStr); } catch (ConcreteException ex) { logger.error("Caught ConcreteException during Concrete mapping.", ex); logger.error("Path: {}", pathStr); } catch (IOException e) { logger.error("Error archiving communications.", e); logger.error("Path: {}", localOutPath.toString()); } }); sw.stop(); logger.info("Finished."); Minutes m = new Duration(sw.getTime()).toStandardMinutes(); logger.info("Runtime: Approximately {} minutes.", m.getMinutes()); }
From source file:com.linkedin.databus2.client.util.DatabusClusterUtil.java
/** * @param args//from w w w . j av a 2 s . c o m * DbusClusterUtil -z <zookeper-server> -c <cluster-name> [-p * <partitionNumber] partitions readSCN writeSCN SCN remove * clients */ public static void main(String[] args) { try { GnuParser cmdLineParser = new GnuParser(); Options options = new Options(); options.addOption("z", true, "zk-server").addOption("c", true, "cluster-name ") .addOption("p", true, "partition").addOption("l", false, "legacy") .addOption("h", false, "help"); CommandLine cmdLineArgs = cmdLineParser.parse(options, args, false); if (cmdLineArgs.hasOption('h')) { usage(); System.exit(0); } if (!cmdLineArgs.hasOption('c')) { usage(); System.exit(1); } String clusterName = cmdLineArgs.getOptionValue('c'); String zkServer = cmdLineArgs.getOptionValue('z'); boolean isLegacyChkptLocation = cmdLineArgs.hasOption('l'); if (zkServer == null || zkServer.isEmpty()) { zkServer = "localhost:2181"; } String partitionStr = cmdLineArgs.getOptionValue('p'); String partition = partitionStr; if ((partition != null) && partition.equals("all")) { partition = ""; } String[] fns = cmdLineArgs.getArgs(); if (fns.length < 1) { usage(); System.exit(1); } DatabusClusterUtilHelper clusterState = new DatabusClusterUtilHelper(zkServer, clusterName); String function = fns[0]; String arg1 = (fns.length > 1) ? fns[1] : null; String arg2 = (fns.length > 2) ? fns[2] : null; boolean clusterExists = clusterState.existsCluster(); if (function.equals("create")) { if (!clusterExists) { if (arg1 == null) { throw new DatabusClusterUtilException("create: please provide the number of partitions"); } int part = Integer.parseInt(arg1); clusterState.createCluster(part); return; } else { throw new DatabusClusterUtilException("Cluster " + clusterName + " already exists"); } } if (!clusterExists) { throw new DatabusClusterUtilException("Cluster doesn't exist! 
"); } if (function.equals("delete")) { clusterState.removeCluster(); } else if (function.equals("partitions")) { int numParts = clusterState.getNumPartitions(); System.out.println(numParts); } else { // all these functions require the notion of partition; Set<Integer> partitions = getPartitions(partition, clusterState.getNumPartitions()); if (function.equals("sources")) { DatabusClusterCkptManager ckptMgr = new DatabusClusterCkptManager(zkServer, clusterName, null, partitions, isLegacyChkptLocation); Set<String> sources = ckptMgr.getSourcesFromCheckpoint(); if (sources != null) { for (String s : sources) { System.out.println(s); } } else { throw new DatabusClusterUtilException( "sources: Sources not found for cluster " + clusterName); } } else if (function.equals("clients")) { clusterState.getClusterInfo(); for (Integer p : partitions) { String client = clusterState.getInstanceForPartition(p); System.out.println(p + "\t" + client); } } else if (function.equals("readSCN")) { List<String> sources = getSources(arg1); if ((sources != null) && !sources.isEmpty()) { DatabusClusterCkptManager ckptMgr = new DatabusClusterCkptManager(zkServer, clusterName, sources, partitions, isLegacyChkptLocation); Map<Integer, Checkpoint> ckpts = ckptMgr.readCheckpoint(); char delim = '\t'; for (Map.Entry<Integer, Checkpoint> mkPair : ckpts.entrySet()) { StringBuilder output = new StringBuilder(64); output.append(mkPair.getKey()); output.append(delim); Checkpoint cp = mkPair.getValue(); if (cp == null) { output.append(-1); output.append(delim); output.append(-1); } else { if (cp.getConsumptionMode() == DbusClientMode.ONLINE_CONSUMPTION) { output.append(cp.getWindowScn()); output.append(delim); output.append(cp.getWindowOffset()); } else if (cp.getConsumptionMode() == DbusClientMode.BOOTSTRAP_CATCHUP) { output.append(cp.getWindowScn()); output.append(delim); output.append(cp.getWindowOffset()); } else if (cp.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT) { 
output.append(cp.getBootstrapSinceScn()); output.append(delim); output.append(-1); } } System.out.println(output.toString()); } } else { throw new DatabusClusterUtilException("readSCN: please specify non-empty sources"); } } else if (function.equals("checkpoint")) { List<String> sources = getSources(arg1); if ((sources != null) && !sources.isEmpty()) { DatabusClusterCkptManager ckptMgr = new DatabusClusterCkptManager(zkServer, clusterName, sources, partitions, isLegacyChkptLocation); Map<Integer, Checkpoint> ckpts = ckptMgr.readCheckpoint(); char delim = '\t'; for (Map.Entry<Integer, Checkpoint> mkPair : ckpts.entrySet()) { StringBuilder output = new StringBuilder(64); output.append(mkPair.getKey()); output.append(delim); Checkpoint cp = mkPair.getValue(); if (cp == null) { output.append("null"); } else { output.append(cp.toString()); } System.out.println(output.toString()); } } else { throw new DatabusClusterUtilException("readSCN: please specify non-empty sources"); } } else if (function.equals("writeSCN")) { String scnStr = arg1; Long scn = Long.parseLong(scnStr); if (partitionStr != null) { List<String> sources = getSources(arg2); if ((sources != null) && !sources.isEmpty()) { DatabusClusterCkptManager ckptMgr = new DatabusClusterCkptManager(zkServer, clusterName, sources, partitions, isLegacyChkptLocation); ckptMgr.writeCheckpoint(scn); } else { throw new DatabusClusterUtilException("writeSCN: please specify non-empty sources"); } } else { throw new DatabusClusterUtilException( "writeSCN: to write the SCN to all partitions please use '-p all'"); } } else if (function.equals("removeSCN")) { if (partitionStr != null) { List<String> sources = getSources(arg1); if ((sources != null) && !sources.isEmpty()) { DatabusClusterCkptManager ckptMgr = new DatabusClusterCkptManager(zkServer, clusterName, sources, partitions, isLegacyChkptLocation); ckptMgr.remove(); } else { throw new DatabusClusterUtilException("remove: please specify non-empty sources"); } } else { throw 
new DatabusClusterUtilException( "remove: to remove SCN from all partitions please use '-p all'"); } } else { usage(); System.exit(1); } } } catch (ParseException e) { usage(); System.exit(1); } catch (DatabusClusterUtilException e) { System.err.println("Error! " + e.toString()); System.exit(1); } }
From source file:com.joliciel.talismane.terminology.TalismaneTermExtractorMain.java
public static void main(String[] args) throws Exception { String termFilePath = null;//from w w w .j a v a 2 s. c om String outFilePath = null; Command command = Command.extract; int depth = -1; String databasePropertiesPath = null; String projectCode = null; String terminologyPropertiesPath = null; Map<String, String> argMap = StringUtils.convertArgs(args); String logConfigPath = argMap.get("logConfigFile"); if (logConfigPath != null) { argMap.remove("logConfigFile"); Properties props = new Properties(); props.load(new FileInputStream(logConfigPath)); PropertyConfigurator.configure(props); } Map<String, String> innerArgs = new HashMap<String, String>(); for (Entry<String, String> argEntry : argMap.entrySet()) { String argName = argEntry.getKey(); String argValue = argEntry.getValue(); if (argName.equals("command")) command = Command.valueOf(argValue); else if (argName.equals("termFile")) termFilePath = argValue; else if (argName.equals("outFile")) outFilePath = argValue; else if (argName.equals("depth")) depth = Integer.parseInt(argValue); else if (argName.equals("databaseProperties")) databasePropertiesPath = argValue; else if (argName.equals("terminologyProperties")) terminologyPropertiesPath = argValue; else if (argName.equals("projectCode")) projectCode = argValue; else innerArgs.put(argName, argValue); } if (termFilePath == null && databasePropertiesPath == null) throw new TalismaneException("Required argument: termFile or databasePropertiesPath"); if (termFilePath != null) { String currentDirPath = System.getProperty("user.dir"); File termFileDir = new File(currentDirPath); if (termFilePath.lastIndexOf("/") >= 0) { String termFileDirPath = termFilePath.substring(0, termFilePath.lastIndexOf("/")); termFileDir = new File(termFileDirPath); termFileDir.mkdirs(); } } long startTime = new Date().getTime(); try { if (command.equals(Command.analyse)) { innerArgs.put("command", "analyse"); } else { innerArgs.put("command", "process"); } String sessionId = ""; 
TalismaneServiceLocator locator = TalismaneServiceLocator.getInstance(sessionId); TalismaneService talismaneService = locator.getTalismaneService(); TalismaneConfig config = talismaneService.getTalismaneConfig(innerArgs, sessionId); TerminologyServiceLocator terminologyServiceLocator = TerminologyServiceLocator.getInstance(locator); TerminologyService terminologyService = terminologyServiceLocator.getTerminologyService(); TerminologyBase terminologyBase = null; if (projectCode == null) throw new TalismaneException("Required argument: projectCode"); File file = new File(databasePropertiesPath); FileInputStream fis = new FileInputStream(file); Properties dataSourceProperties = new Properties(); dataSourceProperties.load(fis); terminologyBase = terminologyService.getPostGresTerminologyBase(projectCode, dataSourceProperties); TalismaneSession talismaneSession = talismaneService.getTalismaneSession(); if (command.equals(Command.analyse) || command.equals(Command.extract)) { Locale locale = talismaneSession.getLocale(); Map<TerminologyProperty, String> terminologyProperties = new HashMap<TerminologyProperty, String>(); if (terminologyPropertiesPath != null) { Map<String, String> terminologyPropertiesStr = StringUtils.getArgMap(terminologyPropertiesPath); for (String key : terminologyPropertiesStr.keySet()) { try { TerminologyProperty property = TerminologyProperty.valueOf(key); terminologyProperties.put(property, terminologyPropertiesStr.get(key)); } catch (IllegalArgumentException e) { throw new TalismaneException("Unknown terminology property: " + key); } } } else { terminologyProperties = getDefaultTerminologyProperties(locale); } if (depth <= 0 && !terminologyProperties.containsKey(TerminologyProperty.maxDepth)) throw new TalismaneException("Required argument: depth"); InputStream regexInputStream = getInputStreamFromResource( "parser_conll_with_location_input_regex.txt"); Scanner regexScanner = new Scanner(regexInputStream, "UTF-8"); String inputRegex = 
regexScanner.nextLine(); regexScanner.close(); config.setInputRegex(inputRegex); Charset outputCharset = config.getOutputCharset(); TermExtractor termExtractor = terminologyService.getTermExtractor(terminologyBase, terminologyProperties); if (depth > 0) termExtractor.setMaxDepth(depth); termExtractor.setOutFilePath(termFilePath); if (outFilePath != null) { if (outFilePath.lastIndexOf("/") >= 0) { String outFileDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/")); File outFileDir = new File(outFileDirPath); outFileDir.mkdirs(); } File outFile = new File(outFilePath); outFile.delete(); outFile.createNewFile(); Writer writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outFilePath), outputCharset)); TermAnalysisWriter termAnalysisWriter = new TermAnalysisWriter(writer); termExtractor.addTermObserver(termAnalysisWriter); } Talismane talismane = config.getTalismane(); talismane.setParseConfigurationProcessor(termExtractor); talismane.process(); } else if (command.equals(Command.list)) { List<Term> terms = terminologyBase.findTerms(2, null, 0, null, null); for (Term term : terms) { LOG.debug("Term: " + term.getText()); LOG.debug("Frequency: " + term.getFrequency()); LOG.debug("Heads: " + term.getHeads()); LOG.debug("Expansions: " + term.getExpansions()); LOG.debug("Contexts: " + term.getContexts()); } } } finally { long endTime = new Date().getTime(); long totalTime = endTime - startTime; LOG.info("Total time: " + totalTime); } }