List of usage examples for java.io OutputStreamWriter OutputStreamWriter
public OutputStreamWriter(OutputStream out, CharsetEncoder enc)
From source file:GIST.IzbirkomExtractor.IzbirkomExtractor.java
/** * @param args// w w w . j a v a 2 s . c o m */ public static void main(String[] args) { // process command-line options Options options = new Options(); options.addOption("n", "noaddr", false, "do not do any address matching (for testing)"); options.addOption("i", "info", false, "create and populate address information table"); options.addOption("h", "help", false, "this message"); // database connection options.addOption("s", "server", true, "database server to connect to"); options.addOption("d", "database", true, "OSM database name"); options.addOption("u", "user", true, "OSM database user name"); options.addOption("p", "pass", true, "OSM database password"); // logging options options.addOption("l", "logdir", true, "log file directory (default './logs')"); options.addOption("e", "loglevel", true, "log level (default 'FINEST')"); // automatically generate the help statement HelpFormatter help_formatter = new HelpFormatter(); // database URI for connection String dburi = null; // Information message for help screen String info_msg = "IzbirkomExtractor [options] <html_directory>"; try { CommandLineParser parser = new GnuParser(); CommandLine cmd = parser.parse(options, args); if (cmd.hasOption('h') || cmd.getArgs().length != 1) { help_formatter.printHelp(info_msg, options); System.exit(1); } /* prohibit n and i together */ if (cmd.hasOption('n') && cmd.hasOption('i')) { System.err.println("Options 'n' and 'i' cannot be used together."); System.exit(1); } /* require database arguments without -n */ if (cmd.hasOption('n') && (cmd.hasOption('s') || cmd.hasOption('d') || cmd.hasOption('u') || cmd.hasOption('p'))) { System.err.println("Options 'n' and does not need any databse parameters."); System.exit(1); } /* require all 4 database options to be used together */ if (!cmd.hasOption('n') && !(cmd.hasOption('s') && cmd.hasOption('d') && cmd.hasOption('u') && cmd.hasOption('p'))) { System.err.println( "For database access all of the following arguments have to be specified: server, database, user, pass"); System.exit(1); } /* useful variables */ SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd'T'kk:mm"); String dateString = formatter.format(new Date()); /* setup logging */ File logdir = new File(cmd.hasOption('l') ? cmd.getOptionValue('l') : "logs"); FileUtils.forceMkdir(logdir); File log_file_name = new File( logdir + "/" + IzbirkomExtractor.class.getName() + "-" + formatter.format(new Date()) + ".log"); FileHandler log_file = new FileHandler(log_file_name.getPath()); /* create "latest" link to currently created log file */ Path latest_log_link = Paths.get(logdir + "/latest"); Files.deleteIfExists(latest_log_link); Files.createSymbolicLink(latest_log_link, Paths.get(log_file_name.getName())); log_file.setFormatter(new SimpleFormatter()); LogManager.getLogManager().reset(); // prevents logging to console logger.addHandler(log_file); logger.setLevel(cmd.hasOption('e') ? Level.parse(cmd.getOptionValue('e')) : Level.FINEST); // open directory with HTML files and create file list File dir = new File(cmd.getArgs()[0]); if (!dir.isDirectory()) { System.err.println("Unable to find directory '" + cmd.getArgs()[0] + "', exiting"); System.exit(1); } PathMatcher pmatcher = FileSystems.getDefault() .getPathMatcher("glob:? * ?*.html"); ArrayList<File> html_files = new ArrayList<>(); for (Path file : Files.newDirectoryStream(dir.toPath())) if (pmatcher.matches(file.getFileName())) html_files.add(file.toFile()); if (html_files.size() == 0) { System.err.println("No matching HTML files found in '" + dir.getAbsolutePath() + "', exiting"); System.exit(1); } // create csvResultSink FileOutputStream csvout_file = new FileOutputStream("parsed_addresses-" + dateString + ".csv"); OutputStreamWriter csvout = new OutputStreamWriter(csvout_file, "UTF-8"); ResultSink csvResultSink = new CSVResultSink(csvout, new CSVStrategy('|', '"', '#')); // Connect to DB and osmAddressMatcher AddressMatcher osmAddressMatcher; DBSink dbSink = null; DBInfoSink dbInfoSink = null; if (cmd.hasOption('n')) { osmAddressMatcher = new DummyAddressMatcher(); } else { dburi = "jdbc:postgresql://" + cmd.getOptionValue('s') + "/" + cmd.getOptionValue('d'); Connection con = DriverManager.getConnection(dburi, cmd.getOptionValue('u'), cmd.getOptionValue('p')); osmAddressMatcher = new OsmAddressMatcher(con); dbSink = new DBSink(con); if (cmd.hasOption('i')) dbInfoSink = new DBInfoSink(con); } /* create resultsinks */ SinkMultiplexor sm = SinkMultiplexor.newSinkMultiplexor(); sm.addResultSink(csvResultSink); if (dbSink != null) { sm.addResultSink(dbSink); if (dbInfoSink != null) sm.addResultSink(dbInfoSink); } // create tableExtractor TableExtractor te = new TableExtractor(osmAddressMatcher, sm); // TODO: printout summary of options: processing date/time, host, directory of HTML files, jdbc uri, command line with parameters // iterate through files logger.info("Start processing " + html_files.size() + " files in " + dir); for (int i = 0; i < html_files.size(); i++) { System.err.println("Parsing #" + i + ": " + html_files.get(i)); te.processHTMLfile(html_files.get(i)); } System.err.println("Processed " + html_files.size() + " HTML files"); logger.info("Finished processing " + html_files.size() + " files in " + dir); } catch (ParseException e1) { System.err.println("Failed to parse CLI: " + e1.getMessage()); help_formatter.printHelp(info_msg, options); System.exit(1); } catch (IOException e) { System.err.println("I/O Exception: " + e.getMessage()); e.printStackTrace(); System.exit(1); } catch (SQLException e) { System.err.println("Database '" + dburi + "': " + e.getMessage()); System.exit(1); } catch (ResultSinkException e) { System.err.println("Failed to initialize ResultSink: " + e.getMessage()); System.exit(1); } catch (TableExtractorException e) { System.err.println("Failed to initialize Table Extractor: " + e.getMessage()); System.exit(1); } catch (CloneNotSupportedException | IllegalAccessException | InstantiationException e) { System.err.println("Something really odd happened: " + e.getMessage()); e.printStackTrace(); System.exit(1); } }
From source file:com.joliciel.talismane.terminology.Main.java
public static void main(String[] args) throws Exception { String termFilePath = null;/*from w ww . ja v a 2 s . c o m*/ String outFilePath = null; Command command = Command.extract; int depth = -1; String databasePropertiesPath = null; String projectCode = null; Map<String, String> argMap = TalismaneConfig.convertArgs(args); String logConfigPath = argMap.get("logConfigFile"); if (logConfigPath != null) { argMap.remove("logConfigFile"); Properties props = new Properties(); props.load(new FileInputStream(logConfigPath)); PropertyConfigurator.configure(props); } Map<String, String> innerArgs = new HashMap<String, String>(); for (Entry<String, String> argEntry : argMap.entrySet()) { String argName = argEntry.getKey(); String argValue = argEntry.getValue(); if (argName.equals("command")) command = Command.valueOf(argValue); else if (argName.equals("termFile")) termFilePath = argValue; else if (argName.equals("outFile")) outFilePath = argValue; else if (argName.equals("depth")) depth = Integer.parseInt(argValue); else if (argName.equals("databaseProperties")) databasePropertiesPath = argValue; else if (argName.equals("projectCode")) projectCode = argValue; else innerArgs.put(argName, argValue); } if (termFilePath == null && databasePropertiesPath == null) throw new TalismaneException("Required argument: termFile or databasePropertiesPath"); if (termFilePath != null) { String currentDirPath = System.getProperty("user.dir"); File termFileDir = new File(currentDirPath); if (termFilePath.lastIndexOf("/") >= 0) { String termFileDirPath = termFilePath.substring(0, termFilePath.lastIndexOf("/")); termFileDir = new File(termFileDirPath); termFileDir.mkdirs(); } } long startTime = new Date().getTime(); try { TerminologyServiceLocator terminologyServiceLocator = TerminologyServiceLocator.getInstance(); TerminologyService terminologyService = terminologyServiceLocator.getTerminologyService(); TerminologyBase terminologyBase = null; if (projectCode == null) throw new TalismaneException("Required argument: projectCode"); File file = new File(databasePropertiesPath); FileInputStream fis = new FileInputStream(file); Properties dataSourceProperties = new Properties(); dataSourceProperties.load(fis); terminologyBase = terminologyService.getPostGresTerminologyBase(projectCode, dataSourceProperties); if (command.equals(Command.analyse) || command.equals(Command.extract)) { if (depth < 0) throw new TalismaneException("Required argument: depth"); if (command.equals(Command.analyse)) { innerArgs.put("command", "analyse"); } else { innerArgs.put("command", "process"); } TalismaneFrench talismaneFrench = new TalismaneFrench(); TalismaneConfig config = new TalismaneConfig(innerArgs, talismaneFrench); PosTagSet tagSet = TalismaneSession.getPosTagSet(); Charset outputCharset = config.getOutputCharset(); TermExtractor termExtractor = terminologyService.getTermExtractor(terminologyBase); termExtractor.setMaxDepth(depth); termExtractor.setOutFilePath(termFilePath); termExtractor.getIncludeChildren().add(tagSet.getPosTag("P")); termExtractor.getIncludeChildren().add(tagSet.getPosTag("P+D")); termExtractor.getIncludeChildren().add(tagSet.getPosTag("CC")); termExtractor.getIncludeWithParent().add(tagSet.getPosTag("DET")); if (outFilePath != null) { if (outFilePath.lastIndexOf("/") >= 0) { String outFileDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/")); File outFileDir = new File(outFileDirPath); outFileDir.mkdirs(); } File outFile = new File(outFilePath); outFile.delete(); outFile.createNewFile(); Writer writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outFilePath), outputCharset)); TermAnalysisWriter termAnalysisWriter = new TermAnalysisWriter(writer); termExtractor.addTermObserver(termAnalysisWriter); } Talismane talismane = config.getTalismane(); talismane.setParseConfigurationProcessor(termExtractor); talismane.process(); } else if (command.equals(Command.list)) { List<Term> terms = terminologyBase.getTermsByFrequency(2); for (Term term : terms) { LOG.debug("Term: " + term.getText()); LOG.debug("Frequency: " + term.getFrequency()); LOG.debug("Heads: " + term.getHeads()); LOG.debug("Expansions: " + term.getExpansions()); LOG.debug("Contexts: " + term.getContexts()); } } } finally { long endTime = new Date().getTime(); long totalTime = endTime - startTime; LOG.info("Total time: " + totalTime); } }
From source file:eu.fbk.utils.lsa.util.Anvur.java
public static void main(String[] args) throws Exception { String logConfig = System.getProperty("log-config"); if (logConfig == null) { logConfig = "log-config.txt"; }/* w ww. j av a2s. co m*/ PropertyConfigurator.configure(logConfig); /* if (args.length != 2) { log.println("Usage: java -mx512M eu.fbk.utils.lsa.util.Anvur in-file out-dir"); System.exit(1); } File l = new File(args[1]); if (!l.exists()) { l.mkdir(); } List<String[]> list = readText(new File(args[0])); String oldCategory = ""; for (int i=0;i<list.size();i++) { String[] s = list.get(i); if (!oldCategory.equals(s[0])) { File f = new File(args[1] + File.separator + s[0]); boolean b = f.mkdir(); logger.debug(f + " created " + b); } File g = new File(args[1] + File.separator + s[0] + File.separator + s[1] + ".txt"); logger.debug("writing " + g + "..."); PrintWriter pw = new PrintWriter(new FileWriter(g)); //pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2])); if (s.length == 5) { pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2] + " " + s[4].replace('_', ' '))); } else { pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2])); } pw.flush(); pw.close(); } // end for i */ if (args.length != 7) { System.out.println(args.length); System.out.println( "Usage: java -mx2G eu.fbk.utils.lsa.util.Anvur input threshold size dim idf in-file-csv fields\n\n"); System.exit(1); } // DecimalFormat dec = new DecimalFormat("#.00"); File Ut = new File(args[0] + "-Ut"); File Sk = new File(args[0] + "-S"); File r = new File(args[0] + "-row"); File c = new File(args[0] + "-col"); File df = new File(args[0] + "-df"); double threshold = Double.parseDouble(args[1]); int size = Integer.parseInt(args[2]); int dim = Integer.parseInt(args[3]); boolean rescaleIdf = Boolean.parseBoolean(args[4]); //"author_check"0, "authors"1, "title"2, "year"3, "pubtype"4, "publisher"5, "journal"6, "volume"7, "number"8, "pages"9, "abstract"10, "nauthors", "citedby" String[] labels = { "author_check", "authors", "title", "year", "pubtype", "publisher", "journal", "volume", "number", "pages", "abstract", "nauthors", "citedby" //author_id authors title year pubtype publisher journal volume number pages abstract nauthors citedby }; String name = buildName(labels, args[6]); File bwf = new File(args[5] + name + "-bow.txt"); PrintWriter bw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bwf), "UTF-8"))); File bdf = new File(args[5] + name + "-bow.csv"); PrintWriter bd = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bdf), "UTF-8"))); File lwf = new File(args[5] + name + "-ls.txt"); PrintWriter lw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lwf), "UTF-8"))); File ldf = new File(args[5] + name + "-ls.csv"); PrintWriter ld = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(ldf), "UTF-8"))); File blwf = new File(args[5] + name + "-bow+ls.txt"); PrintWriter blw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(blwf), "UTF-8"))); File bldf = new File(args[5] + name + "-bow+ls.csv"); PrintWriter bld = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bldf), "UTF-8"))); File logf = new File(args[5] + name + ".log"); PrintWriter log = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(logf), "UTF-8"))); //System.exit(0); LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf); LSSimilarity lss = new LSSimilarity(lsm, size); List<String[]> list = readText(new File(args[5])); // author_check authors title year pubtype publisher journal volume number pages abstract nauthors citedby //header for (int i = 0; i < list.size(); i++) { String[] s1 = list.get(i); String t1 = s1[0].toLowerCase(); bw.print("\t"); lw.print("\t"); blw.print("\t"); bw.print(i + "(" + s1[0] + ")"); lw.print(i + "(" + s1[0] + ")"); blw.print(i + "(" + s1[0] + ")"); } // end for i bw.print("\n"); lw.print("\n"); blw.print("\n"); for (int i = 0; i < list.size(); i++) { logger.info(i + "\t"); String[] s1 = list.get(i); String t1 = buildText(s1, args[6]); BOW bow1 = new BOW(t1); logger.info(bow1); Vector d1 = lsm.mapDocument(bow1); d1.normalize(); log.println("d1:" + d1); Vector pd1 = lsm.mapPseudoDocument(d1); pd1.normalize(); log.println("pd1:" + pd1); Vector m1 = merge(pd1, d1); log.println("m1:" + m1); // write the orginal line for (int j = 0; j < s1.length; j++) { bd.print(s1[j]); bd.print("\t"); ld.print(s1[j]); ld.print("\t"); bld.print(s1[j]); bld.print("\t"); } // write the bow, ls, and bow+ls vectors bd.println(d1); ld.println(pd1); bld.println(m1); bw.print(i + "(" + s1[0] + ")"); lw.print(i + "(" + s1[0] + ")"); blw.print(i + "(" + s1[0] + ")"); for (int j = 0; j < i + 1; j++) { bw.print("\t"); lw.print("\t"); blw.print("\t"); } // end for j for (int j = i + 1; j < list.size(); j++) { logger.info(i + "\t" + j); String[] s2 = list.get(j); String t2 = buildText(s2, args[6]); BOW bow2 = new BOW(t2); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t1:" + t1); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t2:" + t2); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow1:" + bow1); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2); Vector d2 = lsm.mapDocument(bow2); d2.normalize(); log.println("d2:" + d2); Vector pd2 = lsm.mapPseudoDocument(d2); pd2.normalize(); log.println("pd2:" + pd2); Vector m2 = merge(pd2, d2); log.println("m2:" + m2); float cosVSM = d1.dotProduct(d2) / (float) Math.sqrt(d1.dotProduct(d1) * d2.dotProduct(d2)); float cosLSM = pd1.dotProduct(pd2) / (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2)); float cosBOWLSM = m1.dotProduct(m2) / (float) Math.sqrt(m1.dotProduct(m1) * m2.dotProduct(m2)); bw.print("\t"); bw.print(dec.format(cosVSM)); lw.print("\t"); lw.print(dec.format(cosLSM)); blw.print("\t"); blw.print(dec.format(cosBOWLSM)); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow\t" + cosVSM); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") ls:\t" + cosLSM); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow+ls:\t" + cosBOWLSM); } bw.print("\n"); lw.print("\n"); blw.print("\n"); } // end for i logger.info("wrote " + bwf); logger.info("wrote " + bwf); logger.info("wrote " + bdf); logger.info("wrote " + lwf); logger.info("wrote " + ldf); logger.info("wrote " + blwf); logger.info("wrote " + bldf); logger.info("wrote " + logf); ld.close(); bd.close(); bld.close(); bw.close(); lw.close(); blw.close(); log.close(); }
From source file:com.joliciel.frenchTreebank.FrenchTreebank.java
/** * @param args/*w w w.ja v a2 s . co m*/ */ public static void main(String[] args) throws Exception { String command = args[0]; String outFilePath = ""; String outDirPath = ""; String treebankPath = ""; String ftbFileName = ""; String rawTextDir = ""; String queryPath = ""; String sentenceNumber = null; boolean firstArg = true; for (String arg : args) { if (firstArg) { firstArg = false; continue; } int equalsPos = arg.indexOf('='); String argName = arg.substring(0, equalsPos); String argValue = arg.substring(equalsPos + 1); if (argName.equals("outfile")) outFilePath = argValue; else if (argName.equals("outdir")) outDirPath = argValue; else if (argName.equals("ftbFileName")) ftbFileName = argValue; else if (argName.equals("treebank")) treebankPath = argValue; else if (argName.equals("sentence")) sentenceNumber = argValue; else if (argName.equals("query")) queryPath = argValue; else if (argName.equals("rawTextDir")) rawTextDir = argValue; else throw new RuntimeException("Unknown argument: " + argName); } TalismaneServiceLocator talismaneServiceLocator = TalismaneServiceLocator.getInstance(); TreebankServiceLocator locator = TreebankServiceLocator.getInstance(talismaneServiceLocator); if (treebankPath.length() == 0) locator.setDataSourcePropertiesFile("jdbc-live.properties"); if (command.equals("search")) { final SearchService searchService = locator.getSearchService(); final XmlPatternSearch search = searchService.newXmlPatternSearch(); search.setXmlPatternFile(queryPath); List<SearchResult> searchResults = search.perform(); FileWriter fileWriter = new FileWriter(outFilePath); for (SearchResult searchResult : searchResults) { String lineToWrite = ""; Sentence sentence = searchResult.getSentence(); Phrase phrase = searchResult.getPhrase(); lineToWrite += sentence.getFile().getFileName() + "|"; lineToWrite += sentence.getSentenceNumber() + "|"; List<PhraseUnit> phraseUnits = searchResult.getPhraseUnits(); LOG.debug("Phrase: " + phrase.getId()); for (PhraseUnit phraseUnit : phraseUnits) lineToWrite += phraseUnit.getLemma().getText() + "|"; lineToWrite += phrase.getText(); fileWriter.write(lineToWrite + "\n"); } fileWriter.flush(); fileWriter.close(); } else if (command.equals("load")) { final TreebankService treebankService = locator.getTreebankService(); final TreebankSAXParser parser = new TreebankSAXParser(); parser.setTreebankService(treebankService); parser.parseDocument(treebankPath); } else if (command.equals("loadAll")) { final TreebankService treebankService = locator.getTreebankService(); File dir = new File(treebankPath); String firstFile = null; if (args.length > 2) firstFile = args[2]; String[] files = dir.list(); if (files == null) { throw new RuntimeException("Not a directory or no children: " + treebankPath); } else { boolean startProcessing = true; if (firstFile != null) startProcessing = false; for (int i = 0; i < files.length; i++) { if (!startProcessing && files[i].equals(firstFile)) startProcessing = true; if (startProcessing) { String filePath = args[1] + "/" + files[i]; LOG.debug(filePath); final TreebankSAXParser parser = new TreebankSAXParser(); parser.setTreebankService(treebankService); parser.parseDocument(filePath); } } } } else if (command.equals("loadRawText")) { final TreebankService treebankService = locator.getTreebankService(); final TreebankRawTextAssigner assigner = new TreebankRawTextAssigner(); assigner.setTreebankService(treebankService); assigner.setRawTextDirectory(rawTextDir); assigner.loadRawText(); } else if (command.equals("tokenize")) { Writer csvFileWriter = null; if (outFilePath != null && outFilePath.length() > 0) { if (outFilePath.lastIndexOf("/") > 0) { String outputDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/")); File outputDir = new File(outputDirPath); outputDir.mkdirs(); } File csvFile = new File(outFilePath); csvFile.delete(); csvFile.createNewFile(); csvFileWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8")); } try { final TreebankService treebankService = locator.getTreebankService(); TreebankExportService treebankExportService = locator.getTreebankExportServiceLocator() .getTreebankExportService(); TreebankUploadService treebankUploadService = locator.getTreebankUploadServiceLocator() .getTreebankUploadService(); TreebankReader treebankReader = null; if (treebankPath.length() > 0) { File treebankFile = new File(treebankPath); if (sentenceNumber != null) treebankReader = treebankUploadService.getXmlReader(treebankFile, sentenceNumber); else treebankReader = treebankUploadService.getXmlReader(treebankFile); } else { treebankReader = treebankService.getDatabaseReader(TreebankSubSet.ALL, 0); } TokeniserAnnotatedCorpusReader reader = treebankExportService .getTokeniserAnnotatedCorpusReader(treebankReader, csvFileWriter); while (reader.hasNextTokenSequence()) { TokenSequence tokenSequence = reader.nextTokenSequence(); List<Integer> tokenSplits = tokenSequence.getTokenSplits(); String sentence = tokenSequence.getText(); LOG.debug(sentence); int currentPos = 0; StringBuilder sb = new StringBuilder(); for (int split : tokenSplits) { if (split == 0) continue; String token = sentence.substring(currentPos, split); sb.append('|'); sb.append(token); currentPos = split; } LOG.debug(sb.toString()); } } finally { csvFileWriter.flush(); csvFileWriter.close(); } } else if (command.equals("export")) { if (outDirPath.length() == 0) throw new RuntimeException("Parameter required: outdir"); File outDir = new File(outDirPath); outDir.mkdirs(); final TreebankService treebankService = locator.getTreebankService(); FrenchTreebankXmlWriter xmlWriter = new FrenchTreebankXmlWriter(); xmlWriter.setTreebankService(treebankService); if (ftbFileName.length() == 0) { xmlWriter.write(outDir); } else { TreebankFile ftbFile = treebankService.loadTreebankFile(ftbFileName); String fileName = ftbFileName.substring(ftbFileName.lastIndexOf('/') + 1); File xmlFile = new File(outDir, fileName); xmlFile.delete(); xmlFile.createNewFile(); Writer xmlFileWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(xmlFile, false), "UTF8")); xmlWriter.write(xmlFileWriter, ftbFile); xmlFileWriter.flush(); xmlFileWriter.close(); } } else { throw new RuntimeException("Unknown command: " + command); } LOG.debug("========== END ============"); }
From source file:net.java.sen.tools.MkSenDic.java
/** * Build sen dictionary.//ww w . j a va 2 s.c o m * * @param args * custom dictionary files. see dic/build.xml. */ public static void main(String args[]) { ResourceBundle rb = ResourceBundle.getBundle("dictionary"); DictionaryMaker dm1 = new DictionaryMaker(); DictionaryMaker dm2 = new DictionaryMaker(); DictionaryMaker dm3 = new DictionaryMaker(); // 1st field information of connect file. Vector rule1 = new Vector(); // 2nd field information of connect file. Vector rule2 = new Vector(); // 3rd field information of connect file. Vector rule3 = new Vector(); // 4th field information of connect file. // this field shows cost of morpheme connection // [size3*(x3*size2+x2)+x1] // [size3*(Attr1*size2+Attr2)+Attl] short score[] = new short[20131]; long start = System.currentTimeMillis(); // ///////////////////////////////////////// // // Step1. Loading connetion file. // log.info("(1/7): reading connection matrix ... "); try { log.info("connection file = " + rb.getString("text_connection_file")); log.info("charset = " + rb.getString("dic.charset")); CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")), rb.getString("dic.charset")); String t[]; int line = 0; while ((t = csvparser.nextTokens()) != null) { if (t.length < 4) { log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line); log.warn(rb.getString("text_connection_file") + "may be broken."); break; } dm1.add(t[0]); rule1.add(t[0]); dm2.add(t[1]); rule2.add(t[1]); dm3.add(t[2]); rule3.add(t[2]); if (line == score.length) { score = resize(score); } score[line++] = (short) Integer.parseInt(t[3]); } // ///////////////////////////////////////// // // Step2. Building internal dictionary // log.info("(2/7): building type dictionary ... "); dm1.build(); dm2.build(); dm3.build(); // if you want check specified morpheme, you uncomment and modify // following line: /* * System.out.print("22="); dm3.getById(22); * System.out.print("368="); dm3.getById(368); * * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * DictionaryMaker.debug = true; * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?")); */ } catch (IOException e) { e.printStackTrace(); System.exit(0); } // ------------------------------------------------- int size1 = dm1.size(); int size2 = dm2.size(); int size3 = dm3.size(); int ruleSize = rule1.size(); short matrix[] = new short[size1 * size2 * size3]; short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost")); // ///////////////////////////////////////// // // Step3. Writing Connection Matrix // log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = " + size1 * size2 * size3 + ") ..."); for (int i = 0; i < (int) (size1 * size2 * size3); i++) matrix[i] = default_cost; for (int i = 0; i < ruleSize; i++) { Vector r1 = dm1.getRuleIdList((String) rule1.get(i)); Vector r2 = dm2.getRuleIdList((String) rule2.get(i)); Vector r3 = dm3.getRuleIdList((String) rule3.get(i)); for (Iterator i1 = r1.iterator(); i1.hasNext();) { int ii1 = ((Integer) i1.next()).intValue(); for (Iterator i2 = r2.iterator(); i2.hasNext();) { int ii2 = ((Integer) i2.next()).intValue(); for (Iterator i3 = r3.iterator(); i3.hasNext();) { int ii3 = ((Integer) i3.next()).intValue(); int pos = size3 * (size2 * ii1 + ii2) + ii3; matrix[pos] = score[i]; } } } } try { DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file")))); out.writeShort(size1); out.writeShort(size2); out.writeShort(size3); for (int i1 = 0; i1 < size1; i1++) for (int i2 = 0; i2 < size2; i2++) for (int i3 = 0; i3 < size3; i3++) { out.writeShort(matrix[size3 * (size2 * i1 + i2) + i3]); // if (matrix[size3 * (size2 * i1 + i2) + i3] != // default_cost) { // } } out.close(); } catch (IOException e) { e.printStackTrace(); System.exit(0); } matrix = null; score = null; // ------------------------------------------------- int pos_start = Integer.parseInt(rb.getString("pos_start")); int pos_size = Integer.parseInt(rb.getString("pos_size")); int di = 0; int offset = 0; ArrayList dicList = new ArrayList(); // ///////////////////////////////////////// // // Step4. Reading Morpheme Information // log.info("(4/7): reading morpheme information ... "); String t = null; String[] csv = null; try { // writer for feature file. BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset"))); log.info("load dic: " + rb.getString("text_dic_file")); BufferedReader dicStream = null; int custom_dic = -1; if (args.length == 0) { dicStream = new BufferedReader(new InputStreamReader( new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset"))); } else { custom_dic = 0; dicStream = new BufferedReader( new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } int line = 0; CSVData key_b = new CSVData(); CSVData pos_b = new CSVData(); while (true) { t = dicStream.readLine(); if (t == null) { dicStream.close(); custom_dic++; if (args.length == custom_dic) { break; } else { // read custum dictionary log.info("load dic: " + "args[custum_dic]"); dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } continue; } CSVParser parser = new CSVParser(t); csv = parser.nextTokens(); if (csv.length < (pos_size + pos_start)) { throw new RuntimeException("format error:" + t); } key_b.clear(); pos_b.clear(); for (int i = pos_start; i < (pos_start + pos_size - 1); i++) { key_b.append(csv[i]); pos_b.append(csv[i]); } key_b.append(csv[pos_start + pos_size - 1]); pos_b.append(csv[pos_start + pos_size - 1]); for (int i = pos_start + pos_size; i < (csv.length - 1); i++) { pos_b.append(csv[i]); } pos_b.append(csv[csv.length - 1]); CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(key_b.toString()); token.rcAttr1 = (short) dm2.getDicId(key_b.toString()); token.lcAttr = (short) dm3.getDicId(key_b.toString()); token.posid = 0; token.posID = offset; token.length = (short) csv[0].length(); token.cost = (short) Integer.parseInt(csv[1]); dicList.add(new PairObject(csv[0], token)); byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset")); offset += (b.length + 1); String pos_b_str = pos_b.toString(); bw.write(pos_b_str, 0, pos_b_str.length()); // bw.write(b, 0, b.length); bw.write(0); if (++di % 50000 == 0) log.info("" + di + "... "); } bw.close(); // ----end of writing feature.cha ---- } catch (Exception e) { log.error("Error: " + t); e.printStackTrace(); System.exit(1); } rule1 = null; rule2 = null; rule3 = null; // ///////////////////////////////////////// // // Step5. Sort lexs and write to file // log.info("(5/7): sorting lex... "); int value[] = new int[dicList.size()]; char key[][] = new char[dicList.size()][]; int spos = 0; int dsize = 0; int bsize = 0; String prev = ""; Collections.sort(dicList); // ///////////////////////////////////////// // // Step6. Writing Token Information // log.info("(6/7): writing token... "); try { // writer for token file. DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("token_file")))); // writing 'bos' and 'eos' and 'unknown' token. CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos")); token.posID = -1; token.write(out); log.info("key size = " + key.length); for (int i = 0; i < key.length; i++) { String k = (String) ((PairObject) dicList.get(i)).key; if (!prev.equals(k) && i != 0) { key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; bsize = 1; spos = i; } else { bsize++; } prev = (String) ((PairObject) dicList.get(i)).key; ((CToken) (((PairObject) dicList.get(i)).value)).write(out); } out.flush(); out.close(); } catch (Exception e) { e.printStackTrace(); System.exit(1); } key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; dm1 = null; dm2 = null; dm3 = null; dicList = null; // ///////////////////////////////////////// // // Step7. Build Double Array // log.info("(7/7): building Double-Array (size = " + dsize + ") ..."); DoubleArrayTrie da = new DoubleArrayTrie(); da.build(key, null, value, dsize); try { da.save(rb.getString("double_array_file")); } catch (Exception e) { e.printStackTrace(); } log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[ms]"); }
From source file:at.riemers.velocity2js.velocity.Velocity2Js.java
/** * @param args the command line arguments *//*ww w .java 2 s.c om*/ public static void main(String[] args) { try { if (args.length == 0) { printUsage(); return; } if (args[0].equals("-d") && args.length >= 3) { String resource = null; if (args.length >= 4) { resource = args[3]; } List<I18NBundle> bundles = getBundles(resource); Velocity2Js.generateDir(args[1], args[2], bundles); return; } if (args[0].equals("-f") && args.length >= 4) { Properties p = new Properties(); p.setProperty("resource.loader", "file"); p.setProperty("file.resource.loader.description", "Velocity File Resource Loader"); p.setProperty("file.resource.loader.class", "org.apache.velocity.runtime.resource.loader.FileResourceLoader"); p.setProperty("file.resource.loader.path", args[1]); Velocity2Js.init(p); String function = createFunctionName(args[2]); String resource = null; if (args.length >= 5) { resource = args[4]; } List<I18NBundle> bundles = getBundles(resource); for (I18NBundle bundle : bundles) { String fname = args[3]; int e = args[3].lastIndexOf('.'); if (e <= 0 || e > args[3].length()) e = args[3].length(); fname = args[3].substring(0, e) + bundle.getLocale() + args[3].substring(e); BufferedWriter writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(fname), "UTF8")); Velocity2Js.generate(args[2], function, writer, bundle.getBundle()); writer.flush(); writer.close(); } return; } printUsage(); return; } catch (ResourceNotFoundException rnfe) { log.error("[velocity2js] : cannot find template : " + rnfe.getMessage()); } catch (ParseErrorException pee) { log.error("[velocity2js] : Syntax error in template :" + pee); } catch (Exception ex) { System.out.flush(); log.error("[velocity2js] : unknown error " + ex.getMessage()); ex.printStackTrace(System.out); System.out.println(ex.getLocalizedMessage()); System.exit(1); } }
From source file:com.joliciel.talismane.terminology.TalismaneTermExtractorMain.java
public static void main(String[] args) throws Exception { String termFilePath = null;/* ww w. ja v a 2 s . co m*/ String outFilePath = null; Command command = Command.extract; int depth = -1; String databasePropertiesPath = null; String projectCode = null; String terminologyPropertiesPath = null; Map<String, String> argMap = StringUtils.convertArgs(args); String logConfigPath = argMap.get("logConfigFile"); if (logConfigPath != null) { argMap.remove("logConfigFile"); Properties props = new Properties(); props.load(new FileInputStream(logConfigPath)); PropertyConfigurator.configure(props); } Map<String, String> innerArgs = new HashMap<String, String>(); for (Entry<String, String> argEntry : argMap.entrySet()) { String argName = argEntry.getKey(); String argValue = argEntry.getValue(); if (argName.equals("command")) command = Command.valueOf(argValue); else if (argName.equals("termFile")) termFilePath = argValue; else if (argName.equals("outFile")) outFilePath = argValue; else if (argName.equals("depth")) depth = Integer.parseInt(argValue); else if (argName.equals("databaseProperties")) databasePropertiesPath = argValue; else if (argName.equals("terminologyProperties")) terminologyPropertiesPath = argValue; else if (argName.equals("projectCode")) projectCode = argValue; else innerArgs.put(argName, argValue); } if (termFilePath == null && databasePropertiesPath == null) throw new TalismaneException("Required argument: termFile or databasePropertiesPath"); if (termFilePath != null) { String currentDirPath = System.getProperty("user.dir"); File termFileDir = new File(currentDirPath); if (termFilePath.lastIndexOf("/") >= 0) { String termFileDirPath = termFilePath.substring(0, termFilePath.lastIndexOf("/")); termFileDir = new File(termFileDirPath); termFileDir.mkdirs(); } } long startTime = new Date().getTime(); try { if (command.equals(Command.analyse)) { innerArgs.put("command", "analyse"); } else { innerArgs.put("command", "process"); } String sessionId = ""; TalismaneServiceLocator locator = TalismaneServiceLocator.getInstance(sessionId); TalismaneService talismaneService = locator.getTalismaneService(); TalismaneConfig config = talismaneService.getTalismaneConfig(innerArgs, sessionId); TerminologyServiceLocator terminologyServiceLocator = TerminologyServiceLocator.getInstance(locator); TerminologyService terminologyService = terminologyServiceLocator.getTerminologyService(); TerminologyBase terminologyBase = null; if (projectCode == null) throw new TalismaneException("Required argument: projectCode"); File file = new File(databasePropertiesPath); FileInputStream fis = new FileInputStream(file); Properties dataSourceProperties = new Properties(); dataSourceProperties.load(fis); terminologyBase = terminologyService.getPostGresTerminologyBase(projectCode, dataSourceProperties); TalismaneSession talismaneSession = talismaneService.getTalismaneSession(); if (command.equals(Command.analyse) || command.equals(Command.extract)) { Locale locale = talismaneSession.getLocale(); Map<TerminologyProperty, String> terminologyProperties = new HashMap<TerminologyProperty, String>(); if (terminologyPropertiesPath != null) { Map<String, String> terminologyPropertiesStr = StringUtils.getArgMap(terminologyPropertiesPath); for (String key : terminologyPropertiesStr.keySet()) { try { TerminologyProperty property = TerminologyProperty.valueOf(key); terminologyProperties.put(property, terminologyPropertiesStr.get(key)); } catch (IllegalArgumentException e) { throw new TalismaneException("Unknown terminology property: " + key); } } } else { terminologyProperties = getDefaultTerminologyProperties(locale); } if (depth <= 0 && !terminologyProperties.containsKey(TerminologyProperty.maxDepth)) throw new TalismaneException("Required argument: depth"); InputStream regexInputStream = getInputStreamFromResource( "parser_conll_with_location_input_regex.txt"); Scanner regexScanner = new Scanner(regexInputStream, "UTF-8"); String inputRegex = regexScanner.nextLine(); regexScanner.close(); config.setInputRegex(inputRegex); Charset outputCharset = config.getOutputCharset(); TermExtractor termExtractor = terminologyService.getTermExtractor(terminologyBase, terminologyProperties); if (depth > 0) termExtractor.setMaxDepth(depth); termExtractor.setOutFilePath(termFilePath); if (outFilePath != null) { if (outFilePath.lastIndexOf("/") >= 0) { String outFileDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/")); File outFileDir = new File(outFileDirPath); outFileDir.mkdirs(); } File outFile = new File(outFilePath); outFile.delete(); outFile.createNewFile(); Writer writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outFilePath), outputCharset)); TermAnalysisWriter termAnalysisWriter = new TermAnalysisWriter(writer); termExtractor.addTermObserver(termAnalysisWriter); } Talismane talismane = config.getTalismane(); talismane.setParseConfigurationProcessor(termExtractor); talismane.process(); } else if (command.equals(Command.list)) { List<Term> terms = terminologyBase.findTerms(2, null, 0, null, null); for (Term term : terms) { LOG.debug("Term: " + term.getText()); LOG.debug("Frequency: " + term.getFrequency()); LOG.debug("Heads: " + term.getHeads()); LOG.debug("Expansions: " + term.getExpansions()); LOG.debug("Contexts: " + term.getContexts()); } } } finally { long endTime = new Date().getTime(); long totalTime = endTime - startTime; LOG.info("Total time: " + totalTime); } }
From source file:client.QueryLastFm.java
License:asdf
public static void main(String[] args) throws Exception { // isAlreadyInserted("asdfs","jas,jnjkah"); // FileWriter fw = new FileWriter(".\\tracks.csv"); OutputStream track_os = new FileOutputStream(".\\tracks.csv"); PrintWriter out = new PrintWriter(new OutputStreamWriter(track_os, "UTF-8")); OutputStream track_id_os = new FileOutputStream(".\\track_id_sim_track_id.csv"); PrintWriter track_id_out = new PrintWriter(new OutputStreamWriter(track_id_os, "UTF-8")); track_id_out.print(""); ByteArrayInputStream input;/*from w w w . j a v a2 s . c o m*/ Document doc = null; CloseableHttpClient httpclient = HttpClients.createDefault(); String trackName = ""; String artistName = ""; String sourceMbid = ""; out.print("ID");// first row first column out.print(","); out.print("TrackName");// first row second column out.print(","); out.println("Artist");// first row third column track_id_out.print("source");// first row second column track_id_out.print(","); track_id_out.println("target");// first row third column // track_id_out.print(","); // track_id_out.println("type");// first row third column // out.flush(); // out.close(); // fw.close(); // os.close(); try { URI uri = new URIBuilder().setScheme("http").setHost("ws.audioscrobbler.com").setPath("/2.0/") .setParameter("method", "track.getsimilar").setParameter("artist", "cher") .setParameter("track", "believe").setParameter("limit", "100") .setParameter("api_key", "88858618961414f8bec919bddd057044").build(); // new URIBuilder(). HttpGet request = new HttpGet(uri); // request. // This is useful for last.fm logging and preventing them from blocking this client request.setHeader(HttpHeaders.USER_AGENT, "nileshmore@gatech.edu - ClassAssignment at GeorgiaTech Non-commercial use"); HttpGet httpGet = new HttpGet( "http://ws.audioscrobbler.com/2.0/?method=track.getsimilar&artist=cher&track=believe&limit=4&api_key=88858618961414f8bec919bddd057044"); CloseableHttpResponse response = httpclient.execute(request); int statusCode = response.getStatusLine().getStatusCode(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); // The underlying HTTP connection is still held by the response object // to allow the response content to be streamed directly from the network socket. // In order to ensure correct deallocation of system resources // the user MUST call CloseableHttpResponse#close() from a finally clause. // Please note that if response content is not fully consumed the underlying // connection cannot be safely re-used and will be shut down and discarded // by the connection manager. try { if (statusCode == 200) { HttpEntity entity1 = response.getEntity(); BufferedReader br = new BufferedReader( new InputStreamReader((response.getEntity().getContent()))); Document document = builder.parse((response.getEntity().getContent())); Element root = document.getDocumentElement(); root.normalize(); // Need to focus and resolve this part NodeList nodes; nodes = root.getChildNodes(); nodes = root.getElementsByTagName("track"); if (nodes.getLength() == 0) { // System.out.println("empty"); return; } Node trackNode; for (int k = 0; k < nodes.getLength(); k++) // can access all tracks now { trackNode = nodes.item(k); NodeList trackAttributes = trackNode.getChildNodes(); // check if mbid is present in track attributes // System.out.println("Length " + (trackAttributes.item(5).getNodeName().compareToIgnoreCase("mbid") == 0)); if ((trackAttributes.item(5).getNodeName().compareToIgnoreCase("mbid") == 0)) { if (((Element) trackAttributes.item(5)).hasChildNodes()) ;// System.out.println("Go aHead"); else continue; } else continue; for (int n = 0; n < trackAttributes.getLength(); n++) { Node attribute = trackAttributes.item(n); if ((attribute.getNodeName().compareToIgnoreCase("name")) == 0) { // System.out.println(((Element)attribute).getFirstChild().getNodeValue()); trackName = ((Element) attribute).getFirstChild().getNodeValue(); // make string encoding as UTF-8 ************ } if ((attribute.getNodeName().compareToIgnoreCase("mbid")) == 0) { // System.out.println(n + " " + ((Element)attribute).getFirstChild().getNodeValue()); sourceMbid = attribute.getFirstChild().getNodeValue(); } if ((attribute.getNodeName().compareToIgnoreCase("artist")) == 0) { NodeList ArtistNodeList = attribute.getChildNodes(); for (int j = 0; j < ArtistNodeList.getLength(); j++) { Node Artistnode = ArtistNodeList.item(j); if ((Artistnode.getNodeName().compareToIgnoreCase("name")) == 0) { // System.out.println(((Element)Artistnode).getFirstChild().getNodeValue()); artistName = ((Element) Artistnode).getFirstChild().getNodeValue(); } } } } out.print(sourceMbid); out.print(","); out.print(trackName); out.print(","); out.println(artistName); // out.print(","); findSimilarTracks(track_id_out, sourceMbid, trackName, artistName); } track_id_out.flush(); out.flush(); out.close(); track_id_out.close(); track_os.close(); // fw.close(); Element trac = (Element) nodes.item(0); // trac.normalize(); nodes = trac.getChildNodes(); // System.out.println(nodes.getLength()); for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); // System.out.println(node.getNodeName()); if ((node.getNodeName().compareToIgnoreCase("name")) == 0) { // System.out.println(((Element)node).getFirstChild().getNodeValue()); } if ((node.getNodeName().compareToIgnoreCase("mbid")) == 0) { // System.out.println(((Element)node).getFirstChild().getNodeValue()); } if ((node.getNodeName().compareToIgnoreCase("artist")) == 0) { // System.out.println("Well"); NodeList ArtistNodeList = node.getChildNodes(); for (int j = 0; j < ArtistNodeList.getLength(); j++) { Node Artistnode = ArtistNodeList.item(j); if ((Artistnode.getNodeName().compareToIgnoreCase("name")) == 0) { /* System.out.println(((Element)Artistnode).getFirstChild().getNodeValue());*/ } /*System.out.println(Artistnode.getNodeName());*/ } } } /*if(node instanceof Element){ //a child element to process Element child = (Element) node; String attribute = child.getAttribute("width"); }*/ // System.out.println(root.getAttribute("status")); NodeList tracks = root.getElementsByTagName("track"); Element track = (Element) tracks.item(0); // System.out.println(track.getTagName()); track.getChildNodes(); } else { System.out.println("failed with status" + response.getStatusLine()); } // input = (ByteArrayInputStream)entity1.getContent(); // do something useful with the response body // and ensure it is fully consumed } finally { response.close(); } } finally { System.out.println("Exited succesfully."); httpclient.close(); } }
From source file:gobblin.runtime.util.JobStateToJsonConverter.java
@SuppressWarnings("all") public static void main(String[] args) throws Exception { Option sysConfigOption = Option.builder("sc").argName("system configuration file") .desc("Gobblin system configuration file").longOpt("sysconfig").hasArgs().build(); Option storeUrlOption = Option.builder("u").argName("gobblin state store URL") .desc("Gobblin state store root path URL").longOpt("storeurl").hasArgs().required().build(); Option jobNameOption = Option.builder("n").argName("gobblin job name").desc("Gobblin job name") .longOpt("name").hasArgs().required().build(); Option jobIdOption = Option.builder("i").argName("gobblin job id").desc("Gobblin job id").longOpt("id") .hasArgs().build();//www .ja v a 2 s . c om Option convertAllOption = Option.builder("a") .desc("Whether to convert all past job states of the given job").longOpt("all").build(); Option keepConfigOption = Option.builder("kc").desc("Whether to keep all configuration properties") .longOpt("keepConfig").build(); Option outputToFile = Option.builder("t").argName("output file name").desc("Output file name") .longOpt("toFile").hasArgs().build(); Options options = new Options(); options.addOption(sysConfigOption); options.addOption(storeUrlOption); options.addOption(jobNameOption); options.addOption(jobIdOption); options.addOption(convertAllOption); options.addOption(keepConfigOption); options.addOption(outputToFile); CommandLine cmd = null; try { CommandLineParser parser = new DefaultParser(); cmd = parser.parse(options, args); } catch (ParseException pe) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("JobStateToJsonConverter", options); System.exit(1); } Properties sysConfig = new Properties(); if (cmd.hasOption(sysConfigOption.getLongOpt())) { sysConfig = JobConfigurationUtils.fileToProperties(cmd.getOptionValue(sysConfigOption.getLongOpt())); } JobStateToJsonConverter converter = new JobStateToJsonConverter(sysConfig, cmd.getOptionValue('u'), cmd.hasOption("kc")); StringWriter stringWriter = new StringWriter(); if (cmd.hasOption('i')) { converter.convert(cmd.getOptionValue('n'), cmd.getOptionValue('i'), stringWriter); } else { if (cmd.hasOption('a')) { converter.convertAll(cmd.getOptionValue('n'), stringWriter); } else { converter.convert(cmd.getOptionValue('n'), stringWriter); } } if (cmd.hasOption('t')) { Closer closer = Closer.create(); try { FileOutputStream fileOutputStream = closer.register(new FileOutputStream(cmd.getOptionValue('t'))); OutputStreamWriter outputStreamWriter = closer.register( new OutputStreamWriter(fileOutputStream, ConfigurationKeys.DEFAULT_CHARSET_ENCODING)); BufferedWriter bufferedWriter = closer.register(new BufferedWriter(outputStreamWriter)); bufferedWriter.write(stringWriter.toString()); } catch (Throwable t) { throw closer.rethrow(t); } finally { closer.close(); } } else { System.out.println(stringWriter.toString()); } }
From source file:de.morbz.osmpoispbf.Scanner.java
public static void main(String[] args) { System.out.println("OsmPoisPbf " + VERSION + " started"); // Get input file if (args.length < 1) { System.out.println("Error: Please provide an input file"); System.exit(-1);//from w w w . j av a2 s. c o m } String inputFile = args[args.length - 1]; // Get output file String outputFile; int index = inputFile.indexOf('.'); if (index != -1) { outputFile = inputFile.substring(0, index); } else { outputFile = inputFile; } outputFile += ".csv"; // Setup CLI parameters options = new Options(); options.addOption("ff", "filterFile", true, "The file that is used to filter categories"); options.addOption("of", "outputFile", true, "The output CSV file to be written"); options.addOption("rt", "requiredTags", true, "Comma separated list of tags that are required [name]"); options.addOption("ut", "undesiredTags", true, "Comma separated list of tags=value combinations that should be filtered [key=value]"); options.addOption("ot", "outputTags", true, "Comma separated list of tags that are exported [name]"); options.addOption("ph", "printHeader", false, "If flag is set, the `outputTags` are printed as first line in the output file."); options.addOption("r", "relations", false, "Parse relations"); options.addOption("nw", "noWays", false, "Don't parse ways"); options.addOption("nn", "noNodes", false, "Don't parse nodes"); options.addOption("u", "allowUnclosedWays", false, "Allow ways that aren't closed"); options.addOption("d", "decimals", true, "Number of decimal places of coordinates [7]"); options.addOption("s", "separator", true, "Separator character for CSV [|]"); options.addOption("v", "verbose", false, "Print all found POIs"); options.addOption("h", "help", false, "Print this help"); // Parse parameters CommandLine line = null; try { line = (new DefaultParser()).parse(options, args); } catch (ParseException exp) { System.err.println(exp.getMessage()); printHelp(); System.exit(-1); } // Help if (line.hasOption("help")) { printHelp(); System.exit(0); } // Get filter file String filterFile = null; if (line.hasOption("filterFile")) { filterFile = line.getOptionValue("filterFile"); } // Get output file if (line.hasOption("outputFile")) { outputFile = line.getOptionValue("outputFile"); } // Check files if (inputFile.equals(outputFile)) { System.out.println("Error: Input and output files are the same"); System.exit(-1); } File file = new File(inputFile); if (!file.exists()) { System.out.println("Error: Input file doesn't exist"); System.exit(-1); } // Check OSM entity types boolean parseNodes = true; boolean parseWays = true; boolean parseRelations = false; if (line.hasOption("noNodes")) { parseNodes = false; } if (line.hasOption("noWays")) { parseWays = false; } if (line.hasOption("relations")) { parseRelations = true; } // Unclosed ways allowed? if (line.hasOption("allowUnclosedWays")) { onlyClosedWays = false; } // Get CSV separator char separator = '|'; if (line.hasOption("separator")) { String arg = line.getOptionValue("separator"); if (arg.length() != 1) { System.out.println("Error: The CSV separator has to be exactly 1 character"); System.exit(-1); } separator = arg.charAt(0); } Poi.setSeparator(separator); // Set decimals int decimals = 7; // OSM default if (line.hasOption("decimals")) { String arg = line.getOptionValue("decimals"); try { int dec = Integer.valueOf(arg); if (dec < 0) { System.out.println("Error: Decimals must not be less than 0"); System.exit(-1); } else { decimals = dec; } } catch (NumberFormatException ex) { System.out.println("Error: Decimals have to be a number"); System.exit(-1); } } Poi.setDecimals(decimals); // Verbose mode? if (line.hasOption("verbose")) { printPois = true; } // Required tags if (line.hasOption("requiredTags")) { String arg = line.getOptionValue("requiredTags"); requiredTags = arg.split(","); } // Undesired tags if (line.hasOption("undesiredTags")) { String arg = line.getOptionValue("undesiredTags"); undesiredTags = new HashMap<>(); for (String undesired : arg.split(",")) { String[] keyVal = undesired.split("="); if (keyVal.length != 2) { System.out.println("Error: Undesired Tags have to formated like tag=value"); System.exit(-1); } if (!undesiredTags.containsKey(keyVal[0])) { undesiredTags.put(keyVal[0], new HashSet<>(1)); } undesiredTags.get(keyVal[0]).add(keyVal[1]); } } // Output tags if (line.hasOption("outputTags")) { String arg = line.getOptionValue("outputTags"); outputTags = arg.split(","); } // Get filter rules FilterFileParser parser = new FilterFileParser(filterFile); filters = parser.parse(); if (filters == null) { System.exit(-1); } // Setup CSV output try { writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF8")); } catch (IOException e) { System.out.println("Error: Output file error"); System.exit(-1); } // Print Header if (line.hasOption("printHeader")) { String header = "category" + separator + "osm_id" + separator + "lat" + separator + "lon"; for (int i = 0; i < outputTags.length; i++) { header += separator + outputTags[i]; } try { writer.write(header + "\n"); } catch (IOException e) { System.out.println("Error: Output file write error"); System.exit(-1); } } // Setup OSMonaut EntityFilter filter = new EntityFilter(parseNodes, parseWays, parseRelations); Osmonaut naut = new Osmonaut(inputFile, filter, false); // Start watch StopWatch stopWatch = new StopWatch(); stopWatch.start(); // Start OSMonaut String finalSeparator = String.valueOf(separator); naut.scan(new IOsmonautReceiver() { boolean entityNeeded; @Override public boolean needsEntity(EntityType type, Tags tags) { // Are there any tags? if (tags.size() == 0) { return false; } // Check required tags for (String tag : requiredTags) { if (!tags.hasKey(tag)) { return false; } } entityNeeded = getCategory(tags, filters) != null; if (undesiredTags != null && entityNeeded) { for (String key : undesiredTags.keySet()) { if (tags.hasKey(key)) { for (String val : undesiredTags.get(key)) { if (tags.hasKeyValue(key, val)) { return false; } } } } } return entityNeeded; } @Override public void foundEntity(Entity entity) { // Check if way is closed if (onlyClosedWays && entity.getEntityType() == EntityType.WAY) { if (!((Way) entity).isClosed()) { return; } } // Get category Tags tags = entity.getTags(); String cat = getCategory(tags, filters); if (cat == null) { return; } // Get center LatLon center = entity.getCenter(); if (center == null) { return; } // Make OSM-ID String type = ""; switch (entity.getEntityType()) { case NODE: type = "node"; break; case WAY: type = "way"; break; case RELATION: type = "relation"; break; } String id = String.valueOf(entity.getId()); // Make output tags String[] values = new String[outputTags.length]; for (int i = 0; i < outputTags.length; i++) { String key = outputTags[i]; if (tags.hasKey(key)) { values[i] = tags.get(key).replaceAll(finalSeparator, "").replaceAll("\"", ""); } } // Make POI poisFound++; Poi poi = new Poi(values, cat, center, type, id); // Output if (printPois && System.currentTimeMillis() > lastMillis + 40) { printPoisFound(); lastMillis = System.currentTimeMillis(); } // Write to file try { writer.write(poi.toCsv() + "\n"); } catch (IOException e) { System.out.println("Error: Output file write error"); System.exit(-1); } } }); // Close writer try { writer.close(); } catch (IOException e) { System.out.println("Error: Output file close error"); System.exit(-1); } // Output results stopWatch.stop(); printPoisFound(); System.out.println(); System.out.println("Elapsed time in milliseconds: " + stopWatch.getElapsedTime()); // Quit System.exit(0); }