List of usage examples for java.lang String equals
public boolean equals(Object anObject)
From source file:io.anserini.index.IndexTweets.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory") .create(COLLECTION_OPTION)); options.addOption(/*from w w w. j a v a2 s . c o m*/ OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexTweets.class.getName(), options); System.exit(-1); } String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION); String indexPath = cmdline.getOptionValue(INDEX_OPTION); final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) { textOptions.setStoreTermVectors(true); } LOG.info("collection: " + collectionPath); LOG.info("index: " + indexPath); LongOpenHashSet deletes = null; if (cmdline.hasOption(DELETES_OPTION)) { deletes = new LongOpenHashSet(); File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION)); if (!deletesFile.exists()) { System.err.println("Error: " + deletesFile + " does not exist!"); System.exit(-1); } LOG.info("Reading deletes from " + deletesFile); FileInputStream fin = new FileInputStream(deletesFile); byte[] ignoreBytes = new byte[2]; fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin))); String s; while ((s = br.readLine()) != null) { if (s.contains("\t")) { deletes.add(Long.parseLong(s.split("\t")[0])); } else { deletes.add(Long.parseLong(s)); } } br.close(); fin.close(); LOG.info("Read " + deletes.size() + " tweetids from deletes file."); } long maxId = Long.MAX_VALUE; if (cmdline.hasOption(MAX_ID_OPTION)) { maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION)); LOG.info("index: " + maxId); } long startTime = System.currentTimeMillis(); File file = new File(collectionPath); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } StatusStream stream = new JsonStatusCorpusReader(file); Directory dir = FSDirectory.open(Paths.get(indexPath)); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); int cnt = 0; Status status; try { while ((status = stream.next()) != null) { if (status.getText() == null) { continue; } // Skip deletes tweetids. if (deletes != null && deletes.contains(status.getId())) { continue; } if (status.getId() > maxId) { continue; } cnt++; Document doc = new Document(); doc.add(new LongPoint(StatusField.ID.name, status.getId())); doc.add(new StoredField(StatusField.ID.name, status.getId())); doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch())); doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch())); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntPoint(StatusField.FRIENDS_COUNT.name, status.getFollowersCount())); doc.add(new StoredField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount())); doc.add(new IntPoint(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount())); doc.add(new StoredField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount())); doc.add(new IntPoint(StatusField.STATUSES_COUNT.name, status.getStatusesCount())); doc.add(new StoredField(StatusField.STATUSES_COUNT.name, status.getStatusesCount())); long inReplyToStatusId = status.getInReplyToStatusId(); if (inReplyToStatusId > 0) { doc.add(new LongPoint(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId)); doc.add(new StoredField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId)); doc.add(new LongPoint(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId())); doc.add(new StoredField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId())); } String lang = status.getLang(); if (!lang.equals("unknown")) { doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES)); } long retweetStatusId = status.getRetweetedStatusId(); if (retweetStatusId > 0) { doc.add(new LongPoint(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId)); doc.add(new StoredField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId)); doc.add(new LongPoint(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId())); doc.add(new StoredField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId())); doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount())); doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount())); if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) { LOG.warn("Error parsing retweet fields of " + status.getId()); } } writer.addDocument(doc); if (cnt % 100000 == 0) { LOG.info(cnt + " statuses indexed"); } } LOG.info(String.format("Total of %s statuses added", cnt)); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); stream.close(); } }
From source file:org.glowroot.benchmark.ResultFormatter.java
public static void main(String[] args) throws IOException { File resultFile = new File(args[0]); Scores scores = new Scores(); double baselineStartupTime = -1; double glowrootStartupTime = -1; ObjectMapper mapper = new ObjectMapper(); String content = Files.toString(resultFile, Charsets.UTF_8); content = content.replaceAll("\n", " "); ArrayNode results = (ArrayNode) mapper.readTree(content); for (JsonNode result : results) { String benchmark = result.get("benchmark").asText(); benchmark = benchmark.substring(0, benchmark.lastIndexOf('.')); ObjectNode primaryMetric = (ObjectNode) result.get("primaryMetric"); double score = primaryMetric.get("score").asDouble(); if (benchmark.equals(StartupBenchmark.class.getName())) { baselineStartupTime = score; continue; } else if (benchmark.equals(StartupWithGlowrootBenchmark.class.getName())) { glowrootStartupTime = score; continue; }// w w w .ja v a 2 s .c o m ObjectNode scorePercentiles = (ObjectNode) primaryMetric.get("scorePercentiles"); double score50 = scorePercentiles.get("50.0").asDouble(); double score95 = scorePercentiles.get("95.0").asDouble(); double score99 = scorePercentiles.get("99.0").asDouble(); double score999 = scorePercentiles.get("99.9").asDouble(); double score9999 = scorePercentiles.get("99.99").asDouble(); double allocatedBytes = getAllocatedBytes(result); if (benchmark.equals(ServletBenchmark.class.getName())) { scores.baselineResponseTimeAvg = score; scores.baselineResponseTime50 = score50; scores.baselineResponseTime95 = score95; scores.baselineResponseTime99 = score99; scores.baselineResponseTime999 = score999; scores.baselineResponseTime9999 = score9999; scores.baselineAllocatedBytes = allocatedBytes; } else if (benchmark.equals(ServletWithGlowrootBenchmark.class.getName())) { scores.glowrootResponseTimeAvg = score; scores.glowrootResponseTime50 = score50; scores.glowrootResponseTime95 = score95; scores.glowrootResponseTime99 = score99; scores.glowrootResponseTime999 = score999; scores.glowrootResponseTime9999 = score9999; scores.glowrootAllocatedBytes = allocatedBytes; } else { throw new AssertionError(benchmark); } } System.out.println(); printScores(scores); if (baselineStartupTime != -1) { System.out.println("STARTUP"); System.out.format("%25s%14s%14s%n", "", "baseline", "glowroot"); printLine("Startup time (avg)", "ms", baselineStartupTime, glowrootStartupTime); } System.out.println(); }
From source file:com.semsaas.jsonxml.tools.JsonXpath.java
public static void main(String[] args) { /*//from w w w . jav a2s. c o m * Process options */ LinkedList<String> files = new LinkedList<String>(); LinkedList<String> expr = new LinkedList<String>(); boolean help = false; String activeOption = null; String error = null; for (int i = 0; i < args.length && error == null && !help; i++) { if (activeOption != null) { if (activeOption.equals("-e")) { expr.push(args[i]); } else if (activeOption.equals("-h")) { help = true; } else { error = "Unknown option " + activeOption; } activeOption = null; } else { if (args[i].startsWith("-")) { activeOption = args[i]; } else { files.push(args[i]); } } } if (error != null) { System.err.println(error); showHelp(); } else if (help) { showHelp(); } else { try { TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); for (String f : files) { System.out.println("*** " + f + " ***"); try { // Create a JSON XML reader XMLReader reader = XMLReaderFactory.createXMLReader("com.semsaas.jsonxml.JsonXMLReader"); // Prepare a reader with the JSON file as input InputStreamReader stringReader = new InputStreamReader(new FileInputStream(f)); SAXSource saxSource = new SAXSource(reader, new InputSource(stringReader)); // Prepare a DOMResult which will hold the DOM of the xjson DOMResult domResult = new DOMResult(); // Run SAX processing through a transformer // (This could be done more simply, but we have here the opportunity to pass our xjson through // an XSLT and get a legacy XML output ;) ) transformer.transform(saxSource, domResult); Node dom = domResult.getNode(); XPathFactory xpathFactory = XPathFactory.newInstance(); for (String x : expr) { try { XPath xpath = xpathFactory.newXPath(); xpath.setNamespaceContext(new NamespaceContext() { public Iterator getPrefixes(String namespaceURI) { return null; } public String getPrefix(String namespaceURI) { return null; } public String getNamespaceURI(String prefix) { if (prefix == null) { return XJSON.XMLNS; } else if ("j".equals(prefix)) { return XJSON.XMLNS; } else { return null; } } }); NodeList nl = (NodeList) xpath.evaluate(x, dom, XPathConstants.NODESET); System.out.println("-- Found " + nl.getLength() + " nodes for xpath '" + x + "' in file '" + f + "'"); for (int i = 0; i < nl.getLength(); i++) { System.out.println(" +(" + i + ")+ "); XMLJsonGenerator handler = new XMLJsonGenerator(); StringWriter buffer = new StringWriter(); handler.setOutputWriter(buffer); SAXResult result = new SAXResult(handler); transformer.transform(new DOMSource(nl.item(i)), result); System.out.println(buffer.toString()); } } catch (XPathExpressionException e) { System.err.println("-- Error evaluating '" + x + "' on file '" + f + "'"); e.printStackTrace(); } catch (TransformerException e) { System.err.println("-- Error evaluating '" + x + "' on file '" + f + "'"); e.printStackTrace(); } } } catch (FileNotFoundException e) { System.err.println("File '" + f + "' was not found"); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (TransformerException e) { e.printStackTrace(); } } } catch (TransformerConfigurationException e) { e.printStackTrace(); } } }
From source file:jp.mamesoft.mailsocketchat.Mailsocketchat.java
public static void main(String argv[]) { System.out.println("MailSocketChat Ver." + version); if (argv.length != 6) { System.out.println(// ww w .ja v a 2s. c o m "ERROR! ? <?URL> <???> <GMail??> <GMail> <(Simple???Normal???All)> <????(Subject???Text)> ???????"); System.exit(0); } TimerTask newversion = new NewVersion(); Timer timer = new Timer(); timer.schedule(newversion, 0, 6 * 60 * 60 * 1000); //6?????? chat_url = argv[0]; chat_name = argv[1]; mail_user = argv[2]; mail_pass = argv[3]; if (argv[4].equals("Simple")) { logformat = "simple"; } else if (argv[4].equals("Normal")) { logformat = "normal"; } else if (argv[4].equals("All")) { logformat = "all"; } else { System.out.println( "ERROR! (5)???????Simple???Normal???All???????"); System.exit(0); } if (argv[5].equals("Subject")) { subjectname = true; } else if (argv[5].equals("Text")) { subjectname = false; } else { System.out.println( "ERROR! ????(6)???????Subject???Text???????"); System.exit(0); } try { Properties headers = new Properties(); headers.setProperty("user-agent", "MailSocketChat/" + version + " (" + osName + " " + osVer + ") Java/" + javaver + " (Mamesoft Web)"); socket = new SocketIO(chat_url, headers); socket.connect(new IOCallback() { @Override public void onMessage(JSONObject json, IOAcknowledge ack) { try { } catch (JSONException e) { e.printStackTrace(); } } @Override public void onMessage(String data, IOAcknowledge ack) { } @Override public void onError(SocketIOException socketIOException) { System.out.println("??????"); System.err.println(socketIOException); System.exit(0); } @Override public void onDisconnect() { System.out.println("???????"); System.exit(0); } @Override public void onConnect() { socket.emit("register", new JSONObject().put("mode", "client").put("lastid", 1)); System.out.println("SocketChat?????"); Thread mail = new Mail(); mail.start(); } @Override public void on(String event, IOAcknowledge ack, Object... args) { if (event.equals("log")) { JSONObject jsondata = (JSONObject) args[0]; Logperse(jsondata); } if (event.equals("init")) { JSONObject jsondata = (JSONObject) args[0]; JSONArray logs = jsondata.getJSONArray("logs"); for (int i = logs.length() - 1; i >= 0; i--) { JSONObject log = logs.getJSONObject(i); Logperse(log); } socket.emit("inout", new JSONObject().put("name", chat_name)); } if (event.equals("result")) { JSONObject jsondata = (JSONObject) args[0]; System.out.println(jsondata); } if (event.equals("users")) { JSONObject jsondata = (JSONObject) args[0]; JSONArray users = jsondata.getJSONArray("users"); for (int i = 0; i < users.length(); i++) { JSONObject user = users.getJSONObject(i); userchange(user); } } if (event.equals("newuser")) { JSONObject jsondata = (JSONObject) args[0]; userchange(jsondata); } if (event.equals("inout")) { JSONObject jsondata = (JSONObject) args[0]; userchange(jsondata); } if (event.equals("deluser")) { Integer id = (Integer) args[0]; if (users.containsKey(id)) { users.remove(id); } if (roms.containsKey(id)) { roms.remove(id); } } if (event.equals("userinfo")) { JSONObject jsondata = (JSONObject) args[0]; if (jsondata.getBoolean("rom")) { in = false; } else { in = true; } } } }); } catch (MalformedURLException e1) { e1.printStackTrace(); System.out.println("URL????????"); return; } }
From source file:BufferedSocketClient.java
public static void main(String args[]) throws Exception { Socket socket1;//w w w .j a va2 s . com int portNumber = 1777; String str = "initialize"; socket1 = new Socket(InetAddress.getLocalHost(), portNumber); BufferedReader br = new BufferedReader(new InputStreamReader(socket1.getInputStream())); PrintWriter pw = new PrintWriter(socket1.getOutputStream(), true); pw.println(str); while ((str = br.readLine()) != null) { System.out.println(str); pw.println("bye"); if (str.equals("bye")) break; } br.close(); pw.close(); socket1.close(); }
From source file:com.maxpowered.amazon.advertising.api.app.App.java
public static void main(final String... args) throws FileNotFoundException, IOException, JAXBException, XMLStreamException, InterruptedException { try (ClassPathXmlApplicationContext ctx = new ClassPathXmlApplicationContext("application-context.xml")) { /*//from w w w .j a v a 2s . co m * Get default options based on spring configs */ final String inputDefault = getOptionDefaultBasedOnSpringProperty(ctx, PROPERTY_APP_INPUT, STD_IN_STR); final String processedDefault = inputDefault.equals(STD_IN_STR) ? DEFAULT_PROCESSED_FILE_BASE : inputDefault + PROCESSED_EXT; final String outputDefault = getOptionDefaultBasedOnSpringProperty(ctx, PROPERTY_APP_OUTPUT, STD_OUT_STR); int throttleDefault = Integer.valueOf(getOptionDefaultBasedOnSpringProperty(ctx, PROPERTY_APP_THROTTLE, String.valueOf(DEFAULT_APP_THROTTLE))); // Maximum of 25000 requests per hour throttleDefault = Math.min(throttleDefault, MAX_APP_THROTTLE); /* * Get options from the CLI args */ final Options options = new Options(); options.addOption("h", false, "Display this help."); options.addOption("i", true, "Set the file to read ASINs from. " + DEFAULT_STR + inputDefault); options.addOption("p", true, "Set the file to store processed ASINs in. " + DEFAULT_STR + processedDefault + " or '" + PROCESSED_EXT + "' appended to the input file name."); // Add a note that the output depends on the configured processors. If none are configured, it defaults to a // std.out processor options.addOption("o", true, "Set the file to write fetched info xml to via FileProcessor. " + DEFAULT_STR + outputDefault); options.addOption("1", false, "Override output file and always output fetched info xml to std.out."); options.addOption("t", true, "Set the requests per hour throttle (max of " + MAX_APP_THROTTLE + "). " + DEFAULT_STR + throttleDefault); final CommandLineParser parser = new DefaultParser(); CommandLine cmd = null; boolean needsHelp = false; try { cmd = parser.parse(options, args); } catch (final ParseException e) { needsHelp = true; } if (cmd.hasOption("h") || needsHelp) { final HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("App", options); return; } // Get throttle rate final int throttle = Math.min( cmd.hasOption("t") ? Integer.valueOf(cmd.getOptionValue("t")) : throttleDefault, MAX_APP_THROTTLE); LOG.debug("Throttle (default {}) is {} requests per hour", throttleDefault, throttle); // We don't want to hit our limit, just under an hour worth of milliseconds final int requestWait = 3540000 / throttle; // Get input stream String input; if (cmd.hasOption("i")) { input = cmd.getOptionValue("i"); } else { input = inputDefault; } LOG.debug("Input name (default {}) is {}", inputDefault, input); // Get processed file String processed; if (cmd.hasOption("p")) { processed = cmd.getOptionValue("p"); } else { processed = input + PROCESSED_EXT; } LOG.debug("Processed file name (default {}) is {}", processedDefault, processed); final File processedFile = new File(processed); processedFile.createNewFile(); try (final InputStream inputStream = getInputStream(input)) { // Get output stream String output; if (cmd.hasOption("o")) { output = cmd.getOptionValue("o"); } else { output = outputDefault; } if (cmd.hasOption("1")) { output = STD_OUT_STR; } LOG.debug("Output (default {}) name is {}", outputDefault, output); // Special logic to set the FileProcessor output if (output.equals(STD_OUT_STR)) { final FileProcessor fileProcessor = ctx.getBeanFactory().getBean(FileProcessor.class); fileProcessor.setOutputStream(System.out); } else if (!output.equals(outputDefault)) { final FileProcessor fileProcessor = ctx.getBeanFactory().getBean(FileProcessor.class); fileProcessor.setOutputFile(output); } // This could be easily configured through CLI or properties final List<String> responseGroups = Lists.newArrayList(); for (final ResponseGroup responseGroup : new ResponseGroup[] { ResponseGroup.IMAGES, ResponseGroup.ITEM_ATTRIBUTES }) { responseGroups.add(responseGroup.getResponseGroupName()); } final String responseGroupString = Joiner.on(",").join(responseGroups); // Search the list of remaining ASINs final ProductFetcher fetcher = ctx.getBeanFactory().getBean(ProductFetcher.class); fetcher.setProcessedFile(processedFile); fetcher.setRequestWait(requestWait); fetcher.setInputStream(inputStream); fetcher.setResponseGroups(responseGroupString); // This ensures that statistics of processed asins should almost always get printed at the end Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { fetcher.logStatistics(); } }); fetcher.fetchProductInformation(); } } }
From source file:com.joliciel.talismane.terminology.TalismaneTermExtractorMain.java
public static void main(String[] args) throws Exception { String termFilePath = null;/*from ww w. j a v a2 s.c o m*/ String outFilePath = null; Command command = Command.extract; int depth = -1; String databasePropertiesPath = null; String projectCode = null; String terminologyPropertiesPath = null; Map<String, String> argMap = StringUtils.convertArgs(args); String logConfigPath = argMap.get("logConfigFile"); if (logConfigPath != null) { argMap.remove("logConfigFile"); Properties props = new Properties(); props.load(new FileInputStream(logConfigPath)); PropertyConfigurator.configure(props); } Map<String, String> innerArgs = new HashMap<String, String>(); for (Entry<String, String> argEntry : argMap.entrySet()) { String argName = argEntry.getKey(); String argValue = argEntry.getValue(); if (argName.equals("command")) command = Command.valueOf(argValue); else if (argName.equals("termFile")) termFilePath = argValue; else if (argName.equals("outFile")) outFilePath = argValue; else if (argName.equals("depth")) depth = Integer.parseInt(argValue); else if (argName.equals("databaseProperties")) databasePropertiesPath = argValue; else if (argName.equals("terminologyProperties")) terminologyPropertiesPath = argValue; else if (argName.equals("projectCode")) projectCode = argValue; else innerArgs.put(argName, argValue); } if (termFilePath == null && databasePropertiesPath == null) throw new TalismaneException("Required argument: termFile or databasePropertiesPath"); if (termFilePath != null) { String currentDirPath = System.getProperty("user.dir"); File termFileDir = new File(currentDirPath); if (termFilePath.lastIndexOf("/") >= 0) { String termFileDirPath = termFilePath.substring(0, termFilePath.lastIndexOf("/")); termFileDir = new File(termFileDirPath); termFileDir.mkdirs(); } } long startTime = new Date().getTime(); try { if (command.equals(Command.analyse)) { innerArgs.put("command", "analyse"); } else { innerArgs.put("command", "process"); } String sessionId = ""; TalismaneServiceLocator locator = TalismaneServiceLocator.getInstance(sessionId); TalismaneService talismaneService = locator.getTalismaneService(); TalismaneConfig config = talismaneService.getTalismaneConfig(innerArgs, sessionId); TerminologyServiceLocator terminologyServiceLocator = TerminologyServiceLocator.getInstance(locator); TerminologyService terminologyService = terminologyServiceLocator.getTerminologyService(); TerminologyBase terminologyBase = null; if (projectCode == null) throw new TalismaneException("Required argument: projectCode"); File file = new File(databasePropertiesPath); FileInputStream fis = new FileInputStream(file); Properties dataSourceProperties = new Properties(); dataSourceProperties.load(fis); terminologyBase = terminologyService.getPostGresTerminologyBase(projectCode, dataSourceProperties); TalismaneSession talismaneSession = talismaneService.getTalismaneSession(); if (command.equals(Command.analyse) || command.equals(Command.extract)) { Locale locale = talismaneSession.getLocale(); Map<TerminologyProperty, String> terminologyProperties = new HashMap<TerminologyProperty, String>(); if (terminologyPropertiesPath != null) { Map<String, String> terminologyPropertiesStr = StringUtils.getArgMap(terminologyPropertiesPath); for (String key : terminologyPropertiesStr.keySet()) { try { TerminologyProperty property = TerminologyProperty.valueOf(key); terminologyProperties.put(property, terminologyPropertiesStr.get(key)); } catch (IllegalArgumentException e) { throw new TalismaneException("Unknown terminology property: " + key); } } } else { terminologyProperties = getDefaultTerminologyProperties(locale); } if (depth <= 0 && !terminologyProperties.containsKey(TerminologyProperty.maxDepth)) throw new TalismaneException("Required argument: depth"); InputStream regexInputStream = getInputStreamFromResource( "parser_conll_with_location_input_regex.txt"); Scanner regexScanner = new Scanner(regexInputStream, "UTF-8"); String inputRegex = regexScanner.nextLine(); regexScanner.close(); config.setInputRegex(inputRegex); Charset outputCharset = config.getOutputCharset(); TermExtractor termExtractor = terminologyService.getTermExtractor(terminologyBase, terminologyProperties); if (depth > 0) termExtractor.setMaxDepth(depth); termExtractor.setOutFilePath(termFilePath); if (outFilePath != null) { if (outFilePath.lastIndexOf("/") >= 0) { String outFileDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/")); File outFileDir = new File(outFileDirPath); outFileDir.mkdirs(); } File outFile = new File(outFilePath); outFile.delete(); outFile.createNewFile(); Writer writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outFilePath), outputCharset)); TermAnalysisWriter termAnalysisWriter = new TermAnalysisWriter(writer); termExtractor.addTermObserver(termAnalysisWriter); } Talismane talismane = config.getTalismane(); talismane.setParseConfigurationProcessor(termExtractor); talismane.process(); } else if (command.equals(Command.list)) { List<Term> terms = terminologyBase.findTerms(2, null, 0, null, null); for (Term term : terms) { LOG.debug("Term: " + term.getText()); LOG.debug("Frequency: " + term.getFrequency()); LOG.debug("Heads: " + term.getHeads()); LOG.debug("Expansions: " + term.getExpansions()); LOG.debug("Contexts: " + term.getContexts()); } } } finally { long endTime = new Date().getTime(); long totalTime = endTime - startTime; LOG.info("Total time: " + totalTime); } }
From source file:com.seavus.wordcountermaven.WordCounter.java
/** * @param args the command line arguments * @throws java.io.FileNotFoundException *///from w w w . jav a 2 s .c om public static void main(String[] args) throws FileNotFoundException { InputStream fileStream = WordCounter.class.getClassLoader().getResourceAsStream("test.txt"); BufferedReader br = new BufferedReader(new InputStreamReader(fileStream)); Map<String, Integer> wordMap = new HashMap<>(); String line; boolean tokenFound = false; try { while ((line = br.readLine()) != null) { String[] tokens = line.trim().split("\\s+"); //trims surrounding whitespaces and splits lines into tokens for (String token : tokens) { for (Map.Entry<String, Integer> entry : wordMap.entrySet()) { if (StringUtils.equalsIgnoreCase(token, entry.getKey())) { wordMap.put(entry.getKey(), (wordMap.get(entry.getKey()) + 1)); tokenFound = true; } } if (!token.equals("") && !tokenFound) { wordMap.put(token.toLowerCase(), 1); } tokenFound = false; } } br.close(); } catch (IOException ex) { Logger.getLogger(WordCounter.class.getName()).log(Level.SEVERE, null, ex); } System.out.println("string : " + "frequency\r\n" + "-------------------"); //prints out each unique word (i.e. case-insensitive string token) and its frequency to the console for (Map.Entry<String, Integer> entry : wordMap.entrySet()) { System.out.println(entry.getKey() + " : " + entry.getValue()); } }
From source file:PopClean.java
public static void main(String args[]) { try {/*w w w .j a v a 2s .co m*/ String hostname = null, username = null, password = null; int port = 110; int sizelimit = -1; String subjectPattern = null; Pattern pattern = null; Matcher matcher = null; boolean delete = false; boolean confirm = true; // Handle command-line arguments for (int i = 0; i < args.length; i++) { if (args[i].equals("-user")) username = args[++i]; else if (args[i].equals("-pass")) password = args[++i]; else if (args[i].equals("-host")) hostname = args[++i]; else if (args[i].equals("-port")) port = Integer.parseInt(args[++i]); else if (args[i].equals("-size")) sizelimit = Integer.parseInt(args[++i]); else if (args[i].equals("-subject")) subjectPattern = args[++i]; else if (args[i].equals("-debug")) debug = true; else if (args[i].equals("-delete")) delete = true; else if (args[i].equals("-force")) // don't confirm confirm = false; } // Verify them if (hostname == null || username == null || password == null || sizelimit == -1) usage(); // Make sure the pattern is a valid regexp if (subjectPattern != null) { pattern = Pattern.compile(subjectPattern); matcher = pattern.matcher(""); } // Say what we are going to do System.out .println("Connecting to " + hostname + " on port " + port + " with username " + username + "."); if (delete) { System.out.println("Will delete all messages longer than " + sizelimit + " bytes"); if (subjectPattern != null) System.out.println("that have a subject matching: [" + subjectPattern + "]"); } else { System.out.println("Will list subject lines for messages " + "longer than " + sizelimit + " bytes"); if (subjectPattern != null) System.out.println("that have a subject matching: [" + subjectPattern + "]"); } // If asked to delete, ask for confirmation unless -force is given if (delete && confirm) { System.out.println(); System.out.print("Do you want to proceed (y/n) [n]: "); System.out.flush(); BufferedReader console = new BufferedReader(new InputStreamReader(System.in)); String response = console.readLine(); if (!response.equals("y")) { System.out.println("No messages deleted."); System.exit(0); } } // Connect to the server, and set up streams s = new Socket(hostname, port); in = new BufferedReader(new InputStreamReader(s.getInputStream())); out = new PrintWriter(new OutputStreamWriter(s.getOutputStream())); // Read the welcome message from the server, confirming it is OK. System.out.println("Connected: " + checkResponse()); // Now log in send("USER " + username); // Send username, wait for response send("PASS " + password); // Send password, wait for response System.out.println("Logged in"); // Check how many messages are waiting, and report it String stat = send("STAT"); StringTokenizer t = new StringTokenizer(stat); System.out.println(t.nextToken() + " messages in mailbox."); System.out.println("Total size: " + t.nextToken()); // Get a list of message numbers and sizes send("LIST"); // Send LIST command, wait for OK response. // Now read lines from the server until we get . by itself List msgs = new ArrayList(); String line; for (;;) { line = in.readLine(); if (line == null) throw new IOException("Unexpected EOF"); if (line.equals(".")) break; msgs.add(line); } // Now loop through the lines we read one at a time. // Each line should specify the message number and its size. int nummsgs = msgs.size(); for (int i = 0; i < nummsgs; i++) { String m = (String) msgs.get(i); StringTokenizer st = new StringTokenizer(m); int msgnum = Integer.parseInt(st.nextToken()); int msgsize = Integer.parseInt(st.nextToken()); // If the message is too small, ignore it. if (msgsize <= sizelimit) continue; // If we're listing messages, or matching subject lines // find the subject line for this message String subject = null; if (!delete || pattern != null) { subject = getSubject(msgnum); // get the subject line // If we couldn't find a subject, skip the message if (subject == null) continue; // If this subject does not match the pattern, then // skip the message if (pattern != null) { matcher.reset(subject); if (!matcher.matches()) continue; } // If we are listing, list this message if (!delete) { System.out.println("Subject " + msgnum + ": " + subject); continue; // so we never delete it } } // If we were asked to delete, then delete the message if (delete) { send("DELE " + msgnum); if (pattern == null) System.out.println("Deleted message " + msgnum); else System.out.println("Deleted message " + msgnum + ": " + subject); } } // When we're done, log out and shutdown the connection shutdown(); } catch (Exception e) { // If anything goes wrong print exception and show usage System.err.println(e); usage(); // Always try to shutdown nicely so the server doesn't hang on us shutdown(); } }
From source file:io.anserini.index.IndexTweetsUpdatePlace.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(OptionBuilder.withArgName("collection").hasArg() .withDescription("source collection directory").create(COLLECTION_OPTION)); options.addOption(/* ww w. j a va 2 s .c o m*/ OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexTweetsUpdatePlace.class.getName(), options); System.exit(-1); } String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION); String indexPath = cmdline.getOptionValue(INDEX_OPTION); System.out.println(collectionPath + " " + indexPath); LOG.info("collection: " + collectionPath); LOG.info("index: " + indexPath); long startTime = System.currentTimeMillis(); File file = new File(collectionPath); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) { textOptions.setStoreTermVectors(true); } final StatusStream stream = new JsonStatusCorpusReader(file); final Directory dir = new SimpleFSDirectory(Paths.get(cmdline.getOptionValue(INDEX_OPTION))); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.APPEND); final IndexWriter writer = new IndexWriter(dir, config); System.out.print("Original # of docs " + writer.numDocs()); int updateCount = 0; Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { stream.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } try { writer.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } try { dir.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("Shutting down"); } }); int cnt = 0; Status status; try { while ((status = stream.next()) != null) { if (status.getPlace() != null) { // Query q = NumericRangeQuery.newLongRange(TweetStreamReader.StatusField.ID.name, status.getId(), // status.getId(), true, true); // System.out.print("Deleting docCount="+writer.numDocs()); // writer.deleteDocuments(q); // writer.commit(); // System.out.print(" Deleted docCount="+writer.numDocs()); Document doc = new Document(); doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES)); doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES)); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES)); doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES)); doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES)); doc.add(new DoubleField(StatusField.LONGITUDE.name, status.getLongitude(), Store.YES)); doc.add(new DoubleField(StatusField.LATITUDE.name, status.getlatitude(), Store.YES)); doc.add(new StringField(StatusField.PLACE.name, status.getPlace(), Store.YES)); long inReplyToStatusId = status.getInReplyToStatusId(); if (inReplyToStatusId > 0) { doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(), Field.Store.YES)); } String lang = status.getLang(); if (!lang.equals("unknown")) { doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES)); } long retweetStatusId = status.getRetweetedStatusId(); if (retweetStatusId > 0) { doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(), Field.Store.YES)); doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES)); if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) { LOG.warn("Error parsing retweet fields of " + status.getId()); } } long id = status.getId(); BytesRefBuilder brb = new BytesRefBuilder(); NumericUtils.longToPrefixCodedBytes(id, 0, brb); Term term = new Term(StatusField.ID.name, brb.get()); writer.updateDocument(term, doc); // writer.addDocument(doc); updateCount += 1; if (updateCount % 10000 == 0) { LOG.info(updateCount + " statuses updated"); writer.commit(); System.out.println("Updated docCount=" + writer.numDocs()); } } } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); stream.close(); } }