List of usage examples for java.util.regex.Matcher.group
public String group(int group)
From source file:com.wittawat.wordseg.Main.java
public static void main(String[] args) throws Exception { Console con = System.console(); if (con == null) { System.out.println("The system must support console to run the program."); System.exit(1);/* w ww .j av a 2 s.c o m*/ } // Load model System.out.println("Loading model ..."); Classifier model = Data.getDefaultModel(); System.out.println("Finished loading model."); System.out.println(getAgreement()); boolean isUseDict = true; // Dummy statement to eliminate all lazy loading System.out.println("\n" + new NukeTokenizer3( "?????", model, isUseDict).tokenize() + "\n"); System.out.println(getHelp()); final String SET_DICT_PAT_STR = "\\s*set\\s+dict\\s+(true|false)\\s*"; final Pattern SET_DICT_PAT = Pattern.compile(SET_DICT_PAT_STR); while (true) { System.out.print(">> "); String line = con.readLine(); if (line != null && !line.trim().equals("")) { line = line.trim(); try { if (line.equals("h") || line.equals("help")) { System.out.println(getHelp()); } else if (line.equals("about")) { System.out.println(getAbout()); } else if (line.equals("agreement")) { System.out.println(getAgreement()); } else if (SET_DICT_PAT.matcher(line).find()) { Matcher m = SET_DICT_PAT.matcher(line); m.find(); String v = m.group(1); isUseDict = v.equals("true"); System.out.println("Dictionary will " + (isUseDict ? 
"" : "not ") + "be used."); } else if (line.matches("q|quit|exit")) { System.out.println("Bye"); System.exit(0); } else if (line.contains(":tokfile:")) { String[] splits = line.split(":tokfile:"); String in = splits[0]; String out = splits[1]; String content = FileUtils.readFileToString(new File(in)); long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(content, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println("Time to tokenize: " + (end - start) + " ms."); FileUtils.writeStringToFile(new File(out), tokenized); } else if (line.contains(":tokfile")) { String[] splits = line.split(":tokfile"); String in = splits[0]; String content = FileUtils.readFileToString(new File(in)); long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(content, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println(tokenized); System.out.println("Time to tokenize: " + (end - start) + " ms."); } else if (line.contains(":tok:")) { String[] splits = line.split(":tok:"); String inText = splits[0]; String out = splits[1]; long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(inText, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println("Time to tokenize: " + (end - start) + " ms."); FileUtils.writeStringToFile(new File(out), tokenized); } else if (line.contains(":tok")) { String[] splits = line.split(":tok"); String inText = splits[0]; long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(inText, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println(tokenized); System.out.println("Time to tokenize: " + (end - start) + " ms."); } else { System.out.println("Unknown command"); } } catch (Exception e) { System.out.println("Error. 
See the exception."); e.printStackTrace(); } } } }
From source file:com.hp.avmon.trap.service.TrapService.java
public static void main(String[] args) { String text = "{3}123{3}{10}"; Pattern p = Pattern.compile(".*?(\\{.+?\\})"); Matcher m = p.matcher(text); while (m.find()) { System.out.println(m.group(1)); }// w w w. j a v a 2s. c om }
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step5LinguisticPreprocessing.java
public static void main(String[] args) throws Exception { // input dir - list of xml query containers // step4-boiler-plate/ File inputDir = new File(args[0]); // output dir File outputDir = new File(args[1]); if (!outputDir.exists()) { outputDir.mkdirs();//from w ww . j ava 2 s.co m } // iterate over query containers for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { // System.out.println(rankedResults.plainText); if (rankedResults.plainText != null) { String[] lines = StringUtils.split(rankedResults.plainText, "\n"); // collecting all cleaned lines List<String> cleanLines = new ArrayList<>(lines.length); // collecting line tags List<String> lineTags = new ArrayList<>(lines.length); for (String line : lines) { // get the tag String tag = null; Matcher m = OPENING_TAG_PATTERN.matcher(line); if (m.find()) { tag = m.group(1); } if (tag == null) { throw new IllegalArgumentException("No html tag found for line:\n" + line); } // replace the tag at the beginning and the end String noTagText = line.replaceAll("^<\\S+>", "").replaceAll("</\\S+>$", ""); // do some html cleaning noTagText = noTagText.replaceAll(" ", " "); noTagText = noTagText.trim(); // add to the output if (!noTagText.isEmpty()) { cleanLines.add(noTagText); lineTags.add(tag); } } if (cleanLines.isEmpty()) { // the document is empty System.err.println("Document " + rankedResults.clueWebID + " in query " + queryResultContainer.qID + " is empty"); } else { // now join them back to paragraphs String text = StringUtils.join(cleanLines, "\n"); // create JCas JCas jCas = JCasFactory.createJCas(); jCas.setDocumentText(text); jCas.setDocumentLanguage("en"); // annotate WebParagraph SimplePipeline.runPipeline(jCas, 
AnalysisEngineFactory.createEngineDescription(WebParagraphAnnotator.class)); // fill the original tag information List<WebParagraph> webParagraphs = new ArrayList<>( JCasUtil.select(jCas, WebParagraph.class)); // they must be the same size as original ones if (webParagraphs.size() != lineTags.size()) { throw new IllegalStateException( "Different size of annotated paragraphs and original lines"); } for (int i = 0; i < webParagraphs.size(); i++) { WebParagraph p = webParagraphs.get(i); // get tag String tag = lineTags.get(i); p.setOriginalHtmlTag(tag); } SimplePipeline.runPipeline(jCas, AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class, // only on existing WebParagraph annotations StanfordSegmenter.PARAM_ZONE_TYPES, WebParagraph.class.getCanonicalName())); // now convert to XMI ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream(); XmiCasSerializer.serialize(jCas.getCas(), byteOutputStream); // encode to base64 String encoded = new BASE64Encoder().encode(byteOutputStream.toByteArray()); rankedResults.originalXmi = encoded; } } } // and save the query to output dir File outputFile = new File(outputDir, queryResultContainer.qID + ".xml"); FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8"); System.out.println("Finished " + outputFile); } }
From source file:LogExample.java
/**
 * Parses the sample log entry with a regular expression and prints the
 * interesting fields; reports a bad entry on stderr if the line does not
 * match or the pattern's group count differs from {@code NUM_FIELDS}.
 *
 * @param argv not used
 */
public static void main(String argv[]) {
    final String logEntryPattern = "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) (\\d+) \"([^\"]+)\" \"([^\"]+)\"";

    System.out.println("Using RE Pattern:");
    System.out.println(logEntryPattern);
    System.out.println("Input line is:");
    System.out.println(logEntryLine);

    final Matcher matcher = Pattern.compile(logEntryPattern).matcher(logEntryLine);

    // The whole line must match and expose exactly the expected number of groups.
    final boolean wellFormed = matcher.matches() && NUM_FIELDS == matcher.groupCount();
    if (!wellFormed) {
        System.err.println("Bad log entry (or problem with RE?):");
        System.err.println(logEntryLine);
        return;
    }

    System.out.println("IP Address: " + matcher.group(1));
    System.out.println("Date&Time: " + matcher.group(4));
    System.out.println("Request: " + matcher.group(5));
    System.out.println("Response: " + matcher.group(6));
    System.out.println("Bytes Sent: " + matcher.group(7));

    // A lone "-" in the referer field is not printed.
    final String referer = matcher.group(8);
    if (!referer.equals("-")) {
        System.out.println("Referer: " + referer);
    }
    System.out.println("Browser: " + matcher.group(9));
}
From source file:edu.illinois.cs.cogcomp.ner.BenchmarkOutputParser.java
/** * This main method will take one required argument, idenfitying the file containing * the results. Optionally, "-single" may also be passed indicating it will extract * the F1 value for single token values only. * @param args//from www . j a v a 2s. com * @throws IOException */ public static void main(String[] args) throws IOException { parseArgs(args); System.out.println("L1lr,L1t,L2lr,L2t,L1 token,L2 token,F1,F2"); for (File file : resultsfile.listFiles()) { if (file.getName().startsWith("L1r")) { File resultsfile = new File(file, "ner/results.out"); if (resultsfile.exists()) { try { Parameters p = parseFilename(file); String lines = FileUtils.readFileToString(resultsfile); // get the token level score. String tokenL2 = null, tokenL1 = null; Matcher matcher = l2tokenlevelpattern.matcher(lines); if (matcher.find()) tokenL2 = matcher.group(1); else { matcher = ol2tokenlevelpattern.matcher(lines); if (matcher.find()) tokenL2 = matcher.group(1); else System.err.println("No token level match"); } matcher = l1tokenlevelpattern.matcher(lines); if (matcher.find()) tokenL1 = matcher.group(1); else { matcher = ol1tokenlevelpattern.matcher(lines); if (matcher.find()) tokenL1 = matcher.group(1); else System.err.println("No token level match"); } matcher = phraselevelpattern.matcher(lines); matcher.find(); String phraseL1 = matcher.group(1); String phraseL2 = matcher.group(2); System.out.println( p.toString() + "," + tokenL1 + "," + tokenL2 + "," + phraseL1 + "," + phraseL2); } catch (java.lang.IllegalStateException ise) { System.err.println("The results file could not be parsed : \"" + resultsfile + "\""); } } else { System.err.println("no results in " + resultsfile); } } } }
From source file:de.mpg.escidoc.services.cone.util.CCCrawler.java
public static void main(String[] args) throws Exception { HttpClient httpClient = new HttpClient(); querier = QuerierFactory.newQuerier(false); // field_commercial for (YesNo fieldCommercial : YesNo.values()) { // field_derivatives for (YesNo fieldDerivatives : YesNo.values()) { // field_derivatives for (Jurisdiction fieldJurisdiction : Jurisdiction.values()) { String licenceUrl = ccUrl + "&field_commercial=" + fieldCommercial.toString() + "&field_derivatives=" + fieldDerivatives.toString() + "&field_jurisdiction=" + fieldJurisdiction.toString() + "&lang=de_DE"; System.out.println(licenceUrl); GetMethod method = new GetMethod(licenceUrl); ProxyHelper.executeMethod(httpClient, method); if (method.getStatusCode() == 200) { TreeFragment fragment = new TreeFragment(); String key1 = "urn:cone:commercial"; String key3 = "urn:cone:jurisdiction"; List<LocalizedTripleObject> list = new ArrayList<LocalizedTripleObject>(); list.add(new LocalizedString(fieldCommercial.toBoolean())); fragment.put(key1, list); List<LocalizedTripleObject> list2 = new ArrayList<LocalizedTripleObject>(); list2.add(new LocalizedString(fieldDerivatives.toBoolean())); fragment.put("urn:cone:derivatives", list2); List<LocalizedTripleObject> list3 = new ArrayList<LocalizedTripleObject>(); list3.add(new LocalizedString(fieldJurisdiction.toString())); fragment.put(key3, list3); String codeToCopy = extractCode(method); Pattern urlPattern = Pattern.compile("href=\"([^\"]+)\""); Matcher urlMatcher = urlPattern.matcher(codeToCopy); if (urlMatcher.find()) { String url = urlMatcher.group(1); fragment.setSubject(url); Pattern versionPattern = Pattern.compile("/(\\d+\\.\\d+)/[^/]+/$"); Matcher versionMatcher = versionPattern.matcher(url); if (versionMatcher.find()) { list = new ArrayList<LocalizedTripleObject>(); list.add(new LocalizedString(versionMatcher.group(1))); fragment.put("urn:cone:version", list); }//from ww w .j av a 2 s . 
c o m Pattern imgPattern = Pattern.compile("src=\"([^\"]+)\""); Matcher imgMatcher = imgPattern.matcher(codeToCopy); if (imgMatcher.find()) { list = new ArrayList<LocalizedTripleObject>(); list.add(new LocalizedString(imgMatcher.group(1))); fragment.put("http://xmlns.com/foaf/0.1/depiction", list); } GetMethod method2 = new GetMethod(url); ProxyHelper.executeMethod(httpClient, method2); String page = method2.getResponseBodyAsString(); Pattern namePattern = Pattern.compile("<h2 property=\"dc:title\">([^<]+)</h2>"); Matcher nameMatcher = namePattern.matcher(page); if (nameMatcher.find()) { list = new ArrayList<LocalizedTripleObject>(); list.add(new LocalizedString(nameMatcher.group(1))); fragment.put("http://purl.org/dc/elements/1.1/title", list); } List<LocalizedTripleObject> languages = extractLanguages(page, url); fragment.put("urn:cone:translation", languages); querier.delete("cclicences", url); querier.create("cclicences", url, fragment); } } else { System.out.println("Not found: " + licenceUrl); } } } } querier.release(); }
From source file:com.bigdata.rdf.sail.webapp.AbstractProtocolTest.java
/**
 * Sanity check the {@link #charset} pattern: runs it over a fixed list of
 * sample content-type values and prints, for each one, the first captured
 * group (or an empty string when the pattern does not match) to stderr.
 *
 * @param argv not used
 */
public static void main(String argv[]) {
    final String[] samples = {
            "text/html ; charset=iso-8856-1",
            "text/html ; charset=iso-8856-1; foo = bar",
            "text/html ;charset=iso-8856-1; foo = bar",
            "text/html ; charset= \"iso-8856-1\"",
            "text/html ; charset=iso-8856-1; foo = bar",
            "text/html ; charset = iso-8856-1; foo = bar",
            "text/html ; foo = bar",
            "text/html",
    };

    for (final String contentType : samples) {
        final Matcher m = charset.matcher(contentType);
        final String extracted;
        if (m.find()) {
            extracted = m.group(1);
        } else {
            extracted = "";
        }
        System.err.println(contentType + " ====> " + extracted);
    }
}
From source file:net.cloudkit.relaxation.HttpClientTest.java
public static void main(String[] args) throws Exception { InetAddress[] addresss = InetAddress.getAllByName("google.com"); for (InetAddress address : addresss) { System.out.println(address); }// w w w. j a v a 2 s . c o m CloseableHttpClient httpclient = HttpClients.createDefault(); String __VIEWSTATE = ""; String __EVENTVALIDATION = ""; HttpGet httpGet = new HttpGet("http://query.customs.gov.cn/MNFTQ/MRoadQuery.aspx?" + Math.random() * 1000); httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"); httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch"); httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6"); httpGet.setHeader("Cache-Control", "no-cache"); // httpGet.setHeader("Connection", "keep-alive"); httpGet.setHeader("Host", "query.customs.gov.cn"); httpGet.setHeader("Pragma", "no-cache"); httpGet.setHeader("Upgrade-Insecure-Requests", "1"); httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"); HttpClientContext context = HttpClientContext.create(); // CloseableHttpResponse response1 = httpclient.execute(httpGet, context); CloseableHttpResponse response1 = httpclient.execute(httpGet); // Header[] headers = response1.getHeaders(HttpHeaders.CONTENT_TYPE); // System.out.println("context cookies:" + context.getCookieStore().getCookies()); // String setCookie = response1.getFirstHeader("Set-Cookie").getValue(); // System.out.println("context cookies:" + setCookie); try { System.out.println(response1.getStatusLine()); HttpEntity entity1 = response1.getEntity(); // do something useful with the response body and ensure it is fully consumed String result = IOUtils.toString(entity1.getContent(), "GBK"); // System.out.println(result); Matcher m1 = Pattern.compile( "<input type=\\\"hidden\\\" name=\\\"__VIEWSTATE\\\" id=\\\"__VIEWSTATE\\\" value=\\\"(.*)\\\" />") .matcher(result); __VIEWSTATE = m1.find() ? 
m1.group(1) : ""; Matcher m2 = Pattern.compile( "<input type=\\\"hidden\\\" name=\\\"__EVENTVALIDATION\\\" id=\\\"__EVENTVALIDATION\\\" value=\\\"(.*)\\\" />") .matcher(result); __EVENTVALIDATION = m2.find() ? m2.group(1) : ""; System.out.println(__VIEWSTATE); System.out.println(__EVENTVALIDATION); /* File storeFile = new File("D:\\customs\\customs"+ i +".jpg"); FileOutputStream output = new FileOutputStream(storeFile); IOUtils.copy(input, output); output.close(); */ EntityUtils.consume(entity1); } finally { response1.close(); } HttpPost httpPost = new HttpPost( "http://query.customs.gov.cn/MNFTQ/MRoadQuery.aspx?" + Math.random() * 1000); httpPost.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"); httpPost.setHeader("Accept-Encoding", "gzip, deflate"); httpPost.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6"); httpPost.setHeader("Cache-Control", "no-cache"); // httpPost.setHeader("Connection", "keep-alive"); httpPost.setHeader("Content-Type", "application/x-www-form-urlencoded"); httpPost.setHeader("Cookie", "ASP.NET_SessionId=t1td453hcuy4oqiplekkqe55"); httpPost.setHeader("Host", "query.customs.gov.cn"); httpPost.setHeader("Origin", "http://query.customs.gov.cn"); httpPost.setHeader("Pragma", "no-cache"); httpPost.setHeader("Referer", "http://query.customs.gov.cn/MNFTQ/MRoadQuery.aspx"); httpPost.setHeader("Upgrade-Insecure-Requests", "1"); httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"); List<NameValuePair> nvps = new ArrayList<NameValuePair>(); nvps.add(new BasicNameValuePair("__VIEWSTATE", __VIEWSTATE)); nvps.add(new BasicNameValuePair("__EVENTVALIDATION", __EVENTVALIDATION)); nvps.add(new BasicNameValuePair("ScrollTop", "")); nvps.add(new BasicNameValuePair("__essVariable", "")); nvps.add(new BasicNameValuePair("MRoadQueryCtrl1$txtManifestID", "5100312462240")); nvps.add(new 
BasicNameValuePair("MRoadQueryCtrl1$txtBillNo", "7PH650021105")); nvps.add(new BasicNameValuePair("MRoadQueryCtrl1$txtCode", "a778")); nvps.add(new BasicNameValuePair("MRoadQueryCtrl1$btQuery", " ")); nvps.add(new BasicNameValuePair("select", "")); nvps.add(new BasicNameValuePair("select1", "")); nvps.add(new BasicNameValuePair("select2", "")); nvps.add(new BasicNameValuePair("select3", "")); nvps.add(new BasicNameValuePair("select4", "")); nvps.add(new BasicNameValuePair("select5", "??")); nvps.add(new BasicNameValuePair("select6", "")); nvps.add(new BasicNameValuePair("select7", "")); nvps.add(new BasicNameValuePair("select8", "")); httpPost.setEntity(new UrlEncodedFormEntity(nvps, "GBK")); CloseableHttpResponse response2 = httpclient.execute(httpPost); try { System.out.println(response2.getStatusLine()); HttpEntity entity2 = response2.getEntity(); // do something useful with the response body // and ensure it is fully consumed // System.out.println(entity2.getContent()); System.out.println(IOUtils.toString(response2.getEntity().getContent(), "GBK")); EntityUtils.consume(entity2); } finally { response2.close(); } }
From source file:uk.ac.ebi.intact.editor.controller.misc.MyNotesController.java
/**
 * Demo: splits a fixed {@code {name:value rest}} style note into its three
 * captured parts and prints each part on its own line.
 *
 * @param args not used
 */
public static void main(String[] args) {
    final String note =
            "{query:Lalalala select exp from Experiment exp where exp.bioSource.cvTissue.ac = 'EBI-2609142'}";
    final Matcher parts = Pattern.compile("\\{(\\w+):(\\w+)\\s(.+)\\}").matcher(note);

    while (parts.find()) {
        // Groups 1..3: the word before the colon, the word after it, and the remainder.
        for (int group = 1; group <= 3; group++) {
            System.out.println(parts.group(group));
        }
    }
}
From source file:RegExpExample.java
/**
 * Scans {@code RETestSource.java} line by line and reports two things on
 * standard output: class declarations that start the line with a bare
 * {@code class} keyword, and words that are immediately repeated.
 *
 * <p>The reader is opened in a try-with-resources statement so the file handle
 * is closed on both normal completion and failure.
 *
 * @param args not used
 */
public static void main(String args[]) {
    String fileName = "RETestSource.java";
    String unadornedClassRE = "^\\s*class (\\w+)";
    String doubleIdentifierRE = "\\b(\\w+)\\s+\\1\\b";

    Pattern classPattern = Pattern.compile(unadornedClassRE);
    Pattern doublePattern = Pattern.compile(doubleIdentifierRE);

    int lineNumber = 0;
    try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
        String line;
        while ((line = br.readLine()) != null) {
            lineNumber++;
            Matcher classMatcher = classPattern.matcher(line);
            Matcher doubleMatcher = doublePattern.matcher(line);

            // At most one declaration is reported per line.
            if (classMatcher.find()) {
                System.out.println("The class [" + classMatcher.group(1) + "] is not public");
            }

            // A line may contain several repeated words; report each of them.
            while (doubleMatcher.find()) {
                System.out.println("The word \"" + doubleMatcher.group(1) + "\" occurs twice at position "
                        + doubleMatcher.start() + " on line " + lineNumber);
            }
        }
    } catch (IOException ioe) {
        System.out.println("IOException: " + ioe);
        ioe.printStackTrace();
    }
}