Example usage for java.util.regex Matcher group

List of usage examples for java.util.regex Matcher group

Introduction

On this page you can find example usages of java.util.regex.Matcher.group.

Prototype

public String group(String name) 

Source Link

Document

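Returns the input subsequence captured by the given named-capturing group during the previous match operation. Note that the examples below call the related group(int group) overload, which returns the subsequence captured by the group with the given index.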

Usage

From source file:com.wittawat.wordseg.Main.java

/**
 * Interactive console front end for the tokenizer.
 *
 * <p>Loads the default model, prints the agreement and help texts, then reads
 * commands from the system console until the user quits or the console input
 * ends. Recognised commands:
 * <ul>
 *   <li>{@code h} / {@code help}, {@code about}, {@code agreement} - print the matching text</li>
 *   <li>{@code set dict true|false} - switch dictionary usage on or off</li>
 *   <li>{@code q} / {@code quit} / {@code exit} - leave the program</li>
 *   <li>{@code <inFile>:tokfile:<outFile>} - tokenize a file and write the result to a file</li>
 *   <li>{@code <inFile>:tokfile} - tokenize a file and print the result</li>
 *   <li>{@code <text>:tok:<outFile>} - tokenize the typed text and write the result to a file</li>
 *   <li>{@code <text>:tok} - tokenize the typed text and print the result</li>
 * </ul>
 * Any exception raised while handling a single command is reported and the
 * prompt is shown again.
 *
 * @param args not used
 * @throws Exception if loading the model or the warm-up tokenization fails
 */
public static void main(String[] args) throws Exception {
    Console con = System.console();
    if (con == null) {
        System.out.println("The system must support console to run the program.");
        System.exit(1);
    }
    // Load model
    System.out.println("Loading model ...");
    Classifier model = Data.getDefaultModel();

    System.out.println("Finished loading model.");
    System.out.println(getAgreement());

    boolean isUseDict = true;

    // Dummy statement to eliminate all lazy loading
    System.out.println("\n" + new NukeTokenizer3(
            "?????",
            model, isUseDict).tokenize() + "\n");

    System.out.println(getHelp());

    final String SET_DICT_PAT_STR = "\\s*set\\s+dict\\s+(true|false)\\s*";
    final Pattern SET_DICT_PAT = Pattern.compile(SET_DICT_PAT_STR);
    while (true) {
        System.out.print(">> ");
        String line = con.readLine();
        if (line == null) {
            // Console.readLine() returns null once the end of the input stream is
            // reached; without this check the loop would spin forever on the prompt.
            break;
        }

        line = line.trim();
        if (line.equals("")) {
            continue;
        }

        try {
            // One matcher is enough: find() both tests the command and exposes the group.
            Matcher setDictMatcher = SET_DICT_PAT.matcher(line);

            if (line.equals("h") || line.equals("help")) {
                System.out.println(getHelp());
            } else if (line.equals("about")) {
                System.out.println(getAbout());
            } else if (line.equals("agreement")) {
                System.out.println(getAgreement());
            } else if (setDictMatcher.find()) {
                String v = setDictMatcher.group(1);
                isUseDict = v.equals("true");
                System.out.println("Dictionary will " + (isUseDict ? "" : "not ") + "be used.");
            } else if (line.matches("q|quit|exit")) {
                System.out.println("Bye");
                System.exit(0);
            } else if (line.contains(":tokfile:")) {
                // <inFile>:tokfile:<outFile>
                String[] splits = line.split(":tokfile:");
                String in = splits[0];
                String out = splits[1];
                String content = FileUtils.readFileToString(new File(in));
                long start = System.currentTimeMillis();

                NukeTokenizer tokenizer = new NukeTokenizer3(content, model, isUseDict);

                String tokenized = tokenizer.tokenize();
                long end = System.currentTimeMillis();
                System.out.println("Time to tokenize: " + (end - start) + " ms.");
                FileUtils.writeStringToFile(new File(out), tokenized);
            } else if (line.contains(":tokfile")) {
                // <inFile>:tokfile
                String[] splits = line.split(":tokfile");
                String in = splits[0];

                String content = FileUtils.readFileToString(new File(in));
                long start = System.currentTimeMillis();
                NukeTokenizer tokenizer = new NukeTokenizer3(content, model, isUseDict);
                String tokenized = tokenizer.tokenize();
                long end = System.currentTimeMillis();

                System.out.println(tokenized);
                System.out.println("Time to tokenize: " + (end - start) + " ms.");
            } else if (line.contains(":tok:")) {
                // <text>:tok:<outFile>
                String[] splits = line.split(":tok:");
                String inText = splits[0];
                String out = splits[1];

                long start = System.currentTimeMillis();
                NukeTokenizer tokenizer = new NukeTokenizer3(inText, model, isUseDict);
                String tokenized = tokenizer.tokenize();
                long end = System.currentTimeMillis();
                System.out.println("Time to tokenize: " + (end - start) + " ms.");
                FileUtils.writeStringToFile(new File(out), tokenized);
            } else if (line.contains(":tok")) {
                // <text>:tok
                String[] splits = line.split(":tok");
                String inText = splits[0];

                long start = System.currentTimeMillis();
                NukeTokenizer tokenizer = new NukeTokenizer3(inText, model, isUseDict);
                String tokenized = tokenizer.tokenize();
                long end = System.currentTimeMillis();

                System.out.println(tokenized);
                System.out.println("Time to tokenize: " + (end - start) + " ms.");
            } else {
                System.out.println("Unknown command");
            }
        } catch (Exception e) {
            // A malformed command (e.g. a missing output name) must not end the session.
            System.out.println("Error. See the exception.");
            e.printStackTrace();
        }
    }

}

From source file:com.hp.avmon.trap.service.TrapService.java

/**
 * Prints every brace-delimited token found in a fixed sample string,
 * one token per line.
 *
 * @param args not used
 */
public static void main(String[] args) {
    final String sample = "{3}123{3}{10}";
    final Pattern bracePattern = Pattern.compile(".*?(\\{.+?\\})");

    // Each successful find() skips any leading text lazily and captures the next "{...}".
    for (Matcher braces = bracePattern.matcher(sample); braces.find();) {
        String token = braces.group(1);
        System.out.println(token);
    }
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step5LinguisticPreprocessing.java

/**
 * Preprocesses every XML query container found in an input directory and
 * writes the enriched containers to an output directory.
 *
 * <p>For each ranked result that has plain text, the leading HTML tag of every
 * line is remembered, the tags are stripped, the remaining non-empty lines are
 * joined into one document, the document is annotated with
 * {@code WebParagraphAnnotator} and {@code StanfordSegmenter}, and the
 * resulting CAS is stored on the result as a base64-encoded XMI string.
 *
 * @param args {@code args[0]} is the input directory, {@code args[1]} the output directory
 * @throws Exception if reading, annotating, serializing or writing fails
 */
public static void main(String[] args) throws Exception {
    // args[0]: directory holding the xml query containers (step4-boiler-plate/)
    final File sourceDir = new File(args[0]);

    // args[1]: destination directory, created on demand
    final File targetDir = new File(args[1]);
    if (!targetDir.exists()) {
        targetDir.mkdirs();
    }

    for (File containerFile : FileUtils.listFiles(sourceDir, new String[] { "xml" }, false)) {
        String containerXml = FileUtils.readFileToString(containerFile, "utf-8");
        QueryResultContainer container = QueryResultContainer.fromXML(containerXml);

        for (QueryResultContainer.SingleRankedResult result : container.rankedResults) {
            // results without plain text are left untouched
            if (result.plainText == null) {
                continue;
            }

            String[] rawLines = StringUtils.split(result.plainText, "\n");

            // cleaned line texts and, at the same index, the tag each line started with
            List<String> paragraphTexts = new ArrayList<>(rawLines.length);
            List<String> paragraphTags = new ArrayList<>(rawLines.length);

            for (String rawLine : rawLines) {
                Matcher tagMatcher = OPENING_TAG_PATTERN.matcher(rawLine);
                String htmlTag = tagMatcher.find() ? tagMatcher.group(1) : null;
                if (htmlTag == null) {
                    throw new IllegalArgumentException("No html tag found for line:\n" + rawLine);
                }

                // drop the opening tag at the start and the closing tag at the end,
                // turn non-breaking-space entities into blanks, then trim
                String stripped = rawLine
                        .replaceAll("^<\\S+>", "")
                        .replaceAll("</\\S+>$", "")
                        .replaceAll("&nbsp;", " ")
                        .trim();

                // lines that are empty after cleaning are dropped together with their tag
                if (!stripped.isEmpty()) {
                    paragraphTexts.add(stripped);
                    paragraphTags.add(htmlTag);
                }
            }

            if (paragraphTexts.isEmpty()) {
                // nothing survived the cleaning: report and move on
                System.err.println("Document " + result.clueWebID + " in query "
                        + container.qID + " is empty");
                continue;
            }

            // one paragraph per line
            String documentText = StringUtils.join(paragraphTexts, "\n");

            JCas cas = JCasFactory.createJCas();
            cas.setDocumentText(documentText);
            cas.setDocumentLanguage("en");

            // first pass: create the WebParagraph annotations
            SimplePipeline.runPipeline(cas,
                    AnalysisEngineFactory.createEngineDescription(WebParagraphAnnotator.class));

            List<WebParagraph> paragraphs = new ArrayList<>(
                    JCasUtil.select(cas, WebParagraph.class));

            // every kept line must have produced exactly one paragraph annotation
            if (paragraphs.size() != paragraphTags.size()) {
                throw new IllegalStateException(
                        "Different size of annotated paragraphs and original lines");
            }

            // copy the remembered tag onto the paragraph at the same position
            int position = 0;
            for (WebParagraph paragraph : paragraphs) {
                paragraph.setOriginalHtmlTag(paragraphTags.get(position));
                position++;
            }

            // second pass: segmentation restricted to the WebParagraph zones
            SimplePipeline.runPipeline(cas,
                    AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
                            StanfordSegmenter.PARAM_ZONE_TYPES, WebParagraph.class.getCanonicalName()));

            // serialize the CAS to XMI and keep it base64-encoded on the result
            ByteArrayOutputStream xmiBytes = new ByteArrayOutputStream();
            XmiCasSerializer.serialize(cas.getCas(), xmiBytes);
            result.originalXmi = new BASE64Encoder().encode(xmiBytes.toByteArray());
        }

        // write the container under its query id into the output directory
        File targetFile = new File(targetDir, container.qID + ".xml");
        FileUtils.writeStringToFile(targetFile, container.toXML(), "utf-8");
        System.out.println("Finished " + targetFile);
    }

}

From source file:LogExample.java

/**
 * Matches {@code logEntryLine} against a nine-group log-entry regular
 * expression and prints selected groups with a label each.
 *
 * <p>If the line does not match as a whole, or the pattern's group count
 * differs from {@code NUM_FIELDS}, the line is reported on standard error and
 * nothing else is printed.
 *
 * @param argv not used
 */
public static void main(String argv[]) {

    final String regex = "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) (\\d+) \"([^\"]+)\" \"([^\"]+)\"";

    System.out.println("Using RE Pattern:");
    System.out.println(regex);

    System.out.println("Input line is:");
    System.out.println(logEntryLine);

    final Matcher fields = Pattern.compile(regex).matcher(logEntryLine);
    final boolean wellFormed = fields.matches() && NUM_FIELDS == fields.groupCount();
    if (!wellFormed) {
        System.err.println("Bad log entry (or problem with RE?):");
        System.err.println(logEntryLine);
        return;
    }

    System.out.println("IP Address: " + fields.group(1));
    System.out.println("Date&Time: " + fields.group(4));
    System.out.println("Request: " + fields.group(5));
    System.out.println("Response: " + fields.group(6));
    System.out.println("Bytes Sent: " + fields.group(7));

    // a referer of "-" is not printed
    final String referer = fields.group(8);
    if (!referer.equals("-")) {
        System.out.println("Referer: " + referer);
    }
    System.out.println("Browser: " + fields.group(9));
}

From source file:edu.illinois.cs.cogcomp.ner.BenchmarkOutputParser.java

/**
 * This main method will take one required argument, identifying the location
 * containing the results. Optionally, "-single" may also be passed indicating it
 * will extract the F1 value for single token values only.
 *
 * <p>Every child of the results location whose name starts with {@code L1r} is
 * expected to contain {@code ner/results.out}. For each such file one CSV row is
 * printed to standard output: the parameters parsed from the directory name, the
 * L1 and L2 token-level scores and the two phrase-level scores. Problems with an
 * individual entry are reported on standard error and the scan continues.
 *
 * @param args command line arguments, handed to {@code parseArgs}
 * @throws IOException if a results file cannot be read
 */
public static void main(String[] args) throws IOException {
    parseArgs(args);
    System.out.println("L1lr,L1t,L2lr,L2t,L1 token,L2 token,F1,F2");

    // listFiles() returns null when the path is not a directory or cannot be read;
    // iterating over null would end in a NullPointerException.
    File[] candidates = resultsfile.listFiles();
    if (candidates == null) {
        System.err.println("Cannot list the results directory : \"" + resultsfile + "\"");
        return;
    }

    for (File file : candidates) {
        if (!file.getName().startsWith("L1r")) {
            continue;
        }

        // Named differently from the resultsfile field so the field is not shadowed.
        File nerResults = new File(file, "ner/results.out");
        if (!nerResults.exists()) {
            System.err.println("no results in " + nerResults);
            continue;
        }

        try {
            Parameters p = parseFilename(file);
            String lines = FileUtils.readFileToString(nerResults);

            // get the token level score, trying the alternative pattern as a fallback.
            String tokenL2 = null, tokenL1 = null;
            Matcher matcher = l2tokenlevelpattern.matcher(lines);
            if (matcher.find()) {
                tokenL2 = matcher.group(1);
            } else {
                matcher = ol2tokenlevelpattern.matcher(lines);
                if (matcher.find()) {
                    tokenL2 = matcher.group(1);
                } else {
                    System.err.println("No token level match");
                }
            }

            matcher = l1tokenlevelpattern.matcher(lines);
            if (matcher.find()) {
                tokenL1 = matcher.group(1);
            } else {
                matcher = ol1tokenlevelpattern.matcher(lines);
                if (matcher.find()) {
                    tokenL1 = matcher.group(1);
                } else {
                    System.err.println("No token level match");
                }
            }

            // The phrase-level scores are mandatory: test the match explicitly instead
            // of relying on group() throwing IllegalStateException.
            matcher = phraselevelpattern.matcher(lines);
            if (!matcher.find()) {
                System.err.println("The results file could not be parsed : \"" + nerResults + "\"");
                continue;
            }
            String phraseL1 = matcher.group(1);
            String phraseL2 = matcher.group(2);
            System.out.println(
                    p.toString() + "," + tokenL1 + "," + tokenL2 + "," + phraseL1 + "," + phraseL2);
        } catch (java.lang.IllegalStateException ise) {
            // Kept for any IllegalStateException raised while parsing this entry.
            System.err.println("The results file could not be parsed : \"" + nerResults + "\"");
        }
    }
}

From source file:de.mpg.escidoc.services.cone.util.CCCrawler.java

/**
 * Crawls the licence chooser for every combination of the commercial,
 * derivatives and jurisdiction options and stores one entry per licence.
 *
 * <p>For each combination the chooser URL is requested. On status 200 the
 * licence URL, version and image are extracted from the returned code snippet,
 * the licence page itself is fetched for its title and translations, and the
 * resulting fragment replaces any existing {@code cclicences} entry for that
 * URL. Other status codes are reported as "Not found".
 *
 * <p>HTTP connections are released after each request and the querier is
 * released even when the crawl aborts with an exception.
 *
 * @param args not used
 * @throws Exception if a request or a querier operation fails
 */
public static void main(String[] args) throws Exception {
    HttpClient httpClient = new HttpClient();

    querier = QuerierFactory.newQuerier(false);
    try {
        // field_commercial
        for (YesNo fieldCommercial : YesNo.values()) {
            // field_derivatives
            for (YesNo fieldDerivatives : YesNo.values()) {
                // field_jurisdiction
                for (Jurisdiction fieldJurisdiction : Jurisdiction.values()) {
                    String licenceUrl = ccUrl + "&field_commercial=" + fieldCommercial.toString()
                            + "&field_derivatives=" + fieldDerivatives.toString() + "&field_jurisdiction="
                            + fieldJurisdiction.toString() + "&lang=de_DE";
                    System.out.println(licenceUrl);
                    GetMethod method = new GetMethod(licenceUrl);
                    try {
                        ProxyHelper.executeMethod(httpClient, method);

                        if (method.getStatusCode() == 200) {
                            TreeFragment fragment = new TreeFragment();

                            String key1 = "urn:cone:commercial";
                            String key3 = "urn:cone:jurisdiction";

                            List<LocalizedTripleObject> list = new ArrayList<LocalizedTripleObject>();
                            list.add(new LocalizedString(fieldCommercial.toBoolean()));
                            fragment.put(key1, list);

                            List<LocalizedTripleObject> list2 = new ArrayList<LocalizedTripleObject>();
                            list2.add(new LocalizedString(fieldDerivatives.toBoolean()));
                            fragment.put("urn:cone:derivatives", list2);

                            List<LocalizedTripleObject> list3 = new ArrayList<LocalizedTripleObject>();
                            list3.add(new LocalizedString(fieldJurisdiction.toString()));
                            fragment.put(key3, list3);

                            String codeToCopy = extractCode(method);

                            // The first href in the snippet is used as the licence URL / subject.
                            Pattern urlPattern = Pattern.compile("href=\"([^\"]+)\"");
                            Matcher urlMatcher = urlPattern.matcher(codeToCopy);
                            if (urlMatcher.find()) {
                                String url = urlMatcher.group(1);
                                fragment.setSubject(url);

                                // Version number taken from the ".../<major.minor>/<segment>/" tail of the URL.
                                Pattern versionPattern = Pattern.compile("/(\\d+\\.\\d+)/[^/]+/$");
                                Matcher versionMatcher = versionPattern.matcher(url);
                                if (versionMatcher.find()) {
                                    list = new ArrayList<LocalizedTripleObject>();
                                    list.add(new LocalizedString(versionMatcher.group(1)));
                                    fragment.put("urn:cone:version", list);
                                }

                                Pattern imgPattern = Pattern.compile("src=\"([^\"]+)\"");
                                Matcher imgMatcher = imgPattern.matcher(codeToCopy);
                                if (imgMatcher.find()) {
                                    list = new ArrayList<LocalizedTripleObject>();
                                    list.add(new LocalizedString(imgMatcher.group(1)));
                                    fragment.put("http://xmlns.com/foaf/0.1/depiction", list);
                                }

                                // Fetch the licence page itself; release its connection as soon
                                // as the body has been read into memory.
                                GetMethod method2 = new GetMethod(url);
                                String page;
                                try {
                                    ProxyHelper.executeMethod(httpClient, method2);
                                    page = method2.getResponseBodyAsString();
                                } finally {
                                    method2.releaseConnection();
                                }

                                Pattern namePattern = Pattern.compile("<h2 property=\"dc:title\">([^<]+)</h2>");
                                Matcher nameMatcher = namePattern.matcher(page);
                                if (nameMatcher.find()) {
                                    list = new ArrayList<LocalizedTripleObject>();
                                    list.add(new LocalizedString(nameMatcher.group(1)));
                                    fragment.put("http://purl.org/dc/elements/1.1/title", list);
                                }

                                List<LocalizedTripleObject> languages = extractLanguages(page, url);

                                fragment.put("urn:cone:translation", languages);

                                // Replace any previously stored entry for this licence.
                                querier.delete("cclicences", url);
                                querier.create("cclicences", url, fragment);
                            }

                        } else {
                            System.out.println("Not found: " + licenceUrl);
                        }
                    } finally {
                        // Hand the connection back to the connection manager in every case.
                        method.releaseConnection();
                    }
                }
            }
        }
    } finally {
        querier.release();
    }
}

From source file:com.bigdata.rdf.sail.webapp.AbstractProtocolTest.java

/**
 * Sanity check the {@link #charset} pattern: runs it over a fixed list of
 * sample content-type strings and prints, for each, the sample followed by the
 * first captured group (or nothing when the pattern is not found).
 *
 * @param argv not used
 */
public static void main(String argv[]) {
    final String[] samples = {
            "text/html ; charset=iso-8856-1",
            "text/html ; charset=iso-8856-1; foo = bar",
            "text/html ;charset=iso-8856-1; foo = bar",
            "text/html ; charset= \"iso-8856-1\"",
            "text/html ; charset=iso-8856-1; foo = bar",
            "text/html ; charset = iso-8856-1; foo = bar",
            "text/html ; foo = bar",
            "text/html",
    };

    for (final String sample : samples) {
        final Matcher m = charset.matcher(sample);
        final String captured;
        if (m.find()) {
            captured = m.group(1);
        } else {
            captured = "";
        }
        System.err.println(sample + " ====> " + captured);
    }
}

From source file:net.cloudkit.relaxation.HttpClientTest.java

/**
 * Manual test that queries the customs road-manifest page in two steps.
 *
 * <p>First the query page is fetched with GET and the values of the hidden
 * {@code __VIEWSTATE} and {@code __EVENTVALIDATION} inputs are extracted from
 * the GBK-decoded body (empty string when an input is not found). Then the form
 * is submitted with POST, sending those two values together with fixed query
 * fields, and the GBK-decoded response body is printed.
 *
 * <p>Both responses and the HTTP client itself are closed in {@code finally}
 * blocks. NOTE(review): the session cookie and the form values are hard-coded.
 *
 * @param args not used
 * @throws Exception if address resolution or either HTTP exchange fails
 */
public static void main(String[] args) throws Exception {

    InetAddress[] addresss = InetAddress.getAllByName("google.com");
    for (InetAddress address : addresss) {

        System.out.println(address);

    }

    CloseableHttpClient httpclient = HttpClients.createDefault();
    try {
        String viewState = "";
        String eventValidation = "";

        // The random query string makes every request URL unique.
        HttpGet httpGet = new HttpGet("http://query.customs.gov.cn/MNFTQ/MRoadQuery.aspx?" + Math.random() * 1000);
        httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6");
        httpGet.setHeader("Cache-Control", "no-cache");
        httpGet.setHeader("Host", "query.customs.gov.cn");
        httpGet.setHeader("Pragma", "no-cache");
        httpGet.setHeader("Upgrade-Insecure-Requests", "1");
        httpGet.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36");

        CloseableHttpResponse response1 = httpclient.execute(httpGet);

        try {
            System.out.println(response1.getStatusLine());
            HttpEntity entity1 = response1.getEntity();

            String result = IOUtils.toString(entity1.getContent(), "GBK");

            // [^"]* instead of a greedy .* so the capture stops at the attribute's
            // closing quote even if more markup follows on the same line.
            Matcher m1 = Pattern.compile(
                    "<input type=\\\"hidden\\\" name=\\\"__VIEWSTATE\\\" id=\\\"__VIEWSTATE\\\" value=\\\"([^\\\"]*)\\\" />")
                    .matcher(result);
            viewState = m1.find() ? m1.group(1) : "";
            Matcher m2 = Pattern.compile(
                    "<input type=\\\"hidden\\\" name=\\\"__EVENTVALIDATION\\\" id=\\\"__EVENTVALIDATION\\\" value=\\\"([^\\\"]*)\\\" />")
                    .matcher(result);
            eventValidation = m2.find() ? m2.group(1) : "";

            System.out.println(viewState);
            System.out.println(eventValidation);

            // make sure the response body is fully consumed
            EntityUtils.consume(entity1);
        } finally {
            response1.close();
        }

        HttpPost httpPost = new HttpPost(
                "http://query.customs.gov.cn/MNFTQ/MRoadQuery.aspx?" + Math.random() * 1000);
        httpPost.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        httpPost.setHeader("Accept-Encoding", "gzip, deflate");
        httpPost.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6");
        httpPost.setHeader("Cache-Control", "no-cache");
        httpPost.setHeader("Content-Type", "application/x-www-form-urlencoded");
        httpPost.setHeader("Cookie", "ASP.NET_SessionId=t1td453hcuy4oqiplekkqe55");
        httpPost.setHeader("Host", "query.customs.gov.cn");
        httpPost.setHeader("Origin", "http://query.customs.gov.cn");
        httpPost.setHeader("Pragma", "no-cache");
        httpPost.setHeader("Referer", "http://query.customs.gov.cn/MNFTQ/MRoadQuery.aspx");
        httpPost.setHeader("Upgrade-Insecure-Requests", "1");
        httpPost.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36");

        // Form fields: the two hidden values from the GET plus the fixed query data.
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("__VIEWSTATE", viewState));
        nvps.add(new BasicNameValuePair("__EVENTVALIDATION", eventValidation));
        nvps.add(new BasicNameValuePair("ScrollTop", ""));
        nvps.add(new BasicNameValuePair("__essVariable", ""));
        nvps.add(new BasicNameValuePair("MRoadQueryCtrl1$txtManifestID", "5100312462240"));
        nvps.add(new BasicNameValuePair("MRoadQueryCtrl1$txtBillNo", "7PH650021105"));
        nvps.add(new BasicNameValuePair("MRoadQueryCtrl1$txtCode", "a778"));
        nvps.add(new BasicNameValuePair("MRoadQueryCtrl1$btQuery", "   "));
        nvps.add(new BasicNameValuePair("select", ""));
        nvps.add(new BasicNameValuePair("select1", ""));
        nvps.add(new BasicNameValuePair("select2", ""));
        nvps.add(new BasicNameValuePair("select3", ""));
        nvps.add(new BasicNameValuePair("select4", ""));
        nvps.add(new BasicNameValuePair("select5", "??"));
        nvps.add(new BasicNameValuePair("select6", ""));
        nvps.add(new BasicNameValuePair("select7", ""));
        nvps.add(new BasicNameValuePair("select8", ""));

        httpPost.setEntity(new UrlEncodedFormEntity(nvps, "GBK"));

        CloseableHttpResponse response2 = httpclient.execute(httpPost);

        try {
            System.out.println(response2.getStatusLine());
            HttpEntity entity2 = response2.getEntity();
            System.out.println(IOUtils.toString(entity2.getContent(), "GBK"));

            // make sure the response body is fully consumed
            EntityUtils.consume(entity2);
        } finally {
            response2.close();
        }
    } finally {
        // Closing the client releases the resources it holds.
        httpclient.close();
    }

}

From source file:uk.ac.ebi.intact.editor.controller.misc.MyNotesController.java

/**
 * Applies a three-group pattern of the form <code>{word:word text}</code> to a
 * fixed sample string and prints each captured group on its own line.
 *
 * @param args not used
 */
public static void main(String[] args) {
    final String sample =
            "{query:Lalalala select exp from Experiment exp where exp.bioSource.cvTissue.ac = 'EBI-2609142'}";
    final Matcher macro = Pattern.compile("\\{(\\w+):(\\w+)\\s(.+)\\}").matcher(sample);

    while (macro.find()) {
        // groups 1..3, in order
        for (int groupIndex = 1; groupIndex <= 3; groupIndex++) {
            System.out.println(macro.group(groupIndex));
        }
    }
}

From source file:RegExpExample.java

/**
 * Scans the file {@code RETestSource.java} in the working directory line by
 * line and reports two things on standard output:
 * <ul>
 *   <li>lines that start (after optional whitespace) with {@code class <Name>},
 *       i.e. a class declaration without a preceding modifier;</li>
 *   <li>every word that is immediately repeated on the same line, together with
 *       the match position and the line number.</li>
 * </ul>
 * I/O failures are printed together with their stack trace.
 *
 * @param args not used
 */
public static void main(String args[]) {
    String fileName = "RETestSource.java";

    String unadornedClassRE = "^\\s*class (\\w+)";
    String doubleIdentifierRE = "\\b(\\w+)\\s+\\1\\b";

    Pattern classPattern = Pattern.compile(unadornedClassRE);
    Pattern doublePattern = Pattern.compile(doubleIdentifierRE);

    int lineNumber = 0;

    // try-with-resources closes the reader on every path; previously it was never closed.
    try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
        String line;

        while ((line = br.readLine()) != null) {
            lineNumber++;

            Matcher classMatcher = classPattern.matcher(line);
            Matcher doubleMatcher = doublePattern.matcher(line);

            if (classMatcher.find()) {
                System.out.println("The class [" + classMatcher.group(1) + "] is not public");
            }

            // a line may contain several repeated words; report each of them
            while (doubleMatcher.find()) {
                System.out.println("The word \"" + doubleMatcher.group(1) + "\" occurs twice at position "
                        + doubleMatcher.start() + " on line " + lineNumber);
            }
        }
    } catch (IOException ioe) {
        System.out.println("IOException: " + ioe);
        ioe.printStackTrace();
    }
}