Example usage for java.util.regex Matcher find

List of usage examples for java.util.regex Matcher find

Introduction

On this page you can find example usages of the java.util.regex.Matcher.find() method.

Prototype

public boolean find() 

Source Link

Document

Attempts to find the next subsequence of the input sequence that matches the pattern.

Usage

From source file:BGrep.java

/**
 * Command-line entry point: searches each named file for matches of a regular
 * expression and prints one line per match in the form
 * {@code filename:charOffset: matchedText}.
 *
 * <p>Arguments: {@code [-e encoding] [-i] [-s] pattern file...}
 * <ul>
 *   <li>{@code -e name} selects the charset used to decode the files (default UTF-8);</li>
 *   <li>{@code -i} adds {@link Pattern#CASE_INSENSITIVE};</li>
 *   <li>{@code -s} adds {@link Pattern#UNICODE_CASE} and {@link Pattern#CANON_EQ}.</li>
 * </ul>
 * The pattern is always compiled with {@link Pattern#MULTILINE}. An unknown option,
 * a missing pattern or a missing file list results in a call to {@code usage()}.
 * Files that cannot be read are reported on standard error and skipped.
 *
 * @param args the command-line arguments described above
 */
public static void main(String[] args) {
    String encodingName = "UTF-8"; // Default to UTF-8 encoding
    int flags = Pattern.MULTILINE; // Default regexp flags

    try { // Fatal exceptions are handled after this try block
        // First, process any options. startsWith() is used instead of
        // charAt(0) so that an empty-string argument is simply treated as a
        // non-option rather than throwing StringIndexOutOfBoundsException,
        // which none of the handlers below would catch.
        int nextarg = 0;
        while (args[nextarg].startsWith("-")) {
            String option = args[nextarg++];
            if (option.equals("-e")) {
                encodingName = args[nextarg++];
            } else if (option.equals("-i")) { // case-insensitive matching
                flags |= Pattern.CASE_INSENSITIVE;
            } else if (option.equals("-s")) { // Strict Unicode processing
                flags |= Pattern.UNICODE_CASE; // case-insensitive Unicode
                flags |= Pattern.CANON_EQ; // canonicalize Unicode
            } else {
                System.err.println("Unknown option: " + option);
                usage();
            }
        }

        // Get the Charset for converting bytes to chars
        Charset charset = Charset.forName(encodingName);

        // Next argument must be a regexp. Compile it to a Pattern object
        Pattern pattern = Pattern.compile(args[nextarg++], flags);

        // Require that at least one file is specified
        if (nextarg == args.length) {
            usage();
        }

        // Loop through each of the specified filenames
        while (nextarg < args.length) {
            String filename = args[nextarg++];
            CharBuffer chars; // This will hold complete text of the file

            // Handle per-file errors locally. try-with-resources guarantees the
            // stream (and with it the channel) is closed even when map() fails.
            try (FileInputStream stream = new FileInputStream(filename)) {
                FileChannel f = stream.getChannel();

                // Memory-map the file into one big ByteBuffer. This is
                // easy but may be somewhat inefficient for short files.
                ByteBuffer bytes = f.map(FileChannel.MapMode.READ_ONLY, 0, f.size());

                // Decode the entire ByteBuffer into one big CharBuffer
                chars = charset.decode(bytes);
            } catch (IOException e) { // File not found or other problem
                System.err.println(e); // Print error message
                continue; // and move on to the next file
            }

            // This is the basic regexp loop for finding all matches in a
            // CharSequence. Note that CharBuffer implements CharSequence.
            // A Matcher holds state for a given Pattern and text.
            Matcher matcher = pattern.matcher(chars);
            while (matcher.find()) { // While there are more matches
                // Print out details of the match
                System.out.println(filename + ":" + // file name
                        matcher.start() + ": " + // character pos
                        matcher.group()); // matching text
            }
        }
    }
    // These are the things that can go wrong in the code above
    catch (UnsupportedCharsetException e) { // Bad encoding name
        System.err.println("Unknown encoding: " + encodingName);
    } catch (PatternSyntaxException e) { // Bad pattern
        System.err.println("Syntax error in search pattern:\n" + e.getMessage());
    } catch (ArrayIndexOutOfBoundsException e) { // Wrong number of arguments
        usage();
    }
}

From source file:eu.annocultor.converters.geonames.GeonamesDumpToRdf.java

/**
 * Splits the combined GeoNames dump {@code input_source/all-geonames-rdf.txt} into
 * one {@code <country>.rdf} file per country, each placed in a directory named after
 * the country's continent (below {@code input_source}).
 *
 * <p>The country-to-continent mapping is loaded from the classpath resource
 * {@code /country-to-continent.properties}; an output file is created for every key
 * of that mapping. Lines of the dump that start with {@code http://sws.geonames} are
 * skipped. Every other line is treated as one RDF document: its {@code inCountry}
 * reference selects the output file (lines without one are filed under the key
 * {@code "null"}), the closing {@code </rdf:RDF>} is stripped, and for all but the
 * first document of a country the opening {@code <rdf:RDF ...>} element is stripped
 * as well. A single closing tag is appended to each file that received data.
 *
 * @param args ignored
 * @throws Exception on I/O failure, or when a line refers to a country that is not
 *         a key of the mapping
 */
public static void main(String[] args) throws Exception {
    File root = new File("input_source");

    // Load the country -> continent mapping; the resource stream is closed once read.
    try (java.io.InputStream mapping = GeonamesDumpToRdf.class
            .getResourceAsStream("/country-to-continent.properties")) {
        countryToContinent.load(mapping);
    }

    Map<String, BufferedWriter> files = new HashMap<String, BufferedWriter>();
    Map<String, Boolean> started = new HashMap<String, Boolean>();

    try {
        // Create one output file (and, if needed, its continent directory) per country.
        for (Object key : countryToContinent.keySet()) {
            String code = key.toString();
            String continent = countryToContinent.getProperty(code);
            File dir = new File(root, continent);
            if (!dir.exists()) {
                dir.mkdir();
            }
            files.put(code, new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(new File(root, continent + "/" + code + ".rdf")), "UTF-8")));
            System.out.println(continent + "/" + code + ".rdf");
            started.put(code, false);
        }

        System.out.println(started);

        Pattern countryPattern = Pattern
                .compile("<inCountry rdf\\:resource\\=\"http\\://www\\.geonames\\.org/countries/\\#(\\w\\w)\"/>");
        long counter = 0;
        LineIterator it = FileUtils.lineIterator(new File(root, "all-geonames-rdf.txt"), "UTF-8");
        try {
            while (it.hasNext()) {
                String text = it.nextLine();
                if (text.startsWith("http://sws.geonames")) {
                    continue;
                }

                // Progress indicator: one star per 100000 processed documents.
                counter++;
                if (counter % 100000 == 0) {
                    System.out.print("*");
                }

                // Determine the country; documents without one go to the "null" bucket.
                Matcher matcher = countryPattern.matcher(text);
                String country = matcher.find() ? matcher.group(1) : "null";

                text = text.replace("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><rdf:RDF",
                        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><rdf:RDF");

                Boolean alreadyStarted = started.get(country);
                if (alreadyStarted == null) {
                    throw new Exception("Unknow country " + country);
                }
                if (alreadyStarted.booleanValue()) {
                    // Not the first document of this file: drop the XML header and
                    // the opening <rdf:RDF ...> element.
                    text = text.substring(text.indexOf("<rdf:RDF "));
                    text = text.substring(text.indexOf(">") + 1);
                }
                // Drop the closing tag; it is written once per file at the very end.
                text = text.substring(0, text.indexOf("</rdf:RDF>"));
                files.get(country).append(text + "\n");
                started.put(country, true);
            }
        } finally {
            LineIterator.closeQuietly(it);
        }

        // Terminate every file that received at least one document.
        for (Object key : countryToContinent.keySet()) {
            String code = key.toString();
            if (started.get(code).booleanValue()) {
                BufferedWriter bf = files.get(code);
                bf.append("</rdf:RDF>");
                bf.flush();
                bf.close();
            }
        }
    } finally {
        // Release every writer, including those of countries that never received
        // data and those left open by a failure above. Closing an already closed
        // writer is harmless; failures here are only reported so that they cannot
        // mask an exception thrown by the main processing.
        for (BufferedWriter writer : files.values()) {
            try {
                writer.close();
            } catch (java.io.IOException e) {
                System.err.println("Failed to close output file: " + e);
            }
        }
    }
}

From source file:gobblin.util.CLIPasswordEncryptor.java

/**
 * Command-line entry point that either decrypts an {@code ENC(...)} wrapped password
 * or encrypts a plain one, printing the result to standard output.
 *
 * <p>After the usage check, the encrypted-password option takes precedence over the
 * plain-password option when both are present.
 *
 * @param args command-line arguments, handed to {@code parseArgs}
 * @throws ParseException if the arguments cannot be parsed
 * @throws RuntimeException if the encrypted input does not match {@code ENCRYPTED_PATTERN},
 *         or if neither password option was supplied
 */
public static void main(String[] args) throws ParseException {
    final CommandLine commandLine = parseArgs(args);
    if (shouldPrintUsageAndExit(commandLine)) {
        printUsage();
        return;
    }

    final String masterPassword = getMasterPassword(commandLine);
    final TextEncryptor textEncryptor = getEncryptor(commandLine, masterPassword);

    // Decrypt mode: unwrap the payload captured by the pattern's first group.
    if (commandLine.hasOption(ENCRYPTED_PWD_OPTION)) {
        final String wrapped = commandLine.getOptionValue(ENCRYPTED_PWD_OPTION);
        final Matcher wrappedMatcher = ENCRYPTED_PATTERN.matcher(wrapped);
        if (!wrappedMatcher.find()) {
            throw new RuntimeException("Input encrypted password does not match pattern \"ENC(...)\"");
        }
        final String payload = wrappedMatcher.group(1);
        System.out.println(textEncryptor.decrypt(payload));
        return;
    }

    // Encrypt mode: wrap the cipher text in ENC(...).
    if (commandLine.hasOption(PLAIN_PWD_OPTION)) {
        final String plain = commandLine.getOptionValue(PLAIN_PWD_OPTION);
        System.out.println("ENC(" + textEncryptor.encrypt(plain) + ")");
        return;
    }

    // Neither option given: show usage, then fail.
    printUsage();
    throw new RuntimeException(
            String.format("Must provide -%s or -%s option.", PLAIN_PWD_OPTION, ENCRYPTED_PWD_OPTION));
}

From source file:WordCount.java

/**
 * Counts how often each word occurs in the file {@code WordCount.java} (resolved
 * against the working directory) and prints the resulting word-to-count map,
 * sorted by word, to standard output.
 *
 * <p>The file is memory-mapped, decoded as ISO-8859-1, cut into lines and each line
 * is split on punctuation and whitespace; empty fragments are ignored.
 *
 * @param args ignored
 * @throws Exception if the file cannot be opened, mapped or decoded
 */
public static void main(String args[]) throws Exception {
    String filename = "WordCount.java";

    // Map the file and decode it to characters. The stream (and its channel) is
    // closed as soon as decoding is done, also when mapping or decoding fails.
    CharBuffer charBuffer;
    try (FileInputStream input = new FileInputStream(filename)) {
        FileChannel channel = input.getChannel();
        // Pass the real size: map() rejects files over Integer.MAX_VALUE bytes
        // instead of silently working on a truncated length.
        MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        CharsetDecoder decoder = Charset.forName("ISO-8859-1").newDecoder();
        charBuffer = decoder.decode(buffer);
    }

    // One match per line (MULTILINE makes '$' match before each line terminator).
    Pattern linePattern = Pattern.compile(".*$", Pattern.MULTILINE);

    // Words are separated by any punctuation or whitespace character.
    Pattern wordBreakPattern = Pattern.compile("[\\p{Punct}\\s]");

    Matcher lineMatcher = linePattern.matcher(charBuffer);

    // TreeMap keeps the words sorted for printing.
    Map<String, Integer> counts = new TreeMap<String, Integer>();

    while (lineMatcher.find()) {
        CharSequence line = lineMatcher.group();
        for (String word : wordBreakPattern.split(line)) {
            if (word.isEmpty()) {
                continue; // produced by adjacent separators
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
    }
    System.out.println(counts);
}

From source file:com.github.liyp.test.TestMain.java

/**
 * Scratch entry point that prints the results of a series of small, unrelated
 * experiments: a slow shutdown hook, chained and filtered iterators, string
 * matching and formatting, JSON object/array handling, splitting, and finally the
 * extraction of a timestamp from a log line with a regular expression.
 *
 * @param args command-line arguments; only their count is printed
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) {
    // Shutdown hook that announces itself, waits ten seconds, then announces the end.
    Runnable slowShutdown = new Runnable() {
        @Override
        public void run() {
            System.out.println("########### shoutdown begin....");
            try {
                Thread.sleep(10000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println("########### shoutdown end....");
        }
    };
    Runtime.getRuntime().addShutdownHook(new Thread(slowShutdown));

    System.out.println(args.length);

    // Two array-backed iterators, consumed one after the other.
    Iterator<String> firstBatch = IteratorUtils
            .arrayIterator(new String[] { "one", "two", "three", "11", "22", "AB" });
    Iterator<String> secondBatch = IteratorUtils.arrayIterator(new String[] { "a", "b", "c", "33", "ab", "aB" });
    Iterator<String> chainedIter = IteratorUtils.chainedIterator(firstBatch, secondBatch);

    System.out.println("==================");

    // Keeps only elements made of exactly two ASCII letters; logs every candidate.
    Predicate exactlyTwoLetters = new Predicate() {
        @Override
        public boolean evaluate(Object candidate) {
            System.out.println("xx:" + candidate.toString());
            String text = (String) candidate;
            return text.matches("([a-z]|[A-Z]){2}");
        }
    };
    Iterator<String> twoLetterWords = IteratorUtils.filteredIterator(chainedIter, exactlyTwoLetters);
    while (twoLetterWords.hasNext()) {
        String accepted = twoLetterWords.next();
        System.out.println(accepted);
    }

    System.out.println("===================");

    boolean fourLowercase = "asas".matches("[a-z]{4}");
    System.out.println(fourLowercase);

    boolean equalsNull = "Y".equals(null);
    System.out.println(equalsNull);

    String padded = String.format("%02d", 1000L);
    System.out.println(padded);

    System.out.println(ArrayUtils.toString(splitAndTrim(" 11, 21,12 ,", ",")));

    String emptyList = new ArrayList<String>().toString();
    System.out.println(emptyList);

    // JSON object holding an explicit null plus a few added members.
    JSONObject json = new JSONObject("{\"keynull\":null}");
    json.put("bool", false);
    json.put("keya", "as");
    json.put("key2", 2212222222222222222L);
    System.out.println(json);
    System.out.println(json.get("keynull").equals(null));

    String payload = String.format("{\"id\":%d,\"method\":\"testCrossSync\",\"circle\":%d},\"isEnd\":true", 1, 1);
    System.out.println(payload.getBytes().length);

    String[] letters = { "a", "b" };
    System.out.println(letters);

    System.out.println(new JSONArray("[\"aa\",\"\"]"));

    String record = String.format("%9d %s", 1, RandomStringUtils.randomAlphanumeric(10));
    System.out.println(record.getBytes().length);

    String[] pipeFields = "1|2| 3|  333||| 3".split("\\|");
    System.out.println(ArrayUtils.toString(pipeFields));

    // A JSON object rebuilt from another one's text is an independent copy.
    JSONObject original = new JSONObject("{\"a\":\"11111\"}");
    JSONObject copy = new JSONObject(original.toString());
    copy.put("b", "22222");
    System.out.println(original + " | " + copy);

    System.out.println("======================");

    // Pull the leading "yyyy-MM-dd HH:mm:ss" style timestamp out of a log line.
    Pattern timestampPattern = Pattern.compile("\\d+(\\-\\d+){2} \\d+(:\\d+){2}");
    Matcher timestamp = timestampPattern.matcher("2015-12-28 15:46:14  _NC250_MD:motion de\n");
    String eventDate;
    if (timestamp.find()) {
        eventDate = timestamp.group();
    } else {
        eventDate = "";
    }

    System.out.println(eventDate);
}

From source file:com.wittawat.wordseg.Main.java

/**
 * Interactive console front end for the tokenizer.
 *
 * <p>Loads the default model, prints the agreement and help texts and then reads
 * commands from the system console until the user quits or the input ends.
 * Supported commands:
 * <ul>
 *   <li>{@code h}, {@code help}, {@code about}, {@code agreement}: print the respective text;</li>
 *   <li>{@code set dict true|false}: switch dictionary use on or off;</li>
 *   <li>{@code q}, {@code quit}, {@code exit}: print "Bye" and exit with status 0;</li>
 *   <li>{@code <file>:tokfile:<out>}: tokenize the content of a file into an output file;</li>
 *   <li>{@code <file>:tokfile}: tokenize the content of a file and print the result;</li>
 *   <li>{@code <text>:tok:<out>}: tokenize the given text into an output file;</li>
 *   <li>{@code <text>:tok}: tokenize the given text and print the result.</li>
 * </ul>
 * An exception raised by a command is reported and the loop continues.
 *
 * @param args ignored
 * @throws Exception if loading the model or the warm-up tokenization fails
 */
public static void main(String[] args) throws Exception {
    Console con = System.console();
    if (con == null) {
        System.out.println("The system must support console to run the program.");
        System.exit(1);
    }
    // Load model
    System.out.println("Loading model ...");
    Classifier model = Data.getDefaultModel();

    System.out.println("Finished loading model.");
    System.out.println(getAgreement());

    boolean isUseDict = true;

    // Dummy statement to eliminate all lazy loading
    // NOTE(review): the sample text looks like it was mangled by an encoding
    // conversion; it is reproduced unchanged here.
    System.out.println("\n" + new NukeTokenizer3(
            "?????",
            model, isUseDict).tokenize() + "\n");

    System.out.println(getHelp());

    final Pattern setDictPattern = Pattern.compile("\\s*set\\s+dict\\s+(true|false)\\s*");
    while (true) {
        System.out.print(">> ");
        String line = con.readLine();
        if (line == null) {
            // End of input: readLine() returns null from now on, so continuing
            // would print the prompt forever.
            break;
        }
        line = line.trim();
        if (line.equals("")) {
            continue;
        }

        try {
            // Matched once and reused for both the test and the group lookup.
            Matcher setDict = setDictPattern.matcher(line);
            if (line.equals("h") || line.equals("help")) {
                System.out.println(getHelp());
            } else if (line.equals("about")) {
                System.out.println(getAbout());
            } else if (line.equals("agreement")) {
                System.out.println(getAgreement());
            } else if (setDict.find()) {
                isUseDict = setDict.group(1).equals("true");
                System.out.println("Dictionary will " + (isUseDict ? "" : "not ") + "be used.");
            } else if (line.matches("q|quit|exit")) {
                System.out.println("Bye");
                System.exit(0);
            } else if (line.contains(":tok")) {
                // All four tokenize commands share one code path. ":tokfile" is
                // tested before ":tok", and the variant with a trailing colon (output
                // file given) before the one without, as each is a prefix of the other.
                boolean fromFile = line.contains(":tokfile");
                String keyword = fromFile ? ":tokfile" : ":tok";
                boolean toFile = line.contains(keyword + ":");

                String[] splits = line.split(toFile ? keyword + ":" : keyword);
                String source = splits[0];
                String target = toFile ? splits[1] : null;
                String content = fromFile ? FileUtils.readFileToString(new File(source)) : source;

                // Only tokenizer construction and tokenization are timed.
                long start = System.currentTimeMillis();
                NukeTokenizer tokenizer = new NukeTokenizer3(content, model, isUseDict);
                String tokenized = tokenizer.tokenize();
                long elapsed = System.currentTimeMillis() - start;

                if (toFile) {
                    System.out.println("Time to tokenize: " + elapsed + " ms.");
                    FileUtils.writeStringToFile(new File(target), tokenized);
                } else {
                    System.out.println(tokenized);
                    System.out.println("Time to tokenize: " + elapsed + " ms.");
                }
            } else {
                System.out.println("Unknown command");
            }
        } catch (Exception e) {
            System.out.println("Error. See the exception.");
            e.printStackTrace();
        }
    }
}

From source file:com.hp.avmon.trap.service.TrapService.java

/**
 * Prints, one per line, every brace-delimited token ({@code {...}}) found in a
 * fixed sample string.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final String sample = "{3}123{3}{10}";

    // Group 1 captures the shortest "{...}" run; the lazy prefix skips whatever precedes it.
    final Matcher braces = Pattern.compile(".*?(\\{.+?\\})").matcher(sample);

    for (boolean found = braces.find(); found; found = braces.find()) {
        String token = braces.group(1);
        System.out.println(token);
    }
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step5LinguisticPreprocessing.java

/**
 * Runs linguistic preprocessing over every XML query container in an input
 * directory and writes the enriched containers to an output directory.
 *
 * <p>For each ranked result that has plain text, the text is split into lines, the
 * HTML tag of each line is recorded, the enclosing tags and {@code &nbsp;} entities
 * are removed, and the non-empty cleaned lines are joined into a document. That
 * document is annotated with {@code WebParagraphAnnotator}, each paragraph gets its
 * original HTML tag, {@code StanfordSegmenter} is run on the paragraphs, and the
 * resulting CAS is serialized to XMI, Base64 encoded and stored in the result's
 * {@code originalXmi} field.
 *
 * @param args {@code args[0]} is the input directory holding the {@code .xml}
 *        query containers, {@code args[1]} the output directory (created if absent)
 * @throws IllegalArgumentException if fewer than two arguments are given, or a line
 *         does not match {@code OPENING_TAG_PATTERN}
 * @throws IllegalStateException if the number of annotated paragraphs differs from
 *         the number of cleaned lines
 * @throws Exception on I/O or pipeline failure
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        throw new IllegalArgumentException("Expected two arguments: <inputDir> <outputDir>");
    }

    // input dir - list of xml query containers
    // step4-boiler-plate/
    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (!outputDir.exists()) {
        outputDir.mkdirs();
    }

    // Compiled once up front; String.replaceAll would recompile them for every line.
    Pattern leadingTag = Pattern.compile("^<\\S+>");
    Pattern trailingTag = Pattern.compile("</\\S+>$");

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            if (rankedResults.plainText == null) {
                continue;
            }

            String[] lines = StringUtils.split(rankedResults.plainText, "\n");

            // collecting all cleaned lines
            List<String> cleanLines = new ArrayList<>(lines.length);
            // collecting line tags
            List<String> lineTags = new ArrayList<>(lines.length);

            for (String line : lines) {
                // get the tag
                String tag = null;
                Matcher m = OPENING_TAG_PATTERN.matcher(line);
                if (m.find()) {
                    tag = m.group(1);
                }
                if (tag == null) {
                    throw new IllegalArgumentException("No html tag found for line:\n" + line);
                }

                // remove the tag at the beginning and the end
                String noTagText = leadingTag.matcher(line).replaceAll("");
                noTagText = trailingTag.matcher(noTagText).replaceAll("");

                // do some html cleaning
                noTagText = noTagText.replace("&nbsp;", " ").trim();

                // add to the output; tags of lines that end up empty are dropped too
                if (!noTagText.isEmpty()) {
                    cleanLines.add(noTagText);
                    lineTags.add(tag);
                }
            }

            if (cleanLines.isEmpty()) {
                // the document is empty
                System.err.println("Document " + rankedResults.clueWebID + " in query "
                        + queryResultContainer.qID + " is empty");
                continue;
            }

            // now join them back to paragraphs
            String text = StringUtils.join(cleanLines, "\n");

            // create JCas
            JCas jCas = JCasFactory.createJCas();
            jCas.setDocumentText(text);
            jCas.setDocumentLanguage("en");

            // annotate WebParagraph
            SimplePipeline.runPipeline(jCas,
                    AnalysisEngineFactory.createEngineDescription(WebParagraphAnnotator.class));

            // fill the original tag information
            List<WebParagraph> webParagraphs = new ArrayList<>(
                    JCasUtil.select(jCas, WebParagraph.class));

            // they must be the same size as original ones
            if (webParagraphs.size() != lineTags.size()) {
                throw new IllegalStateException(
                        "Different size of annotated paragraphs and original lines");
            }

            for (int i = 0; i < webParagraphs.size(); i++) {
                webParagraphs.get(i).setOriginalHtmlTag(lineTags.get(i));
            }

            SimplePipeline.runPipeline(jCas,
                    AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
                            // only on existing WebParagraph annotations
                            StanfordSegmenter.PARAM_ZONE_TYPES, WebParagraph.class.getCanonicalName()));

            // now convert to XMI
            ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
            XmiCasSerializer.serialize(jCas.getCas(), byteOutputStream);

            // encode to base64
            // NOTE(review): if BASE64Encoder is sun.misc.BASE64Encoder it is an internal
            // JDK class; java.util.Base64 would be the supported replacement, but its
            // line wrapping differs - confirm with the consumers of originalXmi first.
            String encoded = new BASE64Encoder().encode(byteOutputStream.toByteArray());

            rankedResults.originalXmi = encoded;
        }

        // and save the query to output dir
        File outputFile = new File(outputDir, queryResultContainer.qID + ".xml");
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}

From source file:edu.illinois.cs.cogcomp.ner.BenchmarkOutputParser.java

/**
 * Prints a CSV summary of benchmark results: one header line, then one line per
 * run directory whose name starts with {@code L1r} and which contains
 * {@code ner/results.out}.
 *
 * <p>This main method takes one required argument, identifying the file containing
 * the results. Optionally, "-single" may also be passed, indicating it will extract
 * the F1 value for single token values only.
 *
 * @param args command-line arguments, handed to {@code parseArgs}
 * @throws IOException if reading a results file fails
 */
public static void main(String[] args) throws IOException {
    parseArgs(args);
    System.out.println("L1lr,L1t,L2lr,L2t,L1 token,L2 token,F1,F2");

    // listFiles() returns null when the path is not a directory or cannot be read.
    File[] runDirs = resultsfile.listFiles();
    if (runDirs == null) {
        System.err.println("Cannot list results directory : \"" + resultsfile + "\"");
        return;
    }

    for (File file : runDirs) {
        if (!file.getName().startsWith("L1r")) {
            continue;
        }
        // Named differently from the resultsfile field to avoid shadowing it.
        File runResults = new File(file, "ner/results.out");
        if (!runResults.exists()) {
            System.err.println("no results in " + runResults);
            continue;
        }

        String unparsable = "The results file could not be parsed : \"" + runResults + "\"";
        try {
            Parameters p = parseFilename(file);
            String lines = FileUtils.readFileToString(runResults);

            // get the token level scores; each has a primary and a fallback pattern.
            String tokenL2 = firstTokenLevelScore(l2tokenlevelpattern, ol2tokenlevelpattern, lines);
            String tokenL1 = firstTokenLevelScore(l1tokenlevelpattern, ol1tokenlevelpattern, lines);

            // the phrase level scores are mandatory
            Matcher matcher = phraselevelpattern.matcher(lines);
            if (!matcher.find()) {
                System.err.println(unparsable);
                continue;
            }
            String phraseL1 = matcher.group(1);
            String phraseL2 = matcher.group(2);
            System.out.println(
                    p.toString() + "," + tokenL1 + "," + tokenL2 + "," + phraseL1 + "," + phraseL2);
        } catch (java.lang.IllegalStateException ise) {
            System.err.println(unparsable);
        }
    }
}

/**
 * Returns group 1 of the first match of {@code primary} in {@code text}, falling
 * back to {@code fallback}; reports "No token level match" on standard error and
 * returns {@code null} when neither pattern matches.
 */
private static String firstTokenLevelScore(Pattern primary, Pattern fallback, String text) {
    Matcher matcher = primary.matcher(text);
    if (matcher.find()) {
        return matcher.group(1);
    }
    matcher = fallback.matcher(text);
    if (matcher.find()) {
        return matcher.group(1);
    }
    System.err.println("No token level match");
    return null;
}

From source file:ReplaceDemo.java

/**
 * Demonstrates two ways of replacing every regex match in a string: the one-shot
 * {@code Matcher.replaceAll} and the incremental
 * {@code appendReplacement}/{@code appendTail} pair. Both results are printed.
 *
 * @param argv ignored
 */
public static void main(String[] argv) {
    // Matches "d", then one or two letters drawn from 'a'/'e', then "mon"
    // (demon, deamon, daemon, ...).
    final String spellingRegex = "d[ae]{1,2}mon";
    final String sample = "Unix hath demons and deamons in it!";
    System.out.println("Input: " + sample);

    // One-shot replacement of all matches.
    final Matcher spellings = Pattern.compile(spellingRegex).matcher(sample);
    final String normalized = spellings.replaceAll("daemon");
    System.out.println("ReplaceAll: " + normalized);

    // Incremental replacement: rewind the matcher, then for every match copy the
    // text preceding it plus the replacement word into the buffer.
    spellings.reset();
    final StringBuffer rebuilt = new StringBuffer();
    System.out.print("Append methods: ");
    boolean matched = spellings.find();
    while (matched) {
        spellings.appendReplacement(rebuilt, "daemon");
        matched = spellings.find();
    }
    // Copy whatever follows the last match.
    spellings.appendTail(rebuilt);
    System.out.println(rebuilt.toString());
}