List of usage examples for java.util.regex.Pattern.CANON_EQ
int CANON_EQ
To view the source code for java.util.regex.Pattern.CANON_EQ, click the Source Link.
From source file:MainClass.java
public static void main(String[] args) { String pattStr = "\u00e9gal"; // gal String[] input = { "\u00e9gal", // gal - this one had better match :-) "e\u0301gal", // e + "Combining acute accent" "e\u02cagal", // e + "modifier letter acute accent" "e'gal", // e + single quote "e\u00b4gal", // e + Latin-1 "acute" };//from w w w. j a v a 2 s. c om Pattern pattern = Pattern.compile(pattStr, Pattern.CANON_EQ); for (int i = 0; i < input.length; i++) { if (pattern.matcher(input[i]).matches()) { System.out.println(pattStr + " matches input " + input[i]); } else { System.out.println(pattStr + " does not match input " + input[i]); } } }
From source file:BGrep.java
public static void main(String[] args) { String encodingName = "UTF-8"; // Default to UTF-8 encoding int flags = Pattern.MULTILINE; // Default regexp flags try { // Fatal exceptions are handled after this try block // First, process any options int nextarg = 0; while (args[nextarg].charAt(0) == '-') { String option = args[nextarg++]; if (option.equals("-e")) { encodingName = args[nextarg++]; } else if (option.equals("-i")) { // case-insensitive matching flags |= Pattern.CASE_INSENSITIVE; } else if (option.equals("-s")) { // Strict Unicode processing flags |= Pattern.UNICODE_CASE; // case-insensitive Unicode flags |= Pattern.CANON_EQ; // canonicalize Unicode } else { System.err.println("Unknown option: " + option); usage();//from www . j a v a 2 s. c om } } // Get the Charset for converting bytes to chars Charset charset = Charset.forName(encodingName); // Next argument must be a regexp. Compile it to a Pattern object Pattern pattern = Pattern.compile(args[nextarg++], flags); // Require that at least one file is specified if (nextarg == args.length) usage(); // Loop through each of the specified filenames while (nextarg < args.length) { String filename = args[nextarg++]; CharBuffer chars; // This will hold complete text of the file try { // Handle per-file errors locally // Open a FileChannel to the named file FileInputStream stream = new FileInputStream(filename); FileChannel f = stream.getChannel(); // Memory-map the file into one big ByteBuffer. This is // easy but may be somewhat inefficient for short files. ByteBuffer bytes = f.map(FileChannel.MapMode.READ_ONLY, 0, f.size()); // We can close the file once it is is mapped into memory. // Closing the stream closes the channel, too. 
stream.close(); // Decode the entire ByteBuffer into one big CharBuffer chars = charset.decode(bytes); } catch (IOException e) { // File not found or other problem System.err.println(e); // Print error message continue; // and move on to the next file } // This is the basic regexp loop for finding all matches in a // CharSequence. Note that CharBuffer implements CharSequence. // A Matcher holds state for a given Pattern and text. Matcher matcher = pattern.matcher(chars); while (matcher.find()) { // While there are more matches // Print out details of the match System.out.println(filename + ":" + // file name matcher.start() + ": " + // character pos matcher.group()); // matching text } } } // These are the things that can go wrong in the code above catch (UnsupportedCharsetException e) { // Bad encoding name System.err.println("Unknown encoding: " + encodingName); } catch (PatternSyntaxException e) { // Bad pattern System.err.println("Syntax error in search pattern:\n" + e.getMessage()); } catch (ArrayIndexOutOfBoundsException e) { // Wrong number of arguments usage(); } }
From source file:CanonEqDemo.java
public static void main(String[] args) { String pattStr = "\u00e9gal"; // Zgal String[] input = { "\u00e9gal", // Zgal - this one had better match :-) "e\u0301gal", // e + "Combining acute accent" "e\u02cagal", // e + "modifier letter acute accent" "e'gal", // e + single quote "e\u00b4gal", // e + Latin-1 "acute" };/* w w w . j a v a 2 s.c om*/ Pattern pattern = Pattern.compile(pattStr, Pattern.CANON_EQ); for (int i = 0; i < input.length; i++) { if (pattern.matcher(input[i]).matches()) { System.out.println(pattStr + " matches input " + input[i]); } else { System.out.println(pattStr + " does not match input " + input[i]); } } }
From source file:Normalization.TextNormalization.java
/**
 * Code points stripped by {@link #removeEmojiFromString(String)}: U+1F000-U+1F3FF,
 * U+1F400-U+1F7FF and U+2600-U+27FF. Compiled once because a Pattern is immutable and
 * recompiling it on every call is wasted work.
 */
private static final Pattern EMOJI_AND_SYMBOLS = Pattern.compile(
        "[\ud83c\udc00-\ud83c\udfff]|[\ud83d\udc00-\ud83d\udfff]|[\u2600-\u27ff]",
        Pattern.UNICODE_CASE | Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE);

/**
 * Returns a copy of {@code content} with every character in the emoji/symbol ranges above
 * removed.
 *
 * @param content the text to clean; must not be null
 * @return the text without the matched characters
 * @throws NullPointerException if {@code content} is null
 */
public String removeEmojiFromString(String content) {
    // The UTF-8 round-trip is kept from the original: encoding substitutes a replacement
    // for anything that cannot be encoded (such as an unpaired surrogate) before matching.
    // Using the Charset constant removes the impossible UnsupportedEncodingException that
    // was previously swallowed by an empty catch block.
    byte[] utf8Bytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    String utf8tweet = new String(utf8Bytes, java.nio.charset.StandardCharsets.UTF_8);

    return EMOJI_AND_SYMBOLS.matcher(utf8tweet).replaceAll("");
}
From source file:nz.net.orcon.kanban.automation.actions.RegexAction.java
public String extract(String text, String expressionString, int match, int group, String options) throws IOException { if (text == null) { text = ""; }//from w w w . j av a2 s .c o m if (expressionString == null) { throw new IllegalArgumentException( "No Regular Expression has been provided to carry out this operation."); } int optionsInEffect = 0; if (options != null) { for (String option : options.toUpperCase().split("\\|")) { optionsInEffect |= (option.equals("CANON_EQ")) ? Pattern.CANON_EQ : (option.equals("CASE_INSENSITIVE")) ? Pattern.CASE_INSENSITIVE : (option.equals("COMMENTS")) ? Pattern.COMMENTS : (option.equals("DOTALL")) ? Pattern.DOTALL : (option.equals("LITERAL")) ? Pattern.LITERAL : (option.equals("MULTILINE")) ? Pattern.MULTILINE : (option.equals("UNICODE_CASE")) ? Pattern.UNICODE_CASE : (option.equals("UNIX_LINES")) ? Pattern.UNIX_LINES : 0; } } Pattern expression = Pattern.compile(expressionString, optionsInEffect); Matcher matches = expression.matcher(text); int matchIndex = 1; while (matches.find()) { for (int groupIndex = 0; matches.groupCount() + 1 > groupIndex; groupIndex++) { if (matchIndex == match && groupIndex == group) { return matches.group(groupIndex); } } matchIndex++; } return ""; }
From source file:org.opennms.netmgt.provision.adapters.link.config.linkadapter.LinkPattern.java
/** * <p>setPattern</p>/*from w w w. ja v a2 s .c o m*/ * * @param pattern a {@link java.lang.String} object. */ public void setPattern(final String pattern) { if (pattern != null) { m_compiledPattern = Pattern.compile(pattern, Pattern.CANON_EQ | Pattern.DOTALL); m_pattern = pattern; } }
From source file:com.github.rwitzel.streamflyer.support.ProcessEndOfStreamTest.java
protected long rewriteContent(InputStream input, OutputStream output, String encoding, boolean flush) throws IOException { Charset charset = Charset.forName(encoding); String oldPath = "something"; String newPath = "anything"; String regex = "((https?://)([^/]+/))?(" + oldPath + ")"; String replacement = "$1" + newPath; // FastRegexModifier modifier = new FastRegexModifier(regex, Pattern.CASE_INSENSITIVE | Pattern.CANON_EQ, // replacement); RegexModifier modifier = new RegexModifier(regex, Pattern.CASE_INSENSITIVE | Pattern.CANON_EQ, replacement); Reader reader = new ModifyingReader(new InputStreamReader(input, charset), modifier); Writer writer = new OutputStreamWriter(output, charset); int copied = IOUtils.copy(reader, writer); if (flush) {/*www .j a v a 2s.c o m*/ writer.flush(); } return copied; }
From source file:pl.otros.logview.gui.message.pattern.PropertyPatternMessageColorizer.java
public void init(InputStream in) throws ConfigurationException { propertiesConfiguration = new PropertiesConfiguration(); propertiesConfiguration.setDelimiterParsingDisabled(true); propertiesConfiguration.load(in, "UTF-8"); configuration = new DataConfiguration(propertiesConfiguration); configuration.setDelimiterParsingDisabled(true); String pa = configuration.getString(PROP_PATTERN); int flags = 0; flags = flags | (configuration.getBoolean(PROP_PATTERN_CANON_EQ, false) ? Pattern.CANON_EQ : 0); flags = flags// w w w . j av a 2 s . c o m | (configuration.getBoolean(PROP_PATTERN_CASE_INSENSITIVE, false) ? Pattern.CASE_INSENSITIVE : 0); flags = flags | (configuration.getBoolean(PROP_PATTERN_COMMENTS, false) ? Pattern.COMMENTS : 0); flags = flags | (configuration.getBoolean(PROP_PATTERN_DOTALL, false) ? Pattern.DOTALL : 0); flags = flags | (configuration.getBoolean(PROP_PATTERN_LITERAL, false) ? Pattern.LITERAL : 0); flags = flags | (configuration.getBoolean(PROP_PATTERN_MULTILINE, false) ? Pattern.MULTILINE : 0); flags = flags | (configuration.getBoolean(PROP_PATTERN_UNICODE_CASE, false) ? Pattern.UNICODE_CASE : 0); flags = flags | (configuration.getBoolean(PROP_PATTERN_UNIX_LINES, false) ? Pattern.UNIX_LINES : 0); pattern = Pattern.compile(pa, flags); groupCount = countGroups(pattern); name = configuration.getString(PROP_NAME, "NAME NOT SET!"); description = configuration.getString(PROP_DESCRIPTION, "DESCRIPTION NOT SET!"); testMessage = configuration.getString(PROP_TEST_MESSAGE, ""); version = configuration.getInt(PROP_VERSION, 1); }
From source file:com.google.code.configprocessor.processing.ModifyAction.java
/**
 * Converts the configured flag names into a {@link Pattern} flag bit mask. The names come
 * from {@code getFlags()}, or from {@code DEFAULT_PATTERN_FLAGS} when that returns null,
 * and are split on {@code PATTERN_FLAG_SEPARATOR}.
 *
 * @return the bitwise OR of all named flags
 * @throws IllegalArgumentException if a name is not one of the recognised flags
 */
protected int parseFlags() {
    String flagsToTest = DEFAULT_PATTERN_FLAGS;
    if (getFlags() != null) {
        flagsToTest = getFlags();
    }

    int flagsToUse = 0;
    for (String flag : StringUtils.split(flagsToTest, PATTERN_FLAG_SEPARATOR)) {
        flagsToUse |= patternFlagFor(flag);
    }
    return flagsToUse;
}

/** Returns the Pattern constant for one flag name, rejecting anything unrecognised. */
private static int patternFlagFor(String flag) {
    if ("UNIX_LINES".equals(flag)) {
        return Pattern.UNIX_LINES;
    }
    if ("CASE_INSENSITIVE".equals(flag)) {
        return Pattern.CASE_INSENSITIVE;
    }
    if ("COMMENTS".equals(flag)) {
        return Pattern.COMMENTS;
    }
    if ("MULTILINE".equals(flag)) {
        return Pattern.MULTILINE;
    }
    if ("LITERAL".equals(flag)) {
        return Pattern.LITERAL;
    }
    if ("DOTALL".equals(flag)) {
        return Pattern.DOTALL;
    }
    if ("UNICODE_CASE".equals(flag)) {
        return Pattern.UNICODE_CASE;
    }
    if ("CANON_EQ".equals(flag)) {
        return Pattern.CANON_EQ;
    }
    throw new IllegalArgumentException("Unknown flag: " + flag);
}
From source file:org.talend.core.model.utils.ContextParameterUtils.java
/** * //w w w .j ava 2 s . co m * ggu Comment method "getVariableFromCode". * * only for new script code and the first variables. and if there is no variable in code, return null. */ public static String getVariableFromCode(String code) { if (code == null) { return null; } // if (isContainContextParam(code)) { String pattern = null; String varPattern = "(.+?)"; //$NON-NLS-1$ String wordPattern = "\\b"; //$NON-NLS-1$ pattern = wordPattern + replaceCharForRegex(JAVA_NEW_CONTEXT_PREFIX) + varPattern + wordPattern; if (pattern != null) { Pattern regex = Pattern.compile(pattern, Pattern.CANON_EQ); Matcher regexMatcher = regex.matcher(code); if (regexMatcher.find()) { try { String var = regexMatcher.group(1); if (var != null && ContextParameterUtils.isValidParameterName(var)) { return var; } } catch (RuntimeException re) { // not match } } } // } return null; }