List of usage examples for java.util.regex Pattern UNICODE_CHARACTER_CLASS
int UNICODE_CHARACTER_CLASS
To view the source code for java.util.regex Pattern UNICODE_CHARACTER_CLASS, click the Source Link.
From source file:org.xbib.elasticsearch.index.analysis.langdetect.LangdetectService.java
@Override protected void doStart() throws ElasticsearchException { load(settings);//w w w . j av a 2s . co m this.priorMap = null; this.n_trial = settings.getAsInt("number_of_trials", 7); this.alpha = settings.getAsDouble("alpha", 0.5); this.alpha_width = settings.getAsDouble("alpha_width", 0.05); this.iteration_limit = settings.getAsInt("iteration_limit", 10000); this.prob_threshold = settings.getAsDouble("prob_threshold", 0.1); this.conv_threshold = settings.getAsDouble("conv_threshold", 0.99999); this.base_freq = settings.getAsInt("base_freq", 10000); this.filterPattern = settings.get("pattern") != null ? Pattern.compile(settings.get("pattern"), Pattern.UNICODE_CHARACTER_CLASS) : null; }
From source file:io.bibleget.HTTPCaller.java
/** * * @param myQuery//from w ww .ja v a2 s . c o m * @param selectedVersions * @return * @throws java.lang.ClassNotFoundException * @throws java.io.UnsupportedEncodingException */ public boolean integrityCheck(String myQuery, List<String> selectedVersions) throws ClassNotFoundException, UnsupportedEncodingException { String versionsStr = StringUtils.join(selectedVersions.toArray(), ','); //System.out.println("Starting integrity check on query "+myQuery+" for versions: "+versionsStr); if (indexes == null) { indexes = Indexes.getInstance(); } //build indexes based on versions //final result is true until proved false //set finFlag to false for non-breaking errors, or simply return false for breaking errors boolean finFlag = true; errorMessages.removeAll(errorMessages); List<String> queries = new ArrayList<>(); //if english notation is found, translate to european notation if (myQuery.contains(":") && myQuery.contains(".")) { errorMessages.add(__( "Mixed notations have been detected. Please use either english notation or european notation.")); return false; } else if (myQuery.contains(":")) { if (myQuery.contains(",")) { myQuery = myQuery.replace(",", "."); } myQuery = myQuery.replace(":", ","); } if (myQuery.isEmpty() == false) { if (myQuery.contains(";")) { //System.out.println("We have a semicolon"); queries.addAll(Arrays.asList(myQuery.split(";"))); for (Iterator<String> it = queries.iterator(); it.hasNext();) { if (it.next().isEmpty()) { it.remove(); // NOTE: Iterator's remove method, not ArrayList's, is used. 
} } } else { //System.out.println("There is no semicolon"); queries.add(myQuery); } } boolean first = true; String currBook = ""; if (queries.isEmpty()) { errorMessages.add(__("You cannot send an empty query.")); return false; } for (String querie : queries) { //System.out.println(querie); querie = toProperCase(querie); //System.out.println(querie); //RULE 1: at least the first query must have a book indicator if (first) { if (querie.matches("^[1-3]{0,1}((\\p{L}\\p{M}*)+)(.*)") == false) { errorMessages.add(MessageFormat.format(__( "The first query <{0}> in the querystring <{1}> must start with a valid book indicator!"), querie, myQuery)); finFlag = false; } first = false; } //RULE 2: for every query that starts with a book indicator, // the book indicator must be followed by valid chapter indicator; // else query must start with valid chapter indicator int bBooksContains; int myidx = -1; String tempBook = ""; if (querie.matches("^[1-3]{0,1}((\\p{L}\\p{M}*)+)(.*)") == true) { //while we're at it, let's capture the book value from the query Pattern pattern = Pattern.compile("^[1-3]{0,1}((\\p{L}\\p{M}*)+)", Pattern.UNICODE_CHARACTER_CLASS); Matcher matcher = pattern.matcher(querie); if (matcher.find()) { tempBook = matcher.group(); bBooksContains = isValidBook(tempBook); myidx = bBooksContains + 1; //if(bBooksContains == false && bBooksAbbrevsContains == false){ if (bBooksContains == -1) { errorMessages.add(MessageFormat.format(__( "The book indicator <{0}> in the query <{1}> is not valid. 
Please check the documentation for a list of valid book indicators."), tempBook, querie)); finFlag = false; } else { //if(bBooksContains) currBook = tempBook; //querie = querie.replace(tempBook,""); } } Pattern pattern1 = Pattern.compile("^[1-3]{0,1}((\\p{L}\\p{M}*)+)", Pattern.UNICODE_CHARACTER_CLASS); Pattern pattern2 = Pattern.compile("^[1-3]{0,1}((\\p{L}\\p{M}*)+)[1-9][0-9]{0,2}", Pattern.UNICODE_CHARACTER_CLASS); Matcher matcher1 = pattern1.matcher(querie); Matcher matcher2 = pattern2.matcher(querie); int count1 = 0; while (matcher1.find()) { count1++; } int count2 = 0; while (matcher2.find()) { count2++; } if (querie.matches("^[1-3]{0,1}((\\p{L}\\p{M}*)+)[1-9][0-9]{0,2}(.*)") == false || count1 != count2) { errorMessages.add(__("You must have a valid chapter following the book indicator!")); finFlag = false; } querie = querie.replace(tempBook, ""); } else { if (querie.matches("^[1-9][0-9]{0,2}(.*)") == false) { errorMessages.add(__( "A query that doesn't start with a book indicator must however start with a valid chapter indicator!")); finFlag = false; } } //RULE 3: Queries with a dot operator must first have a comma operator; and cannot have more commas than dots if (querie.contains(".")) { Pattern pattern11 = Pattern.compile("[,|\\-|\\.][1-9][0-9]{0,2}\\."); Matcher matcher11 = pattern11.matcher(querie); if (querie.contains(",") == false || matcher11.find() == false) { errorMessages.add(__( "You cannot use a dot without first using a comma or a dash. 
A dot is a liason between verses, which are separated from the chapter by a comma.")); finFlag = false; } Pattern pattern3 = Pattern.compile("(?<![0-9])(?=(([1-9][0-9]{0,2})\\.([1-9][0-9]{0,2})))"); Matcher matcher3 = pattern3.matcher(querie); int count = 0; while (matcher3.find()) { //RULE 4: verse numbers around dot operators must be sequential if (Integer.parseInt(matcher3.group(2)) >= Integer.parseInt(matcher3.group(3))) { errorMessages.add(MessageFormat.format(__( "Verses concatenated by a dot must be consecutive, instead <{0}> is greater than or equal to <{1}> in the expression <{2}> in the query <{3}>"), matcher3.group(2), matcher3.group(3), matcher3.group(1), querie)); finFlag = false; } count++; } //RULE 5: Dot operators must be preceded and followed by a number from one to three digits, of which the first digit cannot be a 0 if (count == 0 || count != StringUtils.countMatches(querie, ".")) { errorMessages.add(__( "A dot must be preceded and followed by 1 to 3 digits of which the first digit cannot be zero.") + " <" + querie + ">"); finFlag = false; } } //RULE 6: Comma operators must be preceded and followed by a number from one to three digits, of which the first digit cannot be 0 if (querie.contains(",")) { Pattern pattern4 = Pattern.compile("([1-9][0-9]{0,2})\\,[1-9][0-9]{0,2}"); Matcher matcher4 = pattern4.matcher(querie); int count = 0; List<Integer> chapters = new ArrayList<>(); while (matcher4.find()) { //System.out.println("group0="+matcher4.group(0)+", group1="+matcher4.group(1)); chapters.add(Integer.parseInt(matcher4.group(1))); count++; } if (count == 0 || count != StringUtils.countMatches(querie, ",")) { errorMessages.add(__( "A comma must be preceded and followed by 1 to 3 digits of which the first digit cannot be zero.") + " <" + querie + ">" + "(count=" + Integer.toString(count) + ",comma count=" + StringUtils.countMatches(querie, ",") + "); chapters=" + chapters.toString()); finFlag = false; } else { // let's check the validity of the 
chapter numbers against the version indexes //for each chapter captured in the querystring for (int chapter : chapters) { if (indexes.isValidChapter(chapter, myidx, selectedVersions) == false) { int[] chapterLimit = indexes.getChapterLimit(myidx, selectedVersions); errorMessages.add(MessageFormat.format(__( "A chapter in the query is out of bounds: there is no chapter <{0}> in the book <{1}> in the requested version <{2}>, the last possible chapter is <{3}>"), Integer.toString(chapter), currBook, StringUtils.join(selectedVersions, ","), StringUtils.join(chapterLimit, ','))); finFlag = false; } } } } if (StringUtils.countMatches(querie, ",") > 1) { if (!querie.contains("-")) { errorMessages.add(__("You cannot have more than one comma and not have a dash!")); finFlag = false; } String[] parts = StringUtils.split(querie, "-"); if (parts.length != 2) { errorMessages .add(__("You seem to have a malformed querystring, there should be only one dash.")); finFlag = false; } for (String p : parts) { Integer[] pp = new Integer[2]; String[] tt = StringUtils.split(p, ","); int x = 0; for (String t : tt) { pp[x++] = Integer.parseInt(t); } if (indexes.isValidChapter(pp[0], myidx, selectedVersions) == false) { int[] chapterLimit; chapterLimit = indexes.getChapterLimit(myidx, selectedVersions); // System.out.print("chapterLimit = "); // System.out.println(Arrays.toString(chapterLimit)); errorMessages.add(MessageFormat.format(__( "A chapter in the query is out of bounds: there is no chapter <{0}> in the book <{1}> in the requested version <{2}>, the last possible chapter is <{3}>"), Integer.toString(pp[0]), currBook, StringUtils.join(selectedVersions, ","), StringUtils.join(chapterLimit, ','))); finFlag = false; } else { if (indexes.isValidVerse(pp[1], pp[0], myidx, selectedVersions) == false) { int[] verseLimit = indexes.getVerseLimit(pp[0], myidx, selectedVersions); // System.out.print("verseLimit = "); // System.out.println(Arrays.toString(verseLimit)); 
errorMessages.add(MessageFormat.format(__( "A verse in the query is out of bounds: there is no verse <{0}> in the book <{1}> at chapter <{2}> in the requested version <{3}>, the last possible verse is <{4}>"), Integer.toString(pp[1]), currBook, Integer.toString(pp[0]), StringUtils.join(selectedVersions, ","), StringUtils.join(verseLimit, ','))); finFlag = false; } } } } else if (StringUtils.countMatches(querie, ",") == 1) { String[] parts = StringUtils.split(querie, ","); //System.out.println(Arrays.toString(parts)); if (indexes.isValidChapter(Integer.parseInt(parts[0]), myidx, selectedVersions) == false) { int[] chapterLimit = indexes.getChapterLimit(myidx, selectedVersions); errorMessages.add(MessageFormat.format(__( "A chapter in the query is out of bounds: there is no chapter <{0}> in the book <{1}> in the requested version <{2}>, the last possible chapter is <{3}>"), parts[0], currBook, StringUtils.join(selectedVersions, ","), StringUtils.join(chapterLimit, ','))); finFlag = false; } else { if (parts[1].contains("-")) { Deque<Integer> highverses = new ArrayDeque<>(); Pattern pattern11 = Pattern.compile("[,\\.][1-9][0-9]{0,2}\\-([1-9][0-9]{0,2})"); Matcher matcher11 = pattern11.matcher(querie); while (matcher11.find()) { highverses.push(Integer.parseInt(matcher11.group(1))); } int highverse = highverses.pop(); if (indexes.isValidVerse(highverse, Integer.parseInt(parts[0]), myidx, selectedVersions) == false) { int[] verseLimit = indexes.getVerseLimit(Integer.parseInt(parts[0]), myidx, selectedVersions); errorMessages.add(MessageFormat.format(__( "A verse in the query is out of bounds: there is no verse <{0}> in the book <{1}> at chapter <{2}> in the requested version <{3}>, the last possible verse is <{4}>"), highverse, currBook, parts[0], StringUtils.join(selectedVersions, ","), StringUtils.join(verseLimit, ','))); finFlag = false; } } else { Pattern pattern12 = Pattern.compile(",([1-9][0-9]{0,2})"); Matcher matcher12 = pattern12.matcher(querie); int highverse 
= -1; while (matcher12.find()) { highverse = Integer.parseInt(matcher12.group(1)); //System.out.println("[line 376]:highverse="+Integer.toString(highverse)); } if (highverse != -1) { //System.out.println("Checking verse validity for book "+myidx+" chapter "+parts[0]+"..."); if (indexes.isValidVerse(highverse, Integer.parseInt(parts[0]), myidx, selectedVersions) == false) { int[] verseLimit = indexes.getVerseLimit(Integer.parseInt(parts[0]), myidx, selectedVersions); errorMessages.add(MessageFormat.format(__( "A verse in the query is out of bounds: there is no verse <{0}> in the book <{1}> at chapter <{2}> in the requested version <{3}>, the last possible verse is <{4}>"), highverse, currBook, parts[0], StringUtils.join(selectedVersions, ","), StringUtils.join(verseLimit, ','))); finFlag = false; } } } Pattern pattern13 = Pattern.compile("\\.([1-9][0-9]{0,2})$"); Matcher matcher13 = pattern13.matcher(querie); int highverse = -1; while (matcher13.find()) { highverse = Integer.parseInt(matcher13.group(1)); } if (highverse != -1) { if (indexes.isValidVerse(highverse, Integer.parseInt(parts[0]), myidx, selectedVersions) == false) { int[] verseLimit = indexes.getVerseLimit(Integer.parseInt(parts[0]), myidx, selectedVersions); errorMessages.add(MessageFormat.format(__( "A verse in the query is out of bounds: there is no verse <{0}> in the book <{1}> at chapter <{2}> in the requested version <{3}>, the last possible verse is <{4}>"), highverse, currBook, parts[0], StringUtils.join(selectedVersions, ","), StringUtils.join(verseLimit, ','))); finFlag = false; } } } } else { //if there is no comma, it's either a single chapter or an extension of chapters with a dash //System.out.println("no comma found"); String[] parts = StringUtils.split(querie, "-"); //System.out.println(Arrays.toString(parts)); int highchapter = Integer.parseInt(parts[parts.length - 1]); if (indexes.isValidChapter(highchapter, myidx, selectedVersions) == false) { int[] chapterLimit = 
indexes.getChapterLimit(myidx, selectedVersions); errorMessages.add(MessageFormat.format(__( "A chapter in the query is out of bounds: there is no chapter <{0}> in the book <{1}> in the requested version <{2}>, the last possible chapter is <{3}>"), Integer.toString(highchapter), currBook, StringUtils.join(selectedVersions, ","), StringUtils.join(chapterLimit, ','))); finFlag = false; } } if (querie.contains("-")) { //RULE 7: If there are multiple dashes in a query, there cannot be more dashes than there are dots minus 1 int dashcount = StringUtils.countMatches(querie, "-"); int dotcount = StringUtils.countMatches(querie, "."); if (dashcount > 1) { if (dashcount - 1 > dotcount) { errorMessages.add(__( "There are multiple dashes in the query, but there are not enough dots. There can only be one more dash than dots.") + " <" + querie + ">"); finFlag = false; } } //RULE 8: Dash operators must be preceded and followed by a number from one to three digits, of which the first digit cannot be 0 Pattern pattern5 = Pattern.compile("([1-9][0-9]{0,2}\\-[1-9][0-9]{0,2})"); Matcher matcher5 = pattern5.matcher(querie); int count = 0; while (matcher5.find()) { count++; } if (count == 0 || count != StringUtils.countMatches(querie, "-")) { errorMessages.add(__( "A dash must be preceded and followed by 1 to 3 digits of which the first digit cannot be zero.") + " <" + querie + ">"); finFlag = false; } //RULE 9: If a comma construct follows a dash, there must also be a comma construct preceding the dash Pattern pattern6 = Pattern.compile("\\-([1-9][0-9]{0,2})\\,"); Matcher matcher6 = pattern6.matcher(querie); if (matcher6.find()) { Pattern pattern7 = Pattern.compile("\\,[1-9][0-9]{0,2}\\-"); Matcher matcher7 = pattern7.matcher(querie); if (matcher7.find() == false) { errorMessages.add(__( "If there is a chapter-verse construct following a dash, there must also be a chapter-verse construct preceding the same dash.") + " <" + querie + ">"); finFlag = false; } else { //RULE 10: Chapters 
before and after dashes must be sequential int chap1 = -1; int chap2 = -1; Pattern pattern8 = Pattern.compile("([1-9][0-9]{0,2})\\,[1-9][0-9]{0,2}\\-"); Matcher matcher8 = pattern8.matcher(querie); if (matcher8.find()) { chap1 = Integer.parseInt(matcher8.group(1)); } Pattern pattern9 = Pattern.compile("\\-([1-9][0-9]{0,2})\\,"); Matcher matcher9 = pattern9.matcher(querie); if (matcher9.find()) { chap2 = Integer.parseInt(matcher9.group(1)); } if (chap1 >= chap2) { errorMessages.add(MessageFormat.format(__( "Chapters must be consecutive. Instead the first chapter indicator <{0}> is greater than or equal to the second chapter indicator <{1}> in the expression <{2}>"), chap1, chap2, querie)); finFlag = false; } } } else { //if there are no comma constructs immediately following the dash //RULE 11: Verses (or chapters if applicable) around each of the dash operator(s) must be sequential Pattern pattern10 = Pattern.compile("([1-9][0-9]{0,2})\\-([1-9][0-9]{0,2})"); Matcher matcher10 = pattern10.matcher(querie); while (matcher10.find()) { int num1 = Integer.parseInt(matcher10.group(1)); int num2 = Integer.parseInt(matcher10.group(2)); if (num1 >= num2) { errorMessages.add(MessageFormat.format(__( "Verses (or chapters if applicable) around the dash operator must be consecutive. Instead <{0}> is greater than or equal to <{1}> in the expression <{2}>"), num1, num2, querie)); finFlag = false; } } } } } return finFlag; }
From source file:com.joliciel.talismane.tokeniser.filters.TokenRegexFilterImpl.java
Pattern getPattern() { if (pattern == null) { // we may need to replace WordLists by the list contents String myRegex = this.regex; if (LOG.isTraceEnabled()) { LOG.trace("Regex: " + myRegex); }//from w ww .j ava2s .co m if (this.autoWordBoundaries) { Boolean startsWithLetter = null; for (int i = 0; i < myRegex.length() && startsWithLetter == null; i++) { char c = myRegex.charAt(i); if (c == '\\') { i++; c = myRegex.charAt(i); if (c == 'd' || c == 'w') { startsWithLetter = true; } else if (c == 's' || c == 'W' || c == 'b' || c == 'B') { startsWithLetter = false; } else if (c == 'p') { i += 2; // skip the open curly brackets int closeCurlyBrackets = myRegex.indexOf('}', i); int openParentheses = myRegex.indexOf('(', i); int endIndex = closeCurlyBrackets; if (openParentheses > 0 && openParentheses < closeCurlyBrackets) endIndex = openParentheses; if (endIndex > 0) { String specialClass = myRegex.substring(i, endIndex); if (specialClass.equals("WordList")) { startsWithLetter = true; } } } break; } else if (c == '[' || c == '(') { // do nothing } else if (Character.isLetter(c) || Character.isDigit(c)) { startsWithLetter = true; } else { startsWithLetter = false; } } Boolean endsWithLetter = null; for (int i = myRegex.length() - 1; i >= 0 && endsWithLetter == null; i--) { char c = myRegex.charAt(i); char prevC = ' '; if (i >= 1) prevC = myRegex.charAt(i - 1); if (prevC == '\\') { if (c == 'd' || c == 'w') { endsWithLetter = true; } else if (c == 's' || c == 'W' || c == 'b' || c == 'B') { endsWithLetter = false; } else if (c == 'p') { i += 2; // skip the open curly brackets int closeCurlyBrackets = myRegex.indexOf('}', i); int openParentheses = myRegex.indexOf('(', i); int endIndex = closeCurlyBrackets; if (openParentheses < closeCurlyBrackets) endIndex = openParentheses; if (endIndex > 0) { String specialClass = myRegex.substring(i, endIndex); if (specialClass.equals("WordList") || specialClass.equals("Alpha") || specialClass.equals("Lower") || 
specialClass.equals("Upper") || specialClass.equals("ASCII") || specialClass.equals("Digit")) { startsWithLetter = true; } } } break; } else if (c == ']' || c == ')' || c == '+') { // do nothing } else if (c == '}') { int startIndex = myRegex.lastIndexOf('{') + 1; int closeCurlyBrackets = myRegex.indexOf('}', startIndex); int openParentheses = myRegex.indexOf('(', startIndex); int endIndex = closeCurlyBrackets; if (openParentheses > 0 && openParentheses < closeCurlyBrackets) endIndex = openParentheses; if (endIndex > 0) { String specialClass = myRegex.substring(startIndex, endIndex); if (specialClass.equals("WordList") || specialClass.equals("Alpha") || specialClass.equals("Lower") || specialClass.equals("Upper") || specialClass.equals("ASCII") || specialClass.equals("Digit")) { endsWithLetter = true; } } break; } else if (Character.isLetter(c) || Character.isDigit(c)) { endsWithLetter = true; } else { endsWithLetter = false; } } if (startsWithLetter != null && startsWithLetter) { myRegex = "\\b" + myRegex; } if (endsWithLetter != null && endsWithLetter) { myRegex = myRegex + "\\b"; } if (LOG.isTraceEnabled()) { LOG.trace("After autoWordBoundaries: " + myRegex); } } if (!this.caseSensitive || !this.diacriticSensitive) { StringBuilder regexBuilder = new StringBuilder(); for (int i = 0; i < myRegex.length(); i++) { char c = myRegex.charAt(i); if (c == '\\') { // escape - skip next regexBuilder.append(c); i++; c = myRegex.charAt(i); regexBuilder.append(c); } else if (c == '[') { // character group, don't change it regexBuilder.append(c); while (c != ']' && i < myRegex.length()) { i++; c = myRegex.charAt(i); regexBuilder.append(c); } } else if (c == '{') { // command, don't change it regexBuilder.append(c); while (c != '}' && i < myRegex.length()) { i++; c = myRegex.charAt(i); regexBuilder.append(c); } } else if (Character.isLetter(c)) { Set<String> chars = new TreeSet<String>(); chars.add("" + c); char noAccent = diacriticPattern.matcher(Normalizer.normalize("" + c, 
Form.NFD)) .replaceAll("").charAt(0); if (!this.caseSensitive) { chars.add("" + Character.toUpperCase(c)); chars.add("" + Character.toLowerCase(c)); chars.add("" + Character.toUpperCase(noAccent)); } if (!this.diacriticSensitive) { chars.add("" + noAccent); if (!this.caseSensitive) { chars.add("" + Character.toLowerCase(noAccent)); } } if (chars.size() == 1) { regexBuilder.append(c); } else { regexBuilder.append('['); for (String oneChar : chars) { regexBuilder.append(oneChar); } regexBuilder.append(']'); } } else { regexBuilder.append(c); } } myRegex = regexBuilder.toString(); if (LOG.isTraceEnabled()) { LOG.trace("After caseSensitive: " + myRegex); } } Matcher matcher = wordListPattern.matcher(myRegex); StringBuilder regexBuilder = new StringBuilder(); int lastIndex = 0; while (matcher.find()) { String[] params = matcher.group(1).split(","); int start = matcher.start(); int end = matcher.end(); regexBuilder.append(myRegex.substring(lastIndex, start)); String wordListName = params[0]; boolean uppercaseOptional = false; boolean diacriticsOptional = false; boolean lowercaseOptional = false; boolean firstParam = true; for (String param : params) { if (firstParam) { /* word list name */ } else if (param.equals("diacriticsOptional")) diacriticsOptional = true; else if (param.equals("uppercaseOptional")) uppercaseOptional = true; else if (param.equals("lowercaseOptional")) lowercaseOptional = true; else throw new TalismaneException( "Unknown parameter in word list " + matcher.group(1) + ": " + param); firstParam = false; } ExternalWordList wordList = externalResourceFinder.getExternalWordList(wordListName); if (wordList == null) throw new TalismaneException("Unknown word list: " + wordListName); StringBuilder sb = new StringBuilder(); boolean firstWord = true; for (String word : wordList.getWordList()) { if (!firstWord) sb.append("|"); word = Normalizer.normalize(word, Form.NFC); if (uppercaseOptional || diacriticsOptional) { String wordNoDiacritics = 
Normalizer.normalize(word, Form.NFD) .replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); String wordLowercase = word.toLowerCase(Locale.ENGLISH); String wordLowercaseNoDiacritics = Normalizer.normalize(wordLowercase, Form.NFD) .replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); String wordUppercase = wordNoDiacritics.toUpperCase(Locale.ENGLISH); boolean needsGrouping = false; if (uppercaseOptional && !word.equals(wordLowercase)) needsGrouping = true; if (diacriticsOptional && !word.equals(wordNoDiacritics)) needsGrouping = true; if (lowercaseOptional && !word.equals(wordUppercase)) needsGrouping = true; if (needsGrouping) { for (int i = 0; i < word.length(); i++) { char c = word.charAt(i); boolean grouped = false; if (uppercaseOptional && c != wordLowercase.charAt(i)) grouped = true; if (diacriticsOptional && c != wordNoDiacritics.charAt(i)) grouped = true; if (lowercaseOptional && c != wordUppercase.charAt(i)) grouped = true; if (!grouped) sb.append(c); else { sb.append("["); String group = "" + c; if (uppercaseOptional && group.indexOf(wordLowercase.charAt(i)) < 0) group += (wordLowercase.charAt(i)); if (lowercaseOptional && group.indexOf(wordUppercase.charAt(i)) < 0) group += (wordUppercase.charAt(i)); if (diacriticsOptional && group.indexOf(wordNoDiacritics.charAt(i)) < 0) group += (wordNoDiacritics.charAt(i)); if (uppercaseOptional && diacriticsOptional && group.indexOf(wordLowercaseNoDiacritics.charAt(i)) < 0) group += (wordLowercaseNoDiacritics.charAt(i)); sb.append(group); sb.append("]"); } // does this letter need grouping? } // next letter } else { sb.append(word); } // any options activated? } else { sb.append(word); } firstWord = false; } // next word in list regexBuilder.append(sb.toString()); lastIndex = end; } // next match regexBuilder.append(myRegex.substring(lastIndex)); myRegex = regexBuilder.toString(); this.pattern = Pattern.compile(myRegex, Pattern.UNICODE_CHARACTER_CLASS); } return pattern; }
From source file:com.screenslicer.core.util.Util.java
public static List<String> transformUrlStrings(List<String> urls, UrlTransform[] urlTransforms, boolean forExport) { List<String> newUrls = new ArrayList<String>(); if (urlTransforms != null && urlTransforms.length != 0 && urls != null) { for (String url : urls) { String newUrl = url;//from w w w. java 2 s. c o m for (int i = 0; urlTransforms != null && i < urlTransforms.length; i++) { if (!CommonUtil.isEmpty(urlTransforms[i].regex) && newUrl != null && urlTransforms[i] != null && ((forExport && urlTransforms[i].transformForExportOnly) || (!forExport && !urlTransforms[i].transformForExportOnly))) { Pattern pattern = Pattern.compile(urlTransforms[i].regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); Matcher matcher = pattern.matcher(newUrl); if (matcher.find()) { if (urlTransforms[i].replaceAll) { if (urlTransforms[i].replaceAllRecursive) { String transformed = matcher.replaceAll(urlTransforms[i].replacement); String transformedRec = pattern.matcher(transformed) .replaceAll(urlTransforms[i].replacement); while (!transformed.equals(transformedRec)) { transformed = transformedRec; transformedRec = pattern.matcher(transformedRec) .replaceAll(urlTransforms[i].replacement); } newUrl = transformed; } else { newUrl = matcher.replaceAll(urlTransforms[i].replacement); } } else { newUrl = matcher.replaceFirst(urlTransforms[i].replacement); } if (!urlTransforms[i].multipleTransforms) { break; } } } } newUrls.add(newUrl); } } else { return urls; } return newUrls; }
From source file:no.kantega.publishing.common.ContentIdHelperImpl.java
/**
 * Rebuilds {@code CONTENT_URL_PATTERN} for the context path of the given
 * servlet context, compiling it with {@link Pattern#UNICODE_CHARACTER_CLASS}.
 *
 * @param servletContext the servlet context whose context path is used
 */
@Override
public void setServletContext(ServletContext servletContext) {
    final String contextPath = servletContext.getContextPath();
    final String contentUrlRegex = ContentPatterns.getPatternWithContextPath(contextPath);
    CONTENT_URL_PATTERN = Pattern.compile(contentUrlRegex, Pattern.UNICODE_CHARACTER_CLASS);
}
From source file:org.apache.nifi.processors.standard.EvaluateRegularExpression.java
/**
 * Builds the {@link Pattern} compile flags from the boolean properties of the
 * given context: each property that evaluates to {@code true} contributes the
 * {@code Pattern} flag of the same name.
 *
 * @param context the context the properties are read from
 * @return the bitwise OR of all enabled flags, or {@code 0} if none is enabled
 */
int getCompileFlags(ProcessContext context) {
    int compileFlags = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        compileFlags |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        compileFlags |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        compileFlags |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        compileFlags |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        compileFlags |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        compileFlags |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        compileFlags |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return compileFlags;
}