List of usage examples for java.util.regex.Matcher.end
public int end(String name)
From source file:com.example.app.support.address.AddressParser.java
/** * Passes the address to the jgeocoder address parser and * formats the results into a string map. Also handles * the strange case of highways -- that is, street type * PRECEDING street_number.//w ww .jav a 2 s .c om * * @param address the address * @param autoCorrectSpelling flag. * * @return the results. */ private static Map<AddressComponent, String> prepareAddressForParsingAndParse(String address, boolean autoCorrectSpelling) { Map<AddressComponent, String> results = jGeocodeParseAddress(address, autoCorrectSpelling); if (results == null) { return null; // Something went wrong in the process of parsing the address } String splitStreetNumber = results.get(NUMBER); String splitStreetDir = results.get(PREDIR); String splitStreetName = results.get(STREET); String splitStreetType = results.get(TYPE); String splitUnitNumber = results.get(LINE2); Collection<String> streetDirs = getStreetDirs(); /* if the street name is a direction then chances * are that the street direction was pulled into * the street name incorrectly and the street name * is in the unit number (following conditional) */ if (splitStreetName != null && streetDirs.contains(splitStreetName.toLowerCase()) && splitStreetDir == null) { splitStreetDir = splitStreetName; splitStreetName = null; } // We need to do some switching if the street name is null (usually a result of Hwy 19 or Road 5) // -- Second case --> Or if street type is null and unit number is non null we assume that part of the street // name got pulled into the unit number if ((splitStreetName == null && splitStreetType != null && splitUnitNumber != null) || (splitStreetType == null && splitUnitNumber != null)) { if (splitStreetName == null) { splitStreetName = ""; } // if we have a street type then we are going to want to pull in // the first part of the unit number into street name (e.g. 
hwy 19 or road 52) if (splitStreetType != null) { Pattern p1 = Pattern.compile("^\\s?(\\S+)\\s?"); Matcher m1 = p1.matcher(splitUnitNumber); if (m1.find()) { String streetTypePart = m1.group(1); splitStreetName = createStreetName(splitStreetName, splitStreetType, streetTypePart); splitUnitNumber = splitUnitNumber.substring(m1.end(0)); // since we pulled in the street type we don't want to pull it in again if the next matcher matches splitStreetType = null; } } // Get the indexes of unit or a hash mark as clues for where to split int unitIndex = splitUnitNumber.toLowerCase().indexOf("unit"); int hashIndex = splitUnitNumber.indexOf(HASHCODE_VALUE); // Part of the unit number to be removed and added to street name String partOfStreetName = ""; // We want to leave the part of the unit number that has a hash mark or the string "unit" // in the unit number if (unitIndex != -1 || hashIndex != -1) { if (unitIndex != -1) { if (unitIndex == 0) // Unit number starts with a # so the whole unit number stays as the unit number { partOfStreetName = ""; } else { partOfStreetName = splitUnitNumber.substring(0, unitIndex - 1); } splitUnitNumber = splitUnitNumber.substring(unitIndex); } else { if (hashIndex == 0) // Unit number starts with a # so the whole unit number stays as the unit number { partOfStreetName = ""; } else { partOfStreetName = splitUnitNumber.substring(0, hashIndex - 1); } splitUnitNumber = splitUnitNumber.substring(hashIndex); } splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName); } else // We can't find any clues regarding what part of the unit number is actually the unit number { // Pattern to get the last string so we can look at it to see if it looks like a unit number // We assume that any string less than 3 characters or that is a number is a unit number Pattern p2 = Pattern.compile("\\s?(\\S+)$"); Matcher m2 = p2.matcher(splitUnitNumber); if (m2.find()) { // the last word in the unit number is what we look at to try to 
guess // whether it is a unit number String lastWord = m2.group(1); if (lastWord.length() < 3 || isNumeric(lastWord)) { partOfStreetName = splitUnitNumber.substring(0, m2.start(0)); splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName); splitUnitNumber = lastWord; } else { splitStreetName += ' ' + splitUnitNumber; splitUnitNumber = null; } } } // Either streetType was pulled into streetName or it started as null so it should be null splitStreetType = null; // set splitUnitNumber to null if it's empty if (splitUnitNumber != null && splitUnitNumber.isEmpty()) { splitUnitNumber = null; } } results.put(NUMBER, splitStreetNumber); results.put(PREDIR, splitStreetDir); results.put(STREET, splitStreetName); results.put(TYPE, splitStreetType); results.put(LINE2, splitUnitNumber); return results; }
From source file:io.github.seleniumquery.by.common.preparser.NotEqualsAttributeSelectorFix.java
/**
 * Boldly attempts to change all "element[attribute!=value]" into "element:not([attribute=value])".
 *
 * It uses regex, not a parser. In an ideal world, the CSS parser would accept such syntax, but it'd
 * be too much work to fork and extend it (and keep it updated).
 *
 * The method tries to ignore declarations inside strings, such as: "element:contains('[attribute!=value]')",
 * and ":contains()" contents, even when not escaped, such as: "element:contains([attribute!=value])",
 * which is a valid selector, btw.
 *
 * Matches are located in a copy of the input that went through removeStrings and removeContains,
 * and their offsets are then applied to the real input.
 * NOTE(review): this relies on both helpers preserving character positions -- confirm.
 *
 * @param input An ordinary selector.
 * @return The same selector with the "[attribute!=value]"s turned to ":not([attribute=value])".
 */
public String turnAttributeNotEqualsIntoNotAttributeEquals(String input) {
    if (!input.matches(".*" + ATTR_NEQ_REGEX + ".*")) {
        return input;
    }
    String inputWithoutStrings = removeStrings(input);
    String inputWithoutStringsAndContains = removeContains(inputWithoutStrings);

    StringBuilder sb = new StringBuilder(input);
    Matcher m = ATTR_NEQ_PATTERN.matcher(inputWithoutStringsAndContains);
    // Matcher offsets refer to the untouched input, while sb grows or shrinks with every
    // replacement; shift is the accumulated length difference of the replacements made so far.
    int shift = 0;
    while (m.find()) {
        int start = m.start(1);
        int end = m.end(2);
        String leftPart = input.substring(start, m.end(1));
        String rightPart = input.substring(m.start(2), end);
        String replacement = ":not(" + leftPart + rightPart + ")";
        sb.replace(start + shift, end + shift, replacement);
        shift += replacement.length() - (end - start);
    }
    return sb.toString();
}
From source file:com.edgenius.wiki.render.filter.UserFilter.java
public List<Region> getRegions(CharSequence input) { final List<Region> list = new ArrayList<Region>(); regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() { public void handleMatch(StringBuffer buffer, Matcher matcher) { int contentStart = matcher.start(2); int contentEnd = matcher.end(2); int start = contentStart - 1; int end = contentEnd + 1; list.add(new Region(UserFilter.this, true, start, end, contentStart, contentEnd)); }//www . j a va2 s . c om }); return list; }
From source file:info.magnolia.cms.filters.Mapping.java
/**
 * Determines the index of the first pathInfo character: the end offset of capturing group 1
 * when the matcher's pattern declares at least one group, otherwise the end offset of the
 * whole match.
 *
 * @param matcher a matcher that has already matched, or {@code null} when the uri did not
 *                match any mapping
 * @return the end offset described above, or -1 when {@code matcher} is {@code null}
 */
private int determineMatchingEnd(Matcher matcher) {
    if (matcher == null) {
        return -1;
    }
    return matcher.groupCount() > 0 ? matcher.end(1) : matcher.end();
}
From source file:com.edgenius.wiki.render.filter.ImageFilter.java
public List<Region> getRegions(CharSequence input) { final List<Region> list = new ArrayList<Region>(); regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() { public void handleMatch(StringBuffer buffer, Matcher matcher) { int contentStart = matcher.start(2); int contentEnd = matcher.end(2); int start = contentStart - 1; int end = contentEnd + 1; //!image! only 1 region, and it is immutable list.add(new Region(ImageFilter.this, true, start, end, contentStart, contentEnd)); }/* ww w . ja v a2s .co m*/ }); return list; }
From source file:com.agloco.util.StringUtil.java
public static String replace(String source, String regex, int index, String replacement) { StringBuffer result = new StringBuffer(); if (source != null && regex != null) { Pattern p = Pattern.compile(regex); Matcher m = p.matcher(source); boolean find = m.find(0); if (find && m.groupCount() >= index) { if (m.start(index) > 0) result.append(source.substring(0, m.start(index))); result.append(replacement);// w w w.j a v a2 s . com if (m.end(index) < source.length()) result.append(source.substring(m.end(index))); } else { result.append(source); } } return result.toString(); }
From source file:com.boozallen.cognition.ingest.storm.bolt.starter.LineRegexReplaceInRegionBolt.java
String replace(String record, Pattern pattern, String regex, String replacement) { Matcher match = pattern.matcher(record); if (match.find() && match.groupCount() > 0) { // only replace the first group int startPos = match.start(1); int stopPos = match.end(1); String replaceString = match.group(1).replaceAll(regex, replacement); return record.substring(0, startPos) + replaceString + record.substring(stopPos); } else {/* ww w . jav a 2 s . c o m*/ // no match, returns original return record; } }
From source file:fr.eurecom.nerd.core.proxy.SaploClient.java
private List<TEntity> parse(List<SaploTag> tags, String text, OntologyType otype) { List<TEntity> result = new LinkedList<TEntity>(); Map<String, Integer> map = new HashMap<String, Integer>(); for (SaploTag tag : tags) { String label = tag.getTagWord(); String type = tag.getCategory().toString(); String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString(); Double confidence = tag.getRelevance(); //logic to compute the startchar and endchar of the entity within the text Integer startchar = null, endchar = null; if (map.containsKey(label)) { int value = map.get(label); map.remove(label);/* ww w . j av a 2 s . c om*/ map.put(label, new Integer(value + 1)); } else map.put(label, new Integer(1)); try { Pattern p = Pattern.compile("\\b" + label + "\\b"); Matcher m = p.matcher(text); for (int j = 0; j < map.get(label) && m.find(); j++) { startchar = m.start(0); endchar = m.end(0); if (containsAtIndex(result, startchar, endchar)) j--; } if (startchar != null && endchar != null) { TEntity extraction = new TEntity(label, type, null, nerdType.toString(), startchar, endchar, confidence, SOURCE); result.add(extraction); } } catch (PatternSyntaxException eregex) { eregex.printStackTrace(); } } return result; }
From source file:i18nplugin.PropertiesEntryNode.java
private List<String> getArgumentList(String input) { List<String> args = new ArrayList<String>(); Matcher argumentMatcher = PATTERN_ARGUMENTS.matcher(input); int index = 0; while (argumentMatcher.matches()) { String argument = argumentMatcher.group(1); index += argumentMatcher.end(1); args.add(argument);//from w w w. ja v a 2 s.co m argumentMatcher = PATTERN_ARGUMENTS.matcher(input.substring(index)); } Collections.sort(args); return args; }
From source file:com.zextras.zimbradrive.soap.SearchRequestHdlr.java
private String getStandardQuery(String query) { StringBuilder parsedQueryBuilder = new StringBuilder(); Pattern nonQuotedTokenSValuePattern = Pattern.compile("([^ :]+:)([^\"]*?)( |$)"); //preTokenDelimiter tokenName : nonQuotedTokenValue postTokenDelimiter Matcher nonQuotedTokenSValueMatcher = nonQuotedTokenSValuePattern.matcher(query); int lastMatchEndIndex = 0; while (nonQuotedTokenSValueMatcher.find()) { String preMatchValueQuery = query.substring(lastMatchEndIndex, nonQuotedTokenSValueMatcher.end(1)); String matchValueQuery = query.substring(nonQuotedTokenSValueMatcher.start(2), nonQuotedTokenSValueMatcher.end(2)); parsedQueryBuilder.append(preMatchValueQuery).append("\"").append(matchValueQuery).append("\""); lastMatchEndIndex = nonQuotedTokenSValueMatcher.end(2); }/* w ww .j ava 2 s. com*/ parsedQueryBuilder.append(query.substring(lastMatchEndIndex)); return parsedQueryBuilder.toString(); }