List of usage examples for java.util.regex Matcher start
public int start(String name)
From source file:com.edgenius.wiki.render.filter.HeadingFilter.java
public List<Region> getRegions(CharSequence input) { final List<Region> list = new ArrayList<Region>(); regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() { public void handleMatch(StringBuffer buffer, Matcher matcher) { int contentStart = matcher.start(4); int contentEnd = matcher.end(4); int start = contentStart; int end = contentEnd; list.add(new Region(HeadingFilter.this, false, start, end, contentStart, contentEnd)); }//from ww w . j a v a 2s. c o m }); return list; }
From source file:com.agloco.util.StringUtil.java
public static String replace(String source, String regex, int index, String replacement) { StringBuffer result = new StringBuffer(); if (source != null && regex != null) { Pattern p = Pattern.compile(regex); Matcher m = p.matcher(source); boolean find = m.find(0); if (find && m.groupCount() >= index) { if (m.start(index) > 0) result.append(source.substring(0, m.start(index))); result.append(replacement);//from w ww . ja v a 2s.c o m if (m.end(index) < source.length()) result.append(source.substring(m.end(index))); } else { result.append(source); } } return result.toString(); }
From source file:com.edgenius.wiki.render.filter.UserFilter.java
public List<Region> getRegions(CharSequence input) { final List<Region> list = new ArrayList<Region>(); regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() { public void handleMatch(StringBuffer buffer, Matcher matcher) { int contentStart = matcher.start(2); int contentEnd = matcher.end(2); int start = contentStart - 1; int end = contentEnd + 1; list.add(new Region(UserFilter.this, true, start, end, contentStart, contentEnd)); }//from ww w .jav a 2 s .c o m }); return list; }
From source file:io.github.seleniumquery.by.common.preparser.NotEqualsAttributeSelectorFix.java
/** * Boldly attempts to change all "element[attribute!=value]" into a "element:not([attribute=value])". * * It uses regex, not a parser. In an idel world, the CSS parser would accept such syntax, but it'd * be too much work to fork and extend it (and keep it updated). * * The method tries to ignore declarations inside strings, such as: "element:contains('[attribute!=value]')", * and ":contains()" contents, even when not escaped, such as: "element:contains([attribute!=value])", * which is a valid selector, btw./* www . j ava2 s .c om*/ * * @param input An ordinary selector. * @return The same selector with the "[attribute!=value]"s turned to ":not([attribute=value])". */ public String turnAttributeNotEqualsIntoNotAttributeEquals(String input) { if (!input.matches(".*" + ATTR_NEQ_REGEX + ".*")) { return input; } String inputWithoutStrings = removeStrings(input); String inputWithoutStringsAndContains = removeContains(inputWithoutStrings); StringBuilder sb = new StringBuilder(input); Matcher m = ATTR_NEQ_PATTERN.matcher(inputWithoutStringsAndContains); while (m.find()) { String leftPart = input.substring(m.start(1), m.end(1)); String rightPart = input.substring(m.start(2), m.end(2)); sb.replace(m.start(1), m.end(2), ":not(" + leftPart + rightPart + ")"); } return sb.toString(); }
From source file:com.edgenius.wiki.render.filter.ImageFilter.java
public List<Region> getRegions(CharSequence input) { final List<Region> list = new ArrayList<Region>(); regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() { public void handleMatch(StringBuffer buffer, Matcher matcher) { int contentStart = matcher.start(2); int contentEnd = matcher.end(2); int start = contentStart - 1; int end = contentEnd + 1; //!image! only 1 region, and it is immutable list.add(new Region(ImageFilter.this, true, start, end, contentStart, contentEnd)); }//from www.jav a 2s.c o m }); return list; }
From source file:com.boozallen.cognition.ingest.storm.bolt.starter.LineRegexReplaceInRegionBolt.java
String replace(String record, Pattern pattern, String regex, String replacement) { Matcher match = pattern.matcher(record); if (match.find() && match.groupCount() > 0) { // only replace the first group int startPos = match.start(1); int stopPos = match.end(1); String replaceString = match.group(1).replaceAll(regex, replacement); return record.substring(0, startPos) + replaceString + record.substring(stopPos); } else {//from w w w . j a v a 2s .co m // no match, returns original return record; } }
From source file:fr.eurecom.nerd.core.proxy.SaploClient.java
private List<TEntity> parse(List<SaploTag> tags, String text, OntologyType otype) { List<TEntity> result = new LinkedList<TEntity>(); Map<String, Integer> map = new HashMap<String, Integer>(); for (SaploTag tag : tags) { String label = tag.getTagWord(); String type = tag.getCategory().toString(); String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString(); Double confidence = tag.getRelevance(); //logic to compute the startchar and endchar of the entity within the text Integer startchar = null, endchar = null; if (map.containsKey(label)) { int value = map.get(label); map.remove(label);//w ww. ja v a 2s. co m map.put(label, new Integer(value + 1)); } else map.put(label, new Integer(1)); try { Pattern p = Pattern.compile("\\b" + label + "\\b"); Matcher m = p.matcher(text); for (int j = 0; j < map.get(label) && m.find(); j++) { startchar = m.start(0); endchar = m.end(0); if (containsAtIndex(result, startchar, endchar)) j--; } if (startchar != null && endchar != null) { TEntity extraction = new TEntity(label, type, null, nerdType.toString(), startchar, endchar, confidence, SOURCE); result.add(extraction); } } catch (PatternSyntaxException eregex) { eregex.printStackTrace(); } } return result; }
From source file:com.example.app.support.address.AddressParser.java
/**
 * Passes the address to the jgeocoder address parser and
 * formats the results into a string map. Also handles
 * the strange case of highways -- that is, street type
 * PRECEDING street_number.
 *
 * <p>After parsing, the NUMBER, PREDIR, STREET, TYPE and LINE2 components are read out,
 * possibly rearranged (text is moved from LINE2 into STREET, TYPE is cleared), and written
 * back into the same map, which is then returned.
 *
 * @param address the address
 * @param autoCorrectSpelling flag.
 *
 * @return the results, or {@code null} when {@code jGeocodeParseAddress} returned {@code null}.
 */
private static Map<AddressComponent, String> prepareAddressForParsingAndParse(String address,
        boolean autoCorrectSpelling) {
    Map<AddressComponent, String> results = jGeocodeParseAddress(address, autoCorrectSpelling);
    if (results == null) {
        return null; // Something went wrong in the process of parsing the address
    }
    String splitStreetNumber = results.get(NUMBER);
    String splitStreetDir = results.get(PREDIR);
    String splitStreetName = results.get(STREET);
    String splitStreetType = results.get(TYPE);
    String splitUnitNumber = results.get(LINE2);
    Collection<String> streetDirs = getStreetDirs();
    /* if the street name is a direction then chances
     * are that the street direction was pulled into
     * the street name incorrectly and the street name
     * is in the unit number (following conditional)
     */
    if (splitStreetName != null && streetDirs.contains(splitStreetName.toLowerCase()) && splitStreetDir == null) {
        splitStreetDir = splitStreetName;
        splitStreetName = null;
    }
    // We need to do some switching if the street name is null (usually a result of Hwy 19 or Road 5)
    // -- Second case --> Or if street type is null and unit number is non null we assume that part of the street
    // name got pulled into the unit number
    // Both alternatives require a non-null unit number, so it can be dereferenced below.
    if ((splitStreetName == null && splitStreetType != null && splitUnitNumber != null)
            || (splitStreetType == null && splitUnitNumber != null)) {
        if (splitStreetName == null) {
            splitStreetName = "";
        }
        // if we have a street type then we are going to want to pull in
        // the first part of the unit number into street name (e.g. hwy 19 or road 52)
        if (splitStreetType != null) {
            // First whitespace-delimited word of the unit number, with at most one
            // whitespace character on either side.
            Pattern p1 = Pattern.compile("^\\s?(\\S+)\\s?");
            Matcher m1 = p1.matcher(splitUnitNumber);
            if (m1.find()) {
                String streetTypePart = m1.group(1);
                splitStreetName = createStreetName(splitStreetName, splitStreetType, streetTypePart);
                // Drop the consumed word (and its surrounding whitespace) from the unit number.
                splitUnitNumber = splitUnitNumber.substring(m1.end(0));
                // since we pulled in the street type we don't want to pull it in again if the next matcher matches
                splitStreetType = null;
            }
        }
        // Get the indexes of unit or a hash mark as clues for where to split
        int unitIndex = splitUnitNumber.toLowerCase().indexOf("unit");
        int hashIndex = splitUnitNumber.indexOf(HASHCODE_VALUE);
        // Part of the unit number to be removed and added to street name
        String partOfStreetName = "";
        // We want to leave the part of the unit number that has a hash mark or the string "unit"
        // in the unit number
        if (unitIndex != -1 || hashIndex != -1) {
            // "unit" takes precedence over the hash mark when both are present.
            if (unitIndex != -1) {
                if (unitIndex == 0) // Unit number starts with "unit" so the whole unit number stays as the unit number
                {
                    partOfStreetName = "";
                } else {
                    // NOTE(review): "unitIndex - 1" also drops the character right before
                    // "unit", presumably a separating space -- confirm.
                    partOfStreetName = splitUnitNumber.substring(0, unitIndex - 1);
                }
                splitUnitNumber = splitUnitNumber.substring(unitIndex);
            } else {
                if (hashIndex == 0) // Unit number starts with a # so the whole unit number stays as the unit number
                {
                    partOfStreetName = "";
                } else {
                    // NOTE(review): as above, the character before the hash mark is dropped.
                    partOfStreetName = splitUnitNumber.substring(0, hashIndex - 1);
                }
                splitUnitNumber = splitUnitNumber.substring(hashIndex);
            }
            splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName);
        } else // We can't find any clues regarding what part of the unit number is actually the unit number
        {
            // Pattern to get the last string so we can look at it to see if it looks like a unit number
            // We assume that any string less than 3 characters or that is a number is a unit number
            Pattern p2 = Pattern.compile("\\s?(\\S+)$");
            Matcher m2 = p2.matcher(splitUnitNumber);
            if (m2.find()) {
                // the last word in the unit number is what we look at to try to guess
                // whether it is a unit number
                String lastWord = m2.group(1);
                if (lastWord.length() < 3 || isNumeric(lastWord)) {
                    // Everything before the last word moves to the street name.
                    partOfStreetName = splitUnitNumber.substring(0, m2.start(0));
                    splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName);
                    splitUnitNumber = lastWord;
                } else {
                    // The last word does not look like a unit number: the whole text is
                    // appended to the street name and no unit number remains.
                    splitStreetName += ' ' + splitUnitNumber;
                    splitUnitNumber = null;
                }
            }
        }
        // Either streetType was pulled into streetName or it started as null so it should be null
        splitStreetType = null;
        // set splitUnitNumber to null if it's empty
        if (splitUnitNumber != null && splitUnitNumber.isEmpty()) {
            splitUnitNumber = null;
        }
    }
    // Write the (possibly rearranged) components back into the parser's result map.
    results.put(NUMBER, splitStreetNumber);
    results.put(PREDIR, splitStreetDir);
    results.put(STREET, splitStreetName);
    results.put(TYPE, splitStreetType);
    results.put(LINE2, splitUnitNumber);
    return results;
}
From source file:fr.eurecom.nerd.core.proxy.WikimetaClient.java
public List<TEntity> parse(String json, String text, OntologyType otype) throws IOException { List<TEntity> result = new LinkedList<TEntity>(); Map<String, Integer> map = new HashMap<String, Integer>(); try {/*from ww w. j a va2 s .c o m*/ JSONObject o = new JSONObject(json); JSONArray jadocument = o.getJSONArray("document"); // 3 items is Named Entities JSONObject jodocument = jadocument.getJSONObject(2); JSONArray jsonarray = jodocument.getJSONArray("Named Entities"); for (int i = 0; i < jsonarray.length(); i++) { JSONObject jo = jsonarray.getJSONObject(i); String entity = jo.getString("EN"); String type = (jo.getString("type").equals("")) ? null : jo.getString("type"); String nerdType = OntoFactory.mapper.getNerdType(otype, entity, SOURCE, type).toString(); String uri = jo.getString("URI"); //logic to compute the startchar and endchar of the entity within the text Integer startchar = null, endchar = null; if (map.containsKey(entity)) { int value = map.get(entity); map.remove(entity); map.put(entity, new Integer(value + 1)); } else map.put(entity, new Integer(1)); try { Pattern p = Pattern.compile("\\b" + entity + "\\b"); Matcher m = p.matcher(text); for (int j = 0; j < map.get(entity) && m.find(); j++) { startchar = m.start(0); endchar = m.end(0); if (containsAtIndex(result, startchar, endchar)) j--; } double confidence = 0.0; if (!jo.getString("confidenceScore").equals("")) confidence = Double.parseDouble(jo.getString("confidenceScore")); if (startchar != null && endchar != null) { TEntity extraction = new TEntity(entity, type, uri, nerdType.toString(), startchar, endchar, confidence, SOURCE); result.add(extraction); } } catch (PatternSyntaxException eregex) { eregex.printStackTrace(); } } } catch (JSONException e) { e.printStackTrace(); } return result; }