Example usage for java.util.regex Matcher end

Introduction

This page collects example usages of java.util.regex.Matcher.end.

Prototype

public int end(String name)

Source Link

Document

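Returns the offset after the last character of the subsequence captured by the given named-capturing group during the previous match operation. Note that the examples below call the related overloads end(int group) and end(), which return the same kind of offset for a numbered group and for the whole match, respectively.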

Usage

From source file:com.example.app.support.address.AddressParser.java

/**
 * Passes the address to the jgeocoder address parser and
 * formats the results into a string map. Also handles
 * the strange case of highways -- that is, street type
 * PRECEDING street_number.
 *
 * <p>After parsing, the NUMBER, PREDIR, STREET, TYPE and LINE2 entries are read,
 * possibly rearranged (see the inline comments), and written back into the same
 * map instance that the parser returned.
 *
 * @param address the address
 * @param autoCorrectSpelling flag forwarded unchanged to {@code jGeocodeParseAddress}.
 *
 * @return the (mutated) parser results, or {@code null} if the parser returned {@code null}.
 */
private static Map<AddressComponent, String> prepareAddressForParsingAndParse(String address,
        boolean autoCorrectSpelling) {
    Map<AddressComponent, String> results = jGeocodeParseAddress(address, autoCorrectSpelling);
    if (results == null) {
        return null; // Something went wrong in the process of parsing the address
    }

    // Work on local copies of the components; they are written back at the end.
    String splitStreetNumber = results.get(NUMBER);
    String splitStreetDir = results.get(PREDIR);
    String splitStreetName = results.get(STREET);
    String splitStreetType = results.get(TYPE);
    String splitUnitNumber = results.get(LINE2);

    // The lookup below lower-cases the street name first, so this collection
    // presumably holds lower-case direction words -- confirm against getStreetDirs().
    Collection<String> streetDirs = getStreetDirs();

    /* if the street name is a direction then chances
     * are that the street direction was pulled into
     * the street name incorrectly and the street name
     * is in the unit number (following conditional)
     */
    if (splitStreetName != null && streetDirs.contains(splitStreetName.toLowerCase())
            && splitStreetDir == null) {
        splitStreetDir = splitStreetName;
        splitStreetName = null;
    }

    // We need to do some switching if the street name is null (usually a result of Hwy 19 or Road 5)
    // -- Second case --> Or if street type is null and unit number is non null we assume that part of the street
    // name got pulled into the unit number
    // Both alternatives require a non-null unit number, so splitUnitNumber is non-null on entry to this block.
    if ((splitStreetName == null && splitStreetType != null && splitUnitNumber != null)
            || (splitStreetType == null && splitUnitNumber != null)) {
        if (splitStreetName == null) {
            splitStreetName = "";
        }

        // if we have a street type then we are going to want to pull in
        // the first part of the unit number into street name (e.g. hwy 19 or road 52)
        if (splitStreetType != null) {
            // First whitespace-delimited token of the unit number, with at most one
            // whitespace character on either side.
            Pattern p1 = Pattern.compile("^\\s?(\\S+)\\s?");
            Matcher m1 = p1.matcher(splitUnitNumber);

            if (m1.find()) {
                String streetTypePart = m1.group(1);
                splitStreetName = createStreetName(splitStreetName, splitStreetType, streetTypePart);
                // Drop the consumed token (and its surrounding whitespace) from the unit number.
                splitUnitNumber = splitUnitNumber.substring(m1.end(0));
                // since we pulled in the street type we don't want to pull it in again if the next matcher matches
                splitStreetType = null;
            }
        }

        // Get the indexes of unit or a hash mark as clues for where to split
        // ("unit" is searched case-insensitively; HASHCODE_VALUE is presumably "#" -- confirm.)
        int unitIndex = splitUnitNumber.toLowerCase().indexOf("unit");
        int hashIndex = splitUnitNumber.indexOf(HASHCODE_VALUE);

        // Part of the unit number to be removed and added to street name
        String partOfStreetName = "";
        // We want to leave the part of the unit number that has a hash mark or the string "unit"
        // in the unit number
        if (unitIndex != -1 || hashIndex != -1) {
            // When both clues are present, the "unit" position takes precedence over the hash position.
            if (unitIndex != -1) {

                if (unitIndex == 0) // Unit number starts with "unit" so the whole unit number stays as the unit number
                {
                    partOfStreetName = "";
                } else {
                    // NOTE(review): "unitIndex - 1" also drops the single character right before "unit";
                    // presumably that character is a separating space -- confirm.
                    partOfStreetName = splitUnitNumber.substring(0, unitIndex - 1);
                }
                splitUnitNumber = splitUnitNumber.substring(unitIndex);
            } else {
                if (hashIndex == 0) // Unit number starts with the hash marker so the whole unit number stays as the unit number
                {
                    partOfStreetName = "";
                } else {
                    // NOTE(review): as above, the character right before the hash marker is discarded.
                    partOfStreetName = splitUnitNumber.substring(0, hashIndex - 1);
                }
                splitUnitNumber = splitUnitNumber.substring(hashIndex);
            }
            splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName);
        } else // We can't find any clues regarding what part of the unit number is actually the unit number
        {
            // Pattern to get the last string so we can look at it to see if it looks like a unit number
            // We assume that any string less than 3 characters or that is a number is a unit number
            Pattern p2 = Pattern.compile("\\s?(\\S+)$");
            Matcher m2 = p2.matcher(splitUnitNumber);

            if (m2.find()) {
                // the last word in the unit number is what we look at to try to guess
                // whether it is a unit number
                String lastWord = m2.group(1);
                if (lastWord.length() < 3 || isNumeric(lastWord)) {
                    // Everything before the last word (and its leading whitespace character) joins the street name.
                    partOfStreetName = splitUnitNumber.substring(0, m2.start(0));
                    splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName);
                    splitUnitNumber = lastWord;
                } else {
                    // The last word does not look like a unit number: the whole remainder
                    // is appended to the street name and no unit number is kept.
                    splitStreetName += ' ' + splitUnitNumber;
                    splitUnitNumber = null;
                }
            }
        }
        // Either streetType was pulled into streetName or it started as null so it should be null
        splitStreetType = null;

        // set splitUnitNumber to null if it's empty
        if (splitUnitNumber != null && splitUnitNumber.isEmpty()) {
            splitUnitNumber = null;
        }
    }

    // Write the (possibly rearranged) components back into the parser's result map.
    results.put(NUMBER, splitStreetNumber);
    results.put(PREDIR, splitStreetDir);
    results.put(STREET, splitStreetName);
    results.put(TYPE, splitStreetType);
    results.put(LINE2, splitUnitNumber);

    return results;
}

From source file:io.github.seleniumquery.by.common.preparser.NotEqualsAttributeSelectorFix.java

/**
 * Boldly attempts to change all "element[attribute!=value]" into a "element:not([attribute=value])".
 *
 * It uses regex, not a parser. In an ideal world, the CSS parser would accept such syntax, but it'd
 * be too much work to fork and extend it (and keep it updated).
 *
 * The method tries to ignore declarations inside strings, such as: "element:contains('[attribute!=value]')",
 * and ":contains()" contents, even when not escaped, such as: "element:contains([attribute!=value])",
 * which is a valid selector, btw.
 *
 * Matching is done on a copy of the input with strings and ":contains()" contents removed, while the
 * replaced text is taken from the original input using the same offsets. This assumes the two
 * removal helpers keep every character at its original offset -- confirm against those helpers.
 *
 * @param input An ordinary selector.
 * @return The same selector with the "[attribute!=value]"s turned to ":not([attribute=value])".
 */
public String turnAttributeNotEqualsIntoNotAttributeEquals(String input) {
    if (!input.matches(".*" + ATTR_NEQ_REGEX + ".*")) {
        return input;
    }
    String inputWithoutStrings = removeStrings(input);
    String inputWithoutStringsAndContains = removeContains(inputWithoutStrings);

    StringBuilder sb = new StringBuilder(input);
    // Match offsets refer to the ORIGINAL input, but every replacement changes the length of sb.
    // "shift" is the accumulated length difference, so later matches land at the right position.
    int shift = 0;

    Matcher m = ATTR_NEQ_PATTERN.matcher(inputWithoutStringsAndContains);
    while (m.find()) {
        int matchStart = m.start(1);
        int matchEnd = m.end(2);
        String leftPart = input.substring(matchStart, m.end(1));
        String rightPart = input.substring(m.start(2), matchEnd);
        String replacement = ":not(" + leftPart + rightPart + ")";

        sb.replace(matchStart + shift, matchEnd + shift, replacement);
        shift += replacement.length() - (matchEnd - matchStart);
    }

    return sb.toString();
}

From source file:com.edgenius.wiki.render.filter.UserFilter.java

/**
 * Collects one region per match reported by the regex provider for the given input.
 *
 * @param input the text to scan
 * @return the regions found, in the order the visitor was called
 */
public List<Region> getRegions(CharSequence input) {
    final List<Region> regions = new ArrayList<Region>();
    regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher matcher) {
            // Group 2 delimits the content; the outer bounds reach one character
            // further on each side of it.
            final int bodyFrom = matcher.start(2);
            final int bodyTo = matcher.end(2);
            regions.add(new Region(UserFilter.this, true, bodyFrom - 1, bodyTo + 1, bodyFrom, bodyTo));
        }
    });
    return regions;
}

From source file:info.magnolia.cms.filters.Mapping.java

/**
 * Determines the index of the first pathInfo character.
 *
 * @param matcher a matcher that has already matched, or {@code null} when the uri matched no mapping
 * @return -1 for a {@code null} matcher; otherwise the end offset of group 1 when the pattern
 *         declares at least one capturing group, else the end offset of the whole match
 */
private int determineMatchingEnd(Matcher matcher) {
    if (matcher == null) {
        return -1;
    }
    final boolean patternHasGroups = matcher.groupCount() > 0;
    return patternHasGroups ? matcher.end(1) : matcher.end();
}

From source file:com.edgenius.wiki.render.filter.ImageFilter.java

/**
 * Collects one region per match reported by the regex provider for the given input.
 *
 * @param input the text to scan
 * @return the regions found, in the order the visitor was called
 */
public List<Region> getRegions(CharSequence input) {
    final List<Region> regions = new ArrayList<Region>();
    regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher matcher) {
            // Group 2 delimits the content; the outer bounds reach one character
            // further on each side of it.
            final int bodyFrom = matcher.start(2);
            final int bodyTo = matcher.end(2);

            // !image! yields exactly one region per match, and it is immutable
            regions.add(new Region(ImageFilter.this, true, bodyFrom - 1, bodyTo + 1, bodyFrom, bodyTo));
        }
    });
    return regions;
}

From source file:com.agloco.util.StringUtil.java

/**
 * Replaces the text captured by one group of the FIRST match of {@code regex} in {@code source}.
 *
 * @param source      the text to search; {@code null} yields an empty string
 * @param regex       the regular expression; {@code null} yields an empty string
 * @param index       the capturing group whose text is replaced (0 is the whole match)
 * @param replacement the literal text put in place of the group (no {@code $n} expansion is done)
 * @return {@code source} with the group replaced; {@code source} unchanged when the pattern does not
 *         match, declares fewer than {@code index} groups, or the group did not take part in the
 *         match; an empty string when {@code source} or {@code regex} is {@code null}
 * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
 * @throws IndexOutOfBoundsException if {@code index} is negative and the pattern matches
 */
public static String replace(String source, String regex, int index, String replacement) {
    if (source == null || regex == null) {
        return "";
    }

    Matcher m = Pattern.compile(regex).matcher(source);
    if (!m.find(0) || m.groupCount() < index) {
        return source;
    }

    int groupStart = m.start(index);
    if (groupStart < 0) {
        // The group exists in the pattern but did not take part in this match (e.g. "(x)?b"
        // against "b"): start/end are -1, so there is nothing to replace.
        return source;
    }

    StringBuilder result = new StringBuilder();
    result.append(source, 0, groupStart);
    result.append(replacement);
    result.append(source, m.end(index), source.length());
    return result.toString();
}

From source file:com.boozallen.cognition.ingest.storm.bolt.starter.LineRegexReplaceInRegionBolt.java

/**
 * Applies {@code regex} -> {@code replacement} only inside the region captured by group 1 of the
 * first match of {@code pattern}; the rest of the record is left untouched.
 *
 * @param record      the line to process
 * @param pattern     locates the region; its first capturing group delimits it
 * @param regex       expression replaced inside the region (as in {@link String#replaceAll})
 * @param replacement replacement text (as in {@link String#replaceAll})
 * @return the record with the region rewritten, or the original record when the pattern does not
 *         match, has no capturing group, or group 1 did not take part in the match
 */
String replace(String record, Pattern pattern, String regex, String replacement) {
    Matcher match = pattern.matcher(record);
    if (!match.find() || match.groupCount() < 1) {
        // no match, returns original
        return record;
    }

    // only replace the first group
    String region = match.group(1);
    if (region == null) {
        // Group 1 is optional in the pattern and was skipped by this match; there is no region
        // to rewrite (and start/end would be -1).
        return record;
    }

    String rewritten = region.replaceAll(regex, replacement);
    return record.substring(0, match.start(1)) + rewritten + record.substring(match.end(1));
}

From source file:fr.eurecom.nerd.core.proxy.SaploClient.java

/**
 * Converts the tags into entities, locating each tag word inside {@code text}.
 *
 * <p>For every tag the method counts how many times its label has been seen so far and then
 * walks the whole-word occurrences of that label in the text: one occurrence per time the label
 * has been seen, not counting occurrences for which {@code containsAtIndex} reports that the
 * result list already holds that span. The last occurrence visited supplies the start and end
 * offsets. A tag whose label is never found in the text produces no entity.
 *
 * <p>The label is matched literally: it is quoted before being embedded in the regular
 * expression, so labels containing regex metacharacters (for example "C++" or "AT&amp;T (US)")
 * are searched as plain text instead of being misread as a pattern or failing to compile.
 * A {@code null} label cannot be located and therefore yields no entity.
 *
 * @param tags  the tags to convert
 * @param text  the text the tags were extracted from
 * @param otype the ontology used to map the tag category to a NERD type
 * @return the entities in tag order
 */
private List<TEntity> parse(List<SaploTag> tags, String text, OntologyType otype) {
    List<TEntity> result = new LinkedList<TEntity>();
    // label -> number of tags seen so far that carry this label
    Map<String, Integer> map = new HashMap<String, Integer>();

    for (SaploTag tag : tags) {
        String label = tag.getTagWord();
        String type = tag.getCategory().toString();
        String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString();
        Double confidence = tag.getRelevance();

        Integer seenBefore = map.get(label);
        int occurrences = (seenBefore == null) ? 1 : seenBefore + 1;
        map.put(label, occurrences);

        //logic to compute the startchar and endchar of the entity within the text
        Integer startchar = null, endchar = null;
        if (label != null) {
            Pattern p = Pattern.compile("\\b" + Pattern.quote(label) + "\\b");
            Matcher m = p.matcher(text);
            for (int j = 0; j < occurrences && m.find(); j++) {
                startchar = m.start(0);
                endchar = m.end(0);
                if (containsAtIndex(result, startchar, endchar)) {
                    // this span is already in the result list: do not count it, try the next one
                    j--;
                }
            }
        }

        if (startchar != null && endchar != null) {
            TEntity extraction = new TEntity(label, type, null, nerdType, startchar, endchar,
                    confidence, SOURCE);

            result.add(extraction);
        }
    }

    return result;
}

From source file:i18nplugin.PropertiesEntryNode.java

/**
 * Extracts the arguments found in the input and returns them sorted.
 *
 * <p>The argument pattern is applied to the whole remaining input with {@code matches()}; group 1
 * of each successful match is collected, and the scan then restarts on the input cut at the
 * accumulated end offset of that group. What counts as an argument is defined by
 * {@code PATTERN_ARGUMENTS}.
 *
 * @param input the text to scan
 * @return the collected arguments in natural (sorted) order
 */
private List<String> getArgumentList(String input) {
    final List<String> found = new ArrayList<String>();
    int offset = 0;
    for (Matcher m = PATTERN_ARGUMENTS.matcher(input); m.matches();
            m = PATTERN_ARGUMENTS.matcher(input.substring(offset))) {
        found.add(m.group(1));
        // end(1) is relative to the substring just matched, hence the running sum
        offset += m.end(1);
    }
    Collections.sort(found);
    return found;
}

From source file:com.zextras.zimbradrive.soap.SearchRequestHdlr.java

/**
 * Wraps every unquoted token value of the query in double quotes.
 *
 * <p>A token is {@code name:value} where the name contains no space or colon and the value
 * contains no double quote and runs up to the next space or the end of the query.
 * For example {@code type:pdf name:foo} becomes {@code type:"pdf" name:"foo"}; text that is not
 * part of such a token is copied through unchanged.
 *
 * @param query the raw query
 * @return the query with unquoted token values quoted
 */
private String getStandardQuery(String query) {
    // group 1: tokenName plus ':'   group 2: nonQuotedTokenValue   group 3: postTokenDelimiter
    final Matcher tokens = Pattern.compile("([^ :]+:)([^\"]*?)( |$)").matcher(query);
    final StringBuilder quoted = new StringBuilder();

    int copiedUpTo = 0;
    while (tokens.find()) {
        // everything not yet copied, up to and including "tokenName:"
        quoted.append(query, copiedUpTo, tokens.end(1));
        quoted.append("\"").append(tokens.group(2)).append("\"");
        // the delimiter after the value is copied with the next chunk
        copiedUpTo = tokens.end(2);
    }

    quoted.append(query, copiedUpTo, query.length());
    return quoted.toString();
}