Example usage for java.util.regex Matcher start

Introduction

This page collects example usages of the start method of java.util.regex.Matcher.

Prototype

public int start(String name)

Source Link

Document

Returns the start index of the subsequence captured by the given named-capturing group during the previous match operation.

Usage

From source file:com.edgenius.wiki.render.filter.HeadingFilter.java

/**
 * Collects one {@link Region} for every match that the regex provider reports on the input.
 *
 * @param input the text to scan
 * @return the regions found, in the order the visitor was called
 */
public List<Region> getRegions(CharSequence input) {
    final List<Region> regions = new ArrayList<Region>();
    TokenVisitor<Matcher> collector = new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher matcher) {
            // Bounds come from capture group 4; the region and its content share the same span.
            final int from = matcher.start(4);
            final int to = matcher.end(4);
            regions.add(new Region(HeadingFilter.this, false, from, to, from, to));
        }
    };
    regexProvider.replaceByTokenVisitor(input, collector);
    return regions;
}

From source file:com.agloco.util.StringUtil.java

/**
 * Replaces the text captured by one group of the first match of {@code regex} in {@code source}.
 *
 * @param source      the text to search; when {@code null} an empty string is returned
 * @param regex       the regular expression; when {@code null} an empty string is returned
 * @param index       the capturing group whose text is replaced (0 is the whole match)
 * @param replacement the literal text inserted in place of the group
 * @return {@code source} with the group replaced, or {@code source} unchanged when there is no
 *         match, the pattern has fewer than {@code index} groups, or the group did not take part
 *         in the match
 * @throws IndexOutOfBoundsException if {@code index} is negative and a match was found
 */
public static String replace(String source, String regex, int index, String replacement) {
    if (source == null || regex == null) {
        return "";
    }
    Matcher m = Pattern.compile(regex).matcher(source);
    if (!m.find() || m.groupCount() < index) {
        return source;
    }
    int groupStart = m.start(index);
    if (groupStart < 0) {
        // The group exists in the pattern but did not participate in this match (e.g. "(x)?abc"),
        // so start/end are -1 and there is nothing to replace.
        return source;
    }
    StringBuilder result = new StringBuilder();
    result.append(source, 0, groupStart);
    result.append(replacement);
    result.append(source, m.end(index), source.length());
    return result.toString();
}

From source file:com.edgenius.wiki.render.filter.UserFilter.java

/**
 * Collects one {@link Region} for every match that the regex provider reports on the input.
 *
 * @param input the text to scan
 * @return the regions found, in the order the visitor was called
 */
public List<Region> getRegions(CharSequence input) {
    final List<Region> regions = new ArrayList<Region>();
    TokenVisitor<Matcher> collector = new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher matcher) {
            // Content is capture group 2; the region itself extends one character past it on each side.
            final int innerFrom = matcher.start(2);
            final int innerTo = matcher.end(2);
            regions.add(new Region(UserFilter.this, true, innerFrom - 1, innerTo + 1, innerFrom, innerTo));
        }
    };
    regexProvider.replaceByTokenVisitor(input, collector);
    return regions;
}

From source file:io.github.seleniumquery.by.common.preparser.NotEqualsAttributeSelectorFix.java

/**
 * Boldly attempts to change all "element[attribute!=value]" into a "element:not([attribute=value])".
 *
 * It uses regex, not a parser. In an idel world, the CSS parser would accept such syntax, but it'd
 * be too much work to fork and extend it (and keep it updated).
 *
 * The method tries to ignore declarations inside strings, such as: "element:contains('[attribute!=value]')",
 * and ":contains()" contents, even when not escaped, such as: "element:contains([attribute!=value])",
 * which is a valid selector, btw./*  www .  j  ava2  s  .c  om*/
 *
 * @param input An ordinary selector.
 * @return The same selector with the "[attribute!=value]"s turned to ":not([attribute=value])".
 */
public String turnAttributeNotEqualsIntoNotAttributeEquals(String input) {
    if (!input.matches(".*" + ATTR_NEQ_REGEX + ".*")) {
        return input;
    }
    String inputWithoutStrings = removeStrings(input);
    String inputWithoutStringsAndContains = removeContains(inputWithoutStrings);

    StringBuilder sb = new StringBuilder(input);

    Matcher m = ATTR_NEQ_PATTERN.matcher(inputWithoutStringsAndContains);
    while (m.find()) {
        String leftPart = input.substring(m.start(1), m.end(1));
        String rightPart = input.substring(m.start(2), m.end(2));
        sb.replace(m.start(1), m.end(2), ":not(" + leftPart + rightPart + ")");
    }

    return sb.toString();
}

From source file:com.edgenius.wiki.render.filter.ImageFilter.java

/**
 * Collects one {@link Region} for every match that the regex provider reports on the input.
 *
 * @param input the text to scan
 * @return the regions found, in the order the visitor was called
 */
public List<Region> getRegions(CharSequence input) {
    final List<Region> regions = new ArrayList<Region>();
    TokenVisitor<Matcher> collector = new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher matcher) {
            // Content is capture group 2; the region itself extends one character past it on each side.
            final int innerFrom = matcher.start(2);
            final int innerTo = matcher.end(2);

            // !image! yields exactly one region per match, and it is flagged immutable
            regions.add(new Region(ImageFilter.this, true, innerFrom - 1, innerTo + 1, innerFrom, innerTo));
        }
    };
    regexProvider.replaceByTokenVisitor(input, collector);
    return regions;
}

From source file:com.boozallen.cognition.ingest.storm.bolt.starter.LineRegexReplaceInRegionBolt.java

/**
 * Applies a regex replacement only inside the region captured by group 1 of the first match of
 * {@code pattern} in {@code record}.
 *
 * @param record      the line to process
 * @param pattern     locates the region; only its first capturing group is used
 * @param regex       the expression replaced inside the region
 * @param replacement the replacement, with {@link String#replaceAll} semantics
 * @return the record with the region rewritten, or the original record when the pattern does not
 *         match, has no capturing group, or group 1 did not take part in the match
 */
String replace(String record, Pattern pattern, String regex, String replacement) {
    Matcher match = pattern.matcher(record);
    if (!match.find() || match.groupCount() == 0) {
        // no match, returns original
        return record;
    }

    // only replace the first group
    String region = match.group(1);
    if (region == null) {
        // Group 1 is optional and was skipped by this match: start/end would be -1 and
        // calling replaceAll on the null group would throw.
        return record;
    }

    String head = record.substring(0, match.start(1));
    String tail = record.substring(match.end(1));
    return head + region.replaceAll(regex, replacement) + tail;
}

From source file:fr.eurecom.nerd.core.proxy.SaploClient.java

/**
 * Turns the tags into entities, locating each tag word inside {@code text} to obtain its
 * start and end character offsets. Tags whose word cannot be located are left out.
 *
 * @param tags  the tags to convert
 * @param text  the text in which the tag words are searched
 * @param otype the ontology passed to the NERD type mapper
 * @return the entities that could be located, in tag order
 */
private List<TEntity> parse(List<SaploTag> tags, String text, OntologyType otype) {
    List<TEntity> result = new LinkedList<TEntity>();
    // number of times each label has been seen so far among the tags
    Map<String, Integer> map = new HashMap<String, Integer>();

    for (SaploTag tag : tags) {
        String label = tag.getTagWord();
        String type = tag.getCategory().toString();
        String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString();
        Double confidence = tag.getRelevance();

        Integer seenBefore = map.get(label);
        int occurrences = (seenBefore == null) ? 1 : seenBefore.intValue() + 1;
        map.put(label, Integer.valueOf(occurrences));

        //logic to compute the startchar and endchar of the entity within the text
        Integer startchar = null, endchar = null;

        // The label is data, not a pattern: quote it so characters such as '(', '+' or '.'
        // are matched literally instead of failing to compile or matching unrelated text.
        Pattern p = Pattern.compile("\\b" + Pattern.quote(label) + "\\b");
        Matcher m = p.matcher(text);
        for (int j = 0; j < occurrences && m.find(); j++) {
            startchar = m.start(0);
            endchar = m.end(0);
            // a match that containsAtIndex reports for result does not count towards the quota
            if (containsAtIndex(result, startchar, endchar)) {
                j--;
            }
        }

        if (startchar != null && endchar != null) {
            TEntity extraction = new TEntity(label, type, null, nerdType, startchar, endchar,
                    confidence, SOURCE);

            result.add(extraction);
        }
    }

    return result;
}

From source file:com.example.app.support.address.AddressParser.java

/**
 * Passes the address to the jgeocoder address parser and
 * formats the results into a string map. Also handles
 * the strange case of highways -- that is, street type
 * PRECEDING street_number./*  www.j a v  a 2s .  c o m*/
 *
 * @param address the address
 * @param autoCorrectSpelling flag.
 *
 * @return the results.
 */
private static Map<AddressComponent, String> prepareAddressForParsingAndParse(String address,
        boolean autoCorrectSpelling) {
    Map<AddressComponent, String> results = jGeocodeParseAddress(address, autoCorrectSpelling);
    if (results == null) {
        return null; // Something went wrong in the process of parsing the address
    }

    String splitStreetNumber = results.get(NUMBER);
    String splitStreetDir = results.get(PREDIR);
    String splitStreetName = results.get(STREET);
    String splitStreetType = results.get(TYPE);
    String splitUnitNumber = results.get(LINE2);

    Collection<String> streetDirs = getStreetDirs();

    /* if the street name is a direction then chances
     * are that the street direction was pulled into
     * the street name incorrectly and the street name
     * is in the unit number (following conditional)
     */
    if (splitStreetName != null && streetDirs.contains(splitStreetName.toLowerCase())
            && splitStreetDir == null) {
        splitStreetDir = splitStreetName;
        splitStreetName = null;
    }

    // We need to do some switching if the street name is null (usually a result of Hwy 19 or Road 5)
    // -- Second case --> Or if street type is null and unit number is non null we assume that part of the street
    // name got pulled into the unit number
    if ((splitStreetName == null && splitStreetType != null && splitUnitNumber != null)
            || (splitStreetType == null && splitUnitNumber != null)) {
        if (splitStreetName == null) {
            splitStreetName = "";
        }

        // if we have a street type then we are going to want to pull in
        // the first part of the unit number into street name (e.g. hwy 19 or road 52)
        if (splitStreetType != null) {
            Pattern p1 = Pattern.compile("^\\s?(\\S+)\\s?");
            Matcher m1 = p1.matcher(splitUnitNumber);

            if (m1.find()) {
                String streetTypePart = m1.group(1);
                splitStreetName = createStreetName(splitStreetName, splitStreetType, streetTypePart);
                splitUnitNumber = splitUnitNumber.substring(m1.end(0));
                // since we pulled in the street type we don't want to pull it in again if the next matcher matches
                splitStreetType = null;
            }
        }

        // Get the indexes of unit or a hash mark as clues for where to split
        int unitIndex = splitUnitNumber.toLowerCase().indexOf("unit");
        int hashIndex = splitUnitNumber.indexOf(HASHCODE_VALUE);

        // Part of the unit number to be removed and added to street name
        String partOfStreetName = "";
        // We want to leave the part of the unit number that has a hash mark or the string "unit"
        // in the unit number
        if (unitIndex != -1 || hashIndex != -1) {
            if (unitIndex != -1) {

                if (unitIndex == 0) // Unit number starts with a # so the whole unit number stays as the unit number
                {
                    partOfStreetName = "";
                } else {
                    partOfStreetName = splitUnitNumber.substring(0, unitIndex - 1);
                }
                splitUnitNumber = splitUnitNumber.substring(unitIndex);
            } else {
                if (hashIndex == 0) // Unit number starts with a # so the whole unit number stays as the unit number
                {
                    partOfStreetName = "";
                } else {
                    partOfStreetName = splitUnitNumber.substring(0, hashIndex - 1);
                }
                splitUnitNumber = splitUnitNumber.substring(hashIndex);
            }
            splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName);
        } else // We can't find any clues regarding what part of the unit number is actually the unit number
        {
            // Pattern to get the last string so we can look at it to see if it looks like a unit number
            // We assume that any string less than 3 characters or that is a number is a unit number
            Pattern p2 = Pattern.compile("\\s?(\\S+)$");
            Matcher m2 = p2.matcher(splitUnitNumber);

            if (m2.find()) {
                // the last word in the unit number is what we look at to try to guess
                // whether it is a unit number
                String lastWord = m2.group(1);
                if (lastWord.length() < 3 || isNumeric(lastWord)) {
                    partOfStreetName = splitUnitNumber.substring(0, m2.start(0));
                    splitStreetName = createStreetName(splitStreetName, splitStreetType, partOfStreetName);
                    splitUnitNumber = lastWord;
                } else {
                    splitStreetName += ' ' + splitUnitNumber;
                    splitUnitNumber = null;
                }
            }
        }
        // Either streetType was pulled into streetName or it started as null so it should be null
        splitStreetType = null;

        // set splitUnitNumber to null if it's empty
        if (splitUnitNumber != null && splitUnitNumber.isEmpty()) {
            splitUnitNumber = null;
        }
    }

    results.put(NUMBER, splitStreetNumber);
    results.put(PREDIR, splitStreetDir);
    results.put(STREET, splitStreetName);
    results.put(TYPE, splitStreetType);
    results.put(LINE2, splitUnitNumber);

    return results;
}

From source file:fr.eurecom.nerd.core.proxy.WikimetaClient.java

/**
 * Reads the "Named Entities" array from the third element of the "document" array in
 * {@code json} and turns each item into an entity, locating the entity string inside
 * {@code text} to obtain its start and end character offsets. Items that cannot be located
 * are left out. A {@code JSONException} is printed and ends processing; entities collected
 * up to that point are still returned.
 *
 * @param json  the service response
 * @param text  the text in which the entity strings are searched
 * @param otype the ontology passed to the NERD type mapper
 * @return the entities that could be located, in response order
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public List<TEntity> parse(String json, String text, OntologyType otype) throws IOException {
    List<TEntity> result = new LinkedList<TEntity>();
    // number of times each entity string has been seen so far in the response
    Map<String, Integer> map = new HashMap<String, Integer>();

    try {
        JSONObject o = new JSONObject(json);
        JSONArray jadocument = o.getJSONArray("document");

        // 3 items is Named Entities
        JSONObject jodocument = jadocument.getJSONObject(2);
        JSONArray jsonarray = jodocument.getJSONArray("Named Entities");

        for (int i = 0; i < jsonarray.length(); i++) {
            JSONObject jo = jsonarray.getJSONObject(i);
            String entity = jo.getString("EN");
            String rawType = jo.getString("type");
            String type = rawType.equals("") ? null : rawType;
            String nerdType = OntoFactory.mapper.getNerdType(otype, entity, SOURCE, type).toString();
            String uri = jo.getString("URI");

            Integer seenBefore = map.get(entity);
            int occurrences = (seenBefore == null) ? 1 : seenBefore.intValue() + 1;
            map.put(entity, Integer.valueOf(occurrences));

            //logic to compute the startchar and endchar of the entity within the text
            Integer startchar = null, endchar = null;

            // The entity string is data, not a pattern: quote it so characters such as '(', '+'
            // or '.' are matched literally instead of failing to compile or matching unrelated text.
            Pattern p = Pattern.compile("\\b" + Pattern.quote(entity) + "\\b");
            Matcher m = p.matcher(text);
            for (int j = 0; j < occurrences && m.find(); j++) {
                startchar = m.start(0);
                endchar = m.end(0);
                // a match that containsAtIndex reports for result does not count towards the quota
                if (containsAtIndex(result, startchar, endchar)) {
                    j--;
                }
            }

            double confidence = 0.0;
            String rawConfidence = jo.getString("confidenceScore");
            if (!rawConfidence.equals("")) {
                confidence = Double.parseDouble(rawConfidence);
            }

            if (startchar != null && endchar != null) {
                TEntity extraction = new TEntity(entity, type, uri, nerdType, startchar, endchar,
                        confidence, SOURCE);

                result.add(extraction);
            }
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return result;
}