Example usage for java.util.regex Matcher end

List of usage examples for java.util.regex Matcher end

Introduction

This page collects usage examples for the java.util.regex.Matcher.end methods.

Prototype

public int end(String name) 

Source Link

Document

Returns the offset after the last character of the subsequence captured by the given named-capturing group during the previous match operation.

Usage

From source file:fr.eurecom.nerd.core.proxy.WikimetaClient.java

/**
 * Parses a Wikimeta JSON response and locates each returned entity inside {@code text}.
 *
 * <p>The named entities are read from the {@code "Named Entities"} array of the third
 * element of the {@code "document"} array. For every entity the method searches
 * {@code text} for the label (as a literal, delimited by word boundaries) to derive its
 * start and end character offsets. Entities whose label cannot be found in {@code text}
 * are skipped.
 *
 * @param json  the raw JSON response
 * @param text  the text in which entity offsets are computed
 * @param otype the ontology type handed to the NERD type mapper
 * @return the entities that could be located in {@code text}; empty if none, or if the
 *         JSON could not be read
 * @throws IOException declared by the signature; nothing in this method throws it directly
 */
public List<TEntity> parse(String json, String text, OntologyType otype) throws IOException {
    List<TEntity> result = new LinkedList<TEntity>();
    // How many times each entity label has been reported so far.
    Map<String, Integer> map = new HashMap<String, Integer>();

    try {
        JSONObject o = new JSONObject(json);
        JSONArray jadocument = o.getJSONArray("document");

        // The third item of "document" holds the named entities.
        JSONObject jodocument = jadocument.getJSONObject(2);
        JSONArray jsonarray = jodocument.getJSONArray("Named Entities");

        for (int i = 0; i < jsonarray.length(); i++) {
            JSONObject jo = jsonarray.getJSONObject(i);
            String entity = jo.getString("EN");
            String type = (jo.getString("type").equals("")) ? null : jo.getString("type");
            String nerdType = OntoFactory.mapper.getNerdType(otype, entity, SOURCE, type).toString();
            String uri = jo.getString("URI");

            Integer seen = map.get(entity);
            int occurrence = (seen == null) ? 1 : seen + 1;
            map.put(entity, occurrence);

            // The label is data, not a regular expression: quote it so that labels such as
            // "C++" or "AT&T (US)" are matched literally instead of being mis-parsed or
            // rejected by the regex compiler.
            Pattern p = Pattern.compile("\\b" + Pattern.quote(entity) + "\\b");
            Matcher m = p.matcher(text);

            // Walk to the occurrence-th match; matches already claimed by an earlier
            // entity do not count towards the occurrence number.
            Integer startchar = null, endchar = null;
            for (int j = 0; j < occurrence && m.find(); j++) {
                startchar = m.start(0);
                endchar = m.end(0);
                if (containsAtIndex(result, startchar, endchar)) {
                    j--;
                }
            }

            double confidence = 0.0;
            if (!jo.getString("confidenceScore").equals("")) {
                confidence = Double.parseDouble(jo.getString("confidenceScore"));
            }

            if (startchar != null && endchar != null) {
                TEntity extraction = new TEntity(entity, type, uri, nerdType, startchar, endchar,
                        confidence, SOURCE);

                result.add(extraction);
            }
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return result;
}

From source file:com.norconex.commons.lang.io.TextReader.java

/**
 * Reads the next chunk of text, up to the maximum read size specified.
 * The chunk is cut at the first paragraph delimiter if one is found, otherwise at
 * the first sentence match, otherwise at the first word delimiter; when none is
 * found the whole buffer is returned.  See class documentation.
 * @return text read, or {@code null} when the reader has nothing more to give
 * @throws IOException problem reading text.
 */
public String readText() throws IOException {
    final char[] chunk = new char[maxReadSize - buffer.length()];
    final int charsRead = reader.read(chunk);
    if (charsRead == -1) {
        return null;
    }
    buffer.append(String.valueOf(chunk, 0, charsRead));

    // Peek one character ahead: if the reader is exhausted, hand back everything.
    reader.mark(1);
    final boolean endReached = (reader.read() == -1);
    if (endReached) {
        final String remaining = buffer.toString();
        buffer.setLength(0);
        reader.reset();
        return remaining;
    }
    reader.reset();

    // Paragraph break: the last capturing group marks the delimiter.
    final Matcher paragraph = paragraphDelimiterPattern.matcher(buffer);
    if (paragraph.find()) {
        final int group = paragraph.groupCount();
        final int cut = removeTrailingDelimiter ? paragraph.start(group) : paragraph.end(group);
        final String piece = buffer.substring(0, cut);
        buffer.delete(0, cut);
        return piece;
    }

    // Sentence break: group 1 marks the delimiter.
    final Matcher sentence = sentencePattern.matcher(buffer);
    if (sentence.find()) {
        final int cut = removeTrailingDelimiter ? sentence.start(1) : sentence.end(1);
        final String piece = buffer.substring(0, cut);
        buffer.delete(0, cut);
        return piece;
    }

    // Word break: the last capturing group marks the delimiter.
    final Matcher word = wordDelimiterPattern.matcher(buffer);
    if (word.find()) {
        final int group = word.groupCount();
        final int cut = removeTrailingDelimiter ? word.start(group) : word.end(group);
        final String piece = buffer.substring(0, cut);
        buffer.delete(0, cut);
        return piece;
    }

    // No suitable break point: flush the whole buffer.
    final String everything = buffer.toString();
    buffer.setLength(0);
    return everything;
}

From source file:fr.smile.liferay.LiferayUrlRewriter.java

/**
 * Fix all resources urls and return the result.
 *
 * <p>Every match of {@code pattern} is replaced by a rebuilt tag of the form
 * {@code <$1$2="rewritten-url"[$4]>}. Group 3 of the pattern must capture the URL
 * including its surrounding one-character delimiters (they are stripped before the
 * URL is rewritten); group 4 is re-emitted only when the pattern defines it.
 *
 * @param input          The original charSequence to be processed.
 * @param requestUrl     The request URL.
 * @param pattern        The pattern locating the tags whose URL must be rewritten.
 * @param baseUrlParam   The base URL selected for this request.
 * @param visibleBaseUrl The visible base URL, forwarded to the URL rewriting step.
 * @return the result of this renderer.
 */
public CharSequence rewriteHtml(CharSequence input, String requestUrl, Pattern pattern, String baseUrlParam,
        String visibleBaseUrl) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("input=" + input);
        LOG.debug("rewriteHtml (requestUrl=" + requestUrl + ", pattern=" + pattern + ",baseUrlParam="
                + baseUrlParam + ",strVisibleBaseUrl=" + visibleBaseUrl + ")");
    }

    // Matcher.appendReplacement requires a StringBuffer on older Java versions.
    StringBuffer result = new StringBuffer(input.length());
    Matcher m = pattern.matcher(input);
    while (m.find()) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("found match: " + m);
        }
        // Strip the delimiter on each side of the captured URL.
        String url = input.subSequence(m.start(3) + 1, m.end(3) - 1).toString();
        url = rewriteUrl(url, requestUrl, baseUrlParam, visibleBaseUrl);

        // The URL is inserted into a replacement template: both '$' (group reference)
        // and '\' (escape character) must be neutralised, otherwise backslashes are
        // silently dropped by appendReplacement.
        String tagReplacement = "<$1$2=\"" + Matcher.quoteReplacement(url) + "\"";
        if (m.groupCount() > 3) {
            tagReplacement += "$4";
        }
        tagReplacement += ">";
        if (LOG.isTraceEnabled()) {
            LOG.trace("replacement: " + tagReplacement);
        }
        m.appendReplacement(result, tagReplacement);
    }
    m.appendTail(result);

    return result;
}

From source file:de.tudarmstadt.ukp.dkpro.core.textnormalizer.ReplacementFileNormalizer.java

/**
 * Builds the replacement annotations for every occurrence of a replacement key in the
 * lower-cased document text.
 *
 * <p>All annotations are collected into a single list that is stored under key
 * {@code 1} of the returned map.
 *
 * @param jcas the CAS whose document text is scanned
 * @return a map with one entry (key {@code 1}) holding all change annotations
 */
@Override
protected Map<Integer, List<SofaChangeAnnotation>> createSofaChangesMap(JCas jcas) {
    Map<Integer, List<SofaChangeAnnotation>> changesMap = new TreeMap<Integer, List<SofaChangeAnnotation>>();

    // Matching is case-insensitive: both the text and the keys are lower-cased.
    String coveredText = jcas.getDocumentText().toLowerCase();

    List<SofaChangeAnnotation> scaChangesList = new ArrayList<SofaChangeAnnotation>();
    for (Map.Entry<String, String> entry : replacementMap.entrySet()) {
        String replacementKey = entry.getKey().toLowerCase();
        String replacementValue = targetSurroundings + entry.getValue() + targetSurroundings;

        // The key is quoted so it is matched literally between the configured surroundings.
        String regex = srcSurroundingsStart + "(" + Pattern.quote(replacementKey) + ")" + srcSurroundingsEnd;
        Matcher matcher = Pattern.compile(regex).matcher(coveredText);

        // When the key's group is the only capturing group it is group 1; otherwise
        // group 2 is used.
        int groupNumberOfKey = (matcher.groupCount() == 1) ? 1 : 2;

        while (matcher.find()) {
            SofaChangeAnnotation sca = new SofaChangeAnnotation(jcas);
            sca.setBegin(matcher.start(groupNumberOfKey));
            sca.setEnd(matcher.end(groupNumberOfKey));
            sca.setOperation(OP_REPLACE);
            sca.setValue(replacementValue);
            scaChangesList.add(sca);
        }
    }
    changesMap.put(1, scaChangesList);

    return changesMap;
}

From source file:com.edgenius.wiki.render.filter.LinkFilter.java

/**
 * Collects one immutable region per link match found in {@code input}.
 *
 * <p>A link of the form {@code [view>link]} additionally gets a mutable sub-region
 * covering the view part, so that it can be rendered independently.
 *
 * @param input the text to scan
 * @return the regions, in the order the matches were visited
 */
public List<Region> getRegions(CharSequence input) {
    final List<Region> regions = new ArrayList<Region>();

    TokenVisitor<Matcher> collector = new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher matcher) {
            final int innerStart = matcher.start(1);
            final int innerEnd = matcher.end(1);
            // The whole link extends one character beyond the content on each side.
            final int outerStart = innerStart - 1;
            final int outerEnd = innerEnd + 1;

            final String linkText = matcher.group(1);
            final int sepIdx = StringUtil.indexSeparatorWithoutEscaped(linkText, ">");

            // The entire link is always an immutable region.
            Region whole = new Region(LinkFilter.this, true, outerStart, outerEnd, innerStart, innerEnd);
            if (sepIdx != -1) {
                // [view>link]: the view part is a normal, mutable region of its own.
                final int viewEnd = innerStart + sepIdx;
                Region view = new Region(LinkFilter.this, false, innerStart, viewEnd, innerStart, viewEnd);
                whole.setSubRegion(view);
            }
            regions.add(whole);
        }
    };

    regexProvider.replaceByTokenVisitor(input, collector);
    return regions;
}

From source file:net.healeys.lexic.online.OnlineGame.java

/**
 * Downloads the game description and starts the game.
 *
 * <p>The response is read line by line; each line of the form {@code key:value} is
 * interpreted as follows: {@code board} sets a 4x4 or 5x5 board depending on the number
 * of comma-separated letters, {@code id} sets the numeric game id, and any other key is
 * stored in {@code urls}. The request is retried up to {@code MAX_ATTEMPTS} times.
 *
 * @return {@code true} if one attempt completed without error, {@code false} if all
 *         attempts failed ({@code super.start()} is invoked in both cases)
 */
public boolean start() {
    Pattern pat = Pattern.compile("(\\w+):(.+)");
    for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
        try {
            HttpClient httpClient = new DefaultHttpClient();
            HttpGet get = new HttpGet(uri);
            addHeaders(get);

            HttpResponse resp = httpClient.execute(get);

            BufferedReader br = new BufferedReader(new InputStreamReader(resp.getEntity().getContent()));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    Matcher mat = pat.matcher(line);
                    if (!mat.find()) {
                        continue;
                    }
                    String key = mat.group(1);
                    String value = mat.group(2);

                    if (key.equals("board")) {
                        String[] letters = value.split(",");
                        if (letters.length == 16) {
                            setBoard(new FourByFourBoard(letters));
                        } else if (letters.length == 25) {
                            setBoard(new FiveByFiveBoard(letters));
                        }
                    } else if (key.equals("id")) {
                        id = Integer.parseInt(value);
                    } else {
                        urls.put(key, value);
                    }
                }
            } finally {
                // Always release the response stream, including when parsing fails.
                try {
                    br.close();
                } catch (java.io.IOException ignored) {
                    // A failure to close must not turn a successful read into a retry.
                }
            }

            super.start();
            return true;
        } catch (Exception e) {
            // Deliberately ignored: any failure simply triggers the next attempt.
        }
    }

    super.start();
    return false;
}

From source file:com.joliciel.talismane.filters.SentenceHolderImpl.java

/**
 * Splits this holder's text into sentences at the recorded sentence boundaries.
 *
 * <p>One sentence is produced per boundary, plus a final incomplete sentence when text
 * remains after the last boundary (or when there are no boundaries and the text is not
 * empty). Leading white space, trailing white space (complete sentences only) and
 * duplicate white space are dropped from each sentence's text, and the original indexes
 * and original text segments are carried over for the retained characters.
 *
 * @param leftover if not {@code null}, this sentence object is reused for the first
 *                 sentence and its text is prepended to it.
 *                 NOTE(review): presumably the incomplete sentence of a previous holder — confirm.
 * @return the detected sentences, in text order
 */
@Override
public List<Sentence> getDetectedSentences(Sentence leftover) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("getDetectedSentences. leftover=" + leftover);
    }

    List<Sentence> sentences = new ArrayList<Sentence>();

    // Start (in this holder's text) of the sentence currently being built.
    int currentIndex = 0;
    // Without boundaries, any non-empty text is leftover; with boundaries, only text
    // after the last boundary is.
    boolean haveLeftOvers = this.getText().length() > 0;
    if (this.sentenceBoundaries.size() > 0) {
        haveLeftOvers = false;
        // First element in descending order, i.e. the last boundary
        // (assumes sentenceBoundaries is an ordered collection — TODO confirm).
        int lastSentenceBoundary = this.sentenceBoundaries.descendingIterator().next();
        if (lastSentenceBoundary < this.getText().length() - 1) {
            haveLeftOvers = true;
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("haveLeftOvers? " + lastSentenceBoundary + " < " + (this.getText().length() - 1) + " = "
                    + haveLeftOvers);
        }
    }

    // Treat the end of the text as an extra boundary so the leftover text becomes
    // a (incomplete) sentence too.
    List<Integer> allBoundaries = new ArrayList<Integer>(this.sentenceBoundaries);
    if (haveLeftOvers)
        allBoundaries.add(this.getText().length() - 1);

    for (int sentenceBoundary : allBoundaries) {
        // True only for the artificial end-of-text boundary added above.
        boolean isLeftover = haveLeftOvers && sentenceBoundary == this.getText().length() - 1;

        Sentence sentence = filterService.getSentence();
        int leftOverTextLength = 0;
        String text = "";
        if (leftover != null) {
            // Continue the incoming leftover sentence: its text is prepended to this
            // chunk, and it is consumed so later iterations start fresh sentences.
            sentence = leftover;
            leftOverTextLength = leftover.getText().length();
            text = leftover.getText() + this.getText().substring(currentIndex, sentenceBoundary + 1);
            leftover = null;
        } else {
            // The boundary character itself belongs to the sentence.
            text = this.getText().substring(currentIndex, sentenceBoundary + 1);
        }

        // handle trim & duplicate white space here
        // Characters before openingWhiteSpaceEnd are dropped.
        Matcher matcherOpeningWhiteSpace = openingWhiteSpacePattern.matcher(text);
        int openingWhiteSpaceEnd = 0;
        if (matcherOpeningWhiteSpace.find()) {
            openingWhiteSpaceEnd = matcherOpeningWhiteSpace.end(1);
        }

        // Characters from closingWhiteSpaceStart on are dropped; an incomplete
        // (leftover) sentence keeps its trailing white space.
        int closingWhiteSpaceStart = text.length();
        if (!isLeftover) {
            Matcher matcherClosingWhiteSpace = closingWhiteSpacePattern.matcher(text);
            if (matcherClosingWhiteSpace.find()) {
                closingWhiteSpaceStart = matcherClosingWhiteSpace.start(1);
            }
        }

        // Indexes (within text) of white space characters to drop because they repeat.
        Matcher matcherDuplicateWhiteSpace = duplicateWhiteSpacePattern.matcher(text);
        Set<Integer> duplicateWhiteSpace = new HashSet<Integer>();
        while (matcherDuplicateWhiteSpace.find()) {
            // remove all white space barring the first
            for (int i = matcherDuplicateWhiteSpace.start() + 1; i < matcherDuplicateWhiteSpace.end(); i++) {
                duplicateWhiteSpace.add(i);
            }
        }

        // j walks the combined text (leftover prefix + new chunk); i walks this holder's
        // own text and only advances once j is past the leftover prefix.
        StringBuilder sb = new StringBuilder();
        int i = currentIndex;
        for (int j = 0; j < text.length(); j++) {
            boolean appendLetter = false;
            if (j < openingWhiteSpaceEnd) {
                // do nothing
            } else if (j >= closingWhiteSpaceStart) {
                // do nothing
            } else if (duplicateWhiteSpace.contains(j)) {
                // do nothing
            } else {
                appendLetter = true;
            }

            if (j >= leftOverTextLength) {
                // if we're past the leftovers and onto the new stuff
                // Only retained characters contribute an original index.
                if (appendLetter)
                    sentence.addOriginalIndex(this.getOriginalIndexes().get(i));

                // Segments are re-keyed to the current position in the cleaned sentence
                // text, whether or not the character at i is retained.
                if (this.getOriginalTextSegments().containsKey(i))
                    sentence.getOriginalTextSegments().put(sb.length(), this.getOriginalTextSegments().get(i));

                i++;
            }

            if (appendLetter)
                sb.append(text.charAt(j));
        }

        sentence.setText(sb.toString());
        if (LOG.isTraceEnabled()) {
            LOG.trace("sentence.setText |" + sentence.getText() + "|");
        }

        // Only the trailing leftover sentence is marked incomplete.
        sentence.setComplete(!isLeftover);

        // Every sentence receives all of this holder's newline locations.
        for (Entry<Integer, Integer> newlineLocation : this.newlines.entrySet()) {
            sentence.addNewline(newlineLocation.getKey(), newlineLocation.getValue());
        }

        sentence.setFileName(this.getFileName());

        sentences.add(sentence);
        // The next sentence starts right after this boundary.
        currentIndex = sentenceBoundary + 1;
    }

    return sentences;
}

From source file:fr.eurecom.nerd.core.proxy.ExtractivClient.java

/**
 * Sends {@code text} to the Extractiv service and converts the returned entities.
 *
 * <p>The character offsets of each entity are recomputed by searching {@code text} for
 * the entity label (as a literal, delimited by word boundaries); the offsets reported by
 * the service itself are not used. Entities whose label cannot be found in {@code text}
 * are skipped. Every entity is given a fixed confidence of 0.5.
 *
 * @param text       the text to analyse
 * @param serviceKey the key used to build the Extractiv request
 * @param otype      the ontology type handed to the NERD type mapper
 * @return the entities that could be located in {@code text}; empty on failure
 */
private List<TEntity> parse(String text, String serviceKey, OntologyType otype) {
    List<TEntity> result = new LinkedList<TEntity>();
    URI endpoint;
    try {
        endpoint = new URI(EXTRACTIV_SERVER_LOCATION);
        HttpMethodBase extractivRequest = getExtractivProcessString(endpoint, text, serviceKey);
        // NOTE(review): this stream is never closed in this method — confirm whether
        // the JSON parser or the HTTP layer takes care of it.
        InputStream extractivResults = fetchHttpRequest(extractivRequest);
        Readable jsonReadable = new InputStreamReader(extractivResults);
        ExtractivJSONParser jsonParser = new ExtractivJSONParser(jsonReadable);

        // How many times each label has been reported so far.
        Map<String, Integer> map = new HashMap<String, Integer>();
        for (Document document : jsonParser) {
            for (com.extractiv.Entity item : document.getEntities()) {
                String label = item.asString();
                String type = item.getType();
                String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString();
                String uri = (item.getLinks().size() > 0) ? item.getLinks().get(0) : "null";

                Integer seen = map.get(label);
                int occurrence = (seen == null) ? 1 : seen + 1;
                map.put(label, occurrence);

                // The label is data, not a regular expression: quote it so that labels
                // containing regex metacharacters are matched literally instead of being
                // mis-parsed or rejected by the regex compiler.
                Pattern p = Pattern.compile("\\b" + Pattern.quote(label) + "\\b");
                Matcher m = p.matcher(text);

                // Walk to the occurrence-th match; matches already claimed by an earlier
                // entity do not count towards the occurrence number.
                Integer startchar = null, endchar = null;
                for (int j = 0; j < occurrence && m.find(); j++) {
                    startchar = m.start(0);
                    endchar = m.end(0);
                    if (containsAtIndex(result, startchar, endchar)) {
                        j--;
                    }
                }

                Double confidence = 0.5;

                if (startchar != null && endchar != null) {
                    TEntity extraction = new TEntity(label, type, uri, nerdType, startchar,
                            endchar, confidence, SOURCE);

                    result.add(extraction);
                }
            }
        }
    } catch (URISyntaxException e) {
        e.printStackTrace();
    } catch (BadInputException e) {
        e.printStackTrace();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }

    return result;
}

From source file:com.dreamlinx.automation.DINRelay.java

/**
 * Creates an HttpClient to communicate with the DIN relay.
 * @throws MalformedURLException/* w w w.j  a v a2s  .  c om*/
 * @throws HttpException
 * @throws IOException
 */
private void setupHttpClient() throws MalformedURLException, HttpException, IOException {
    httpClient = new HttpClient();
    httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);

    GetMethod getMethod = new GetMethod("http://" + ipAddress);
    int result = httpClient.executeMethod(getMethod);
    if (result != 200) {
        throw new HttpException(result + " - " + getMethod.getStatusText());
    }

    String response = getMethod.getResponseBodyAsString();
    getMethod.releaseConnection();

    String regex = "name=\"Challenge\" value=\".*\"";
    Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(response);
    String challenge = "";
    while (matcher.find()) {
        int start = matcher.start(0);
        int end = matcher.end(0);
        challenge = response.substring(start + 24, end - 1);
    }

    String md5Password = challenge + username + password + challenge;
    md5Password = toMD5(md5Password);

    PostMethod postMethod = new PostMethod("http://" + ipAddress + "/login.tgi");
    postMethod.addParameter("Username", username);
    postMethod.addParameter("Password", md5Password);

    result = httpClient.executeMethod(postMethod);
    if (result != 200) {
        throw new HttpException(result + " - " + postMethod.getStatusText());
    }
    postMethod.releaseConnection();
}

From source file:ch.sourcepond.maven.release.pom.VersionTransferWriter.java

/**
 * Transfers every version value matched in this writer's content into the
 * corresponding position of the original document, then writes the original
 * document to the target file.
 *
 * @throws IOException if writing the file fails
 */
@Override
public void close() throws IOException {
    final Matcher updated = VERSION_PATTERN.matcher(toString());
    final Matcher source = VERSION_PATTERN.matcher(original);

    // Position in the original document from which the next search starts.
    int searchFrom = 0;
    while (find(updated, source, searchFrom)) {
        final String replacement = updated.group(VERSION_VALUE);
        final int valueStart = source.start(VERSION_VALUE);
        final int valueEnd = source.end(VERSION_VALUE);
        original.replace(valueStart, valueEnd, replacement);
        // Continue right after the text that was just inserted.
        searchFrom = valueStart + replacement.length();
    }

    try (final Writer out = new BufferedWriter(new FileWriter(file))) {
        out.write(original.toString());
    }
}