public int end(String name) 

Source Link


Returns the offset after the last character of the subsequence captured by the given named-capturing group during the previous match operation.


From source file:fr.eurecom.nerd.core.proxy.WikimetaClient.java

/**
 * Parses a JSON response and builds the list of entities it reports, locating
 * each entity inside {@code text} with a word-boundary regular expression.
 *
 * NOTE(review): this excerpt is truncated. Closing braces and several
 * statements (for example the body of the {@code containsAtIndex} check and
 * whatever is done with {@code extraction}) are not visible here.
 *
 * @param json  JSON string; must contain a "document" array whose element at
 *              index 2 holds a "Named Entities" array
 * @param text  the text in which the entity offsets are searched
 * @param otype ontology type passed through to the NERD type mapper
 * @return the list of entities collected in {@code result}
 * @throws IOException declared by the signature; no visible statement throws it
 */
public List<TEntity> parse(String json, String text, OntologyType otype) throws IOException {
    List<TEntity> result = new LinkedList<TEntity>();
    // Number of times each entity label has been seen so far in the response.
    Map<String, Integer> map = new HashMap<String, Integer>();

    try {
        JSONObject o = new JSONObject(json);
        JSONArray jadocument = o.getJSONArray("document");

        // The third item (index 2) of "document" holds the "Named Entities" array.
        JSONObject jodocument = jadocument.getJSONObject(2);
        JSONArray jsonarray = jodocument.getJSONArray("Named Entities");

        for (int i = 0; i < jsonarray.length(); i++) {
            JSONObject jo = jsonarray.getJSONObject(i);
            String entity = jo.getString("EN");
            // An empty "type" string is normalized to null.
            String type = (jo.getString("type").equals("")) ? null : jo.getString("type");
            String nerdType = OntoFactory.mapper.getNerdType(otype, entity, SOURCE, type).toString();
            String uri = jo.getString("URI");

            // Compute the start and end character offsets of the entity within the text.
            Integer startchar = null, endchar = null;
            // Increment the occurrence counter for this entity label (starting at 1).
            if (map.containsKey(entity)) {
                int value = map.get(entity);
                map.put(entity, new Integer(value + 1));
            } else
                map.put(entity, new Integer(1));

            try {
                // NOTE(review): entity is concatenated unquoted, so regex metacharacters
                // in it are interpreted as pattern syntax; an invalid pattern ends up in
                // the PatternSyntaxException handler below.
                Pattern p = Pattern.compile("\\b" + entity + "\\b");
                Matcher m = p.matcher(text);
                // Advance through the matches up to the current occurrence count; the
                // offsets of the last match found are the ones kept.
                for (int j = 0; j < map.get(entity) && m.find(); j++) {
                    startchar = m.start(0);
                    endchar = m.end(0);
                    if (containsAtIndex(result, startchar, endchar))
                    // NOTE(review): the statement guarded by this if is missing from the excerpt.

                // Confidence defaults to 0.0 when "confidenceScore" is an empty string.
                double confidence = 0.0;
                if (!jo.getString("confidenceScore").equals(""))
                    confidence = Double.parseDouble(jo.getString("confidenceScore"));

                // An entity is only built when a match was found in the text.
                if (startchar != null && endchar != null) {
                    TEntity extraction = new TEntity(entity, type, uri, nerdType.toString(), startchar, endchar,
                            confidence, SOURCE);

            } catch (PatternSyntaxException eregex) {
    } catch (JSONException e) {
    return result;

From source file:com.norconex.commons.lang.io.TextReader.java

/**
 * Reads the next chunk of text, up to the maximum read size specified.
 * It tries as much as possible to break long text into paragraphs,
 * sentences or words, before returning.  See class documentation.
 * @return text read
 * @throws IOException problem reading text.
 */
// NOTE(review): this excerpt is truncated; closing braces and the body of the
// "else" branch below are not visible.
public String readText() throws IOException {
    // Read at most the space left once the current buffer content is accounted for.
    char[] text = new char[maxReadSize - buffer.length()];
    int num = reader.read(text);
    // Nothing could be read: signal end of input with null.
    if (num == -1) {
        return null;

    buffer.append(String.valueOf(text, 0, num));

    // Return all if we reached the end.
    // The single-character read() is used as the end-of-stream probe.
    if (reader.read() == -1) {
        String t = buffer.toString();
        return t;
    } else {

    Matcher m = null;

    // Try breaking at paragraph:
    m = paragraphDelimiterPattern.matcher(buffer);
    if (m.find()) {
        // The last capturing group of the pattern marks the delimiter.
        int mStart = m.start(m.groupCount());
        int mEnd = m.end(m.groupCount());
        // Cut after the delimiter by default, or before it when it must be removed.
        int substringEnd = mEnd;
        if (removeTrailingDelimiter) {
            substringEnd = mStart;
        String t = buffer.substring(0, substringEnd);
        // Drop the returned text from the buffer.
        buffer.delete(0, substringEnd);
        return t;

    // Try breaking at sentence:
    m = sentencePattern.matcher(buffer);
    if (m.find()) {
        // Unlike the other two cases, the sentence pattern always uses group 1.
        int mStart = m.start(1);
        int mEnd = m.end(1);
        int substringEnd = mEnd;
        if (removeTrailingDelimiter) {
            substringEnd = mStart;
        String t = buffer.substring(0, substringEnd);
        buffer.delete(0, substringEnd);
        return t;

    // Try breaking at word:
    m = wordDelimiterPattern.matcher(buffer);
    if (m.find()) {
        int mStart = m.start(m.groupCount());
        int mEnd = m.end(m.groupCount());
        int substringEnd = mEnd;
        if (removeTrailingDelimiter) {
            substringEnd = mStart;
        String t = buffer.substring(0, substringEnd);
        buffer.delete(0, substringEnd);
        return t;

    // No delimiter matched: return the whole buffer content.
    String t = buffer.toString();
    return t;

From source file:fr.smile.liferay.LiferayUrlRewriter.java

/**
 * Fixes all resource URLs and returns the result.
 * @param input        The original charSequence to be processed.
 * @param requestUrl   The request URL.
 * @param baseUrlParam The base URL selected for this request.
 * @return the result of this renderer.
 */
// NOTE(review): this excerpt is truncated; closing braces and the body of the
// "groupCount() > 3" branch are not visible.
public CharSequence rewriteHtml(CharSequence input, String requestUrl, Pattern pattern, String baseUrlParam,
        String visibleBaseUrl) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("input=" + input);
        LOG.debug("rewriteHtml (requestUrl=" + requestUrl + ", pattern=" + pattern + ",baseUrlParam)"
                + baseUrlParam + ",strVisibleBaseUrl=" + visibleBaseUrl + ")");

    // Output buffer, pre-sized to the input length.
    StringBuffer result = new StringBuffer(input.length());
    Matcher m = pattern.matcher(input);
    while (m.find()) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("found match: " + m);
        // Group 3 without its first and last character.
        // NOTE(review): presumably these are the quotes around the URL; confirm against the pattern.
        String url = input.subSequence(m.start(3) + 1, m.end(3) - 1).toString();
        url = rewriteUrl(url, requestUrl, baseUrlParam, visibleBaseUrl);
        // Replace '$' with '\$' because '$' denotes a group reference in the
        // replacement string given to appendReplacement.
        url = url.replaceAll("\\$", "\\\\\\$");
        // Rebuild the tag from groups 1 and 2 followed by the rewritten, quoted URL.
        StringBuffer tagReplacement = new StringBuffer("<$1$2=\"").append(url).append("\"");
        if (m.groupCount() > 3) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("replacement: " + tagReplacement);
        m.appendReplacement(result, tagReplacement.toString());

    // NOTE(review): no appendTail call is visible in this excerpt.
    return result;

From source file:de.tudarmstadt.ukp.dkpro.core.textnormalizer.ReplacementFileNormalizer.java

/**
 * Searches the document text for every key of {@code replacementMap} and
 * prepares change annotations for the matches.
 *
 * NOTE(review): this excerpt is truncated after {@code sca.setBegin(start)};
 * the rest of the loop body and the closing braces are not visible.
 *
 * @param jcas the CAS whose document text is searched
 * @return a map with a single entry (key 1) holding {@code scaChangesList}
 */
protected Map<Integer, List<SofaChangeAnnotation>> createSofaChangesMap(JCas jcas) {
    Map<Integer, List<SofaChangeAnnotation>> changesMap = new TreeMap<Integer, List<SofaChangeAnnotation>>();
    int mapKey = 1;

    // Both the text and the keys are lower-cased, so matching ignores case.
    String coveredText = jcas.getDocumentText().toLowerCase();

    List<SofaChangeAnnotation> scaChangesList = new ArrayList<SofaChangeAnnotation>();
    for (Map.Entry<String, String> entry : replacementMap.entrySet()) {
        String replacementKey = entry.getKey().toLowerCase();
        String replacementValue = targetSurroundings + entry.getValue() + targetSurroundings;

        // The key is quoted so it is matched literally, and wrapped in its own capturing group.
        String regex = srcSurroundingsStart + "(" + Pattern.quote(replacementKey) + ")" + srcSurroundingsEnd;
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(coveredText);

        // With exactly one capturing group the key is group 1, otherwise group 2.
        // NOTE(review): presumably srcSurroundingsStart contributes at most one group; confirm.
        int groupNumberOfKey = (matcher.groupCount() == 1) ? 1 : 2;

        while (matcher.find()) {
            // Offsets of the key itself, excluding the surroundings.
            int start = matcher.start(groupNumberOfKey);
            int end = matcher.end(groupNumberOfKey);

            SofaChangeAnnotation sca = new SofaChangeAnnotation(jcas);
            sca.setBegin(start);


    changesMap.put(mapKey++, scaChangesList);

    return changesMap;

From source file:com.edgenius.wiki.render.filter.LinkFilter.java

/**
 * Collects the regions found in {@code input} by running the filter's regex
 * through a token visitor.
 *
 * NOTE(review): this excerpt is truncated; the closing braces of the
 * if/else, the visitor method, the anonymous class and the call are not visible.
 *
 * @param input the text to scan
 * @return the list of regions added by the visitor
 */
public List<Region> getRegions(CharSequence input) {
    final List<Region> list = new ArrayList<Region>();
    regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher matcher) {

            // Group 1 is the link content; the full region extends one character
            // further on each side.
            int contentStart = matcher.start(1);
            int contentEnd = matcher.end(1);
            int start = contentStart - 1;
            int end = contentEnd + 1;

            String full = matcher.group(1);
            int sep;
            // A link may yield two regions, [view>link]: the entire text is an
            // immutable region, but the view part is mutable.
            if ((sep = StringUtil.indexSeparatorWithoutEscaped(full, ">")) != -1) {
                // The entire link is immutable.
                Region bodyRegion = new Region(LinkFilter.this, true, start, end, contentStart, contentEnd);

                // The view part is a normal mutable region and needs independent rendering.
                // NOTE(review): no statement using viewPartRegion is visible in this excerpt.
                Region viewPartRegion = new Region(LinkFilter.this, false, contentStart, contentStart + sep,
                        contentStart, contentStart + sep);

                list.add(bodyRegion);
            } else {
                // [viewAsLink] has only one region, and it is immutable.
                list.add(new Region(LinkFilter.this, true, start, end, contentStart, contentEnd));

    return list;

From source file:net.healeys.lexic.online.OnlineGame.java

/**
 * Fetches {@code uri} over HTTP and reads the response line by line as
 * "key:value" pairs, retrying up to {@code MAX_ATTEMPTS} times.
 *
 * NOTE(review): this excerpt is truncated; several closing braces are not visible.
 *
 * @return {@code true} once an attempt completes, {@code false} after the
 *         attempt loop ends without success
 */
public boolean start() {
    // A line is "<word characters>:<rest>"; group 1 is the key, group 2 the value.
    Pattern pat = Pattern.compile("(\\w+):(.+)");
    for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
        try {
            HttpClient httpClient = new DefaultHttpClient();
            HttpGet get = new HttpGet(uri);

            HttpResponse resp = httpClient.execute(get);

            // NOTE(review): no close() of the reader is visible in this excerpt.
            BufferedReader br = new BufferedReader(new InputStreamReader(resp.getEntity().getContent()));

            String line;
            while ((line = br.readLine()) != null) {
                // Log.d(TAG,"line:"+line);
                Matcher mat = pat.matcher(line);
                if (mat.find()) {
                    String key = line.substring(mat.start(1), mat.end(1));
                    String value = line.substring(mat.start(2), mat.end(2));
                    // Log.d(TAG,"key:"+key);
                    // Log.d(TAG,"value:"+value);

                    if (key.equals("board")) {
                        // The board is a comma-separated letter list: 16 letters give a
                        // 4x4 board, 25 letters a 5x5 board.
                        String[] letters = value.split(",");
                        if (letters.length == 16) {
                            setBoard(new FourByFourBoard(letters));
                        } else if (letters.length == 25) {
                            setBoard(new FiveByFiveBoard(letters));
                    } else if (key.equals("id")) {
                        id = Integer.parseInt(value);
                    } else {
                        // Any other key is stored in the urls map.
                        urls.put(key, value);


            return true;
        } catch (Exception e) {
            // Log.e(TAG,"Connection Error in constructor",e);

    return false;

From source file:com.joliciel.talismane.filters.SentenceHolderImpl.java

/**
 * Splits this holder's text at the recorded sentence boundaries, prepending
 * {@code leftover} to the first piece when it is non-null.
 *
 * NOTE(review): this excerpt is heavily truncated. Closing braces, the bodies
 * of several loops and ifs, and the statements that set the sentence text and
 * add it to {@code sentences} are not visible.
 *
 * @param leftover a sentence left over from earlier text, or {@code null}
 * @return the list held in {@code sentences}
 */
public List<Sentence> getDetectedSentences(Sentence leftover) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("getDetectedSentences. leftover=" + leftover);
    }

    List<Sentence> sentences = new ArrayList<Sentence>();

    int currentIndex = 0;
    // Without any boundary, any non-empty text counts as leftover.
    boolean haveLeftOvers = this.getText().length() > 0;
    if (this.sentenceBoundaries.size() > 0) {
        haveLeftOvers = false;
        // First element in descending order.
        // NOTE(review): presumably the highest boundary; confirm the collection type.
        int lastSentenceBoundary = this.sentenceBoundaries.descendingIterator().next();
        // Text remaining after the last boundary is leftover.
        if (lastSentenceBoundary < this.getText().length() - 1) {
            haveLeftOvers = true;
        if (LOG.isTraceEnabled()) {
            LOG.trace("haveLeftOvers? " + lastSentenceBoundary + " < " + (this.getText().length() - 1) + " = "
                    + haveLeftOvers);

    // When there are leftovers, the index of the last character acts as an extra boundary.
    List<Integer> allBoundaries = new ArrayList<Integer>(this.sentenceBoundaries);
    if (haveLeftOvers)
        allBoundaries.add(this.getText().length() - 1);

    for (int sentenceBoundary : allBoundaries) {
        boolean isLeftover = haveLeftOvers && sentenceBoundary == this.getText().length() - 1;

        Sentence sentence = filterService.getSentence();
        int leftOverTextLength = 0;
        String text = "";
        if (leftover != null) {
            // Reuse the incoming leftover sentence and prefix its text; it is
            // consumed only once (set to null afterwards).
            sentence = leftover;
            leftOverTextLength = leftover.getText().length();
            text = leftover.getText() + this.getText().substring(currentIndex, sentenceBoundary + 1);
            leftover = null;
        } else {
            text = this.getText().substring(currentIndex, sentenceBoundary + 1);

        // handle trim & duplicate white space here
        Matcher matcherOpeningWhiteSpace = openingWhiteSpacePattern.matcher(text);
        int openingWhiteSpaceEnd = 0;
        if (matcherOpeningWhiteSpace.find()) {
            openingWhiteSpaceEnd = matcherOpeningWhiteSpace.end(1);

        // Closing white space is only looked for when this piece is not the leftover.
        int closingWhiteSpaceStart = text.length();
        if (!isLeftover) {
            Matcher matcherClosingWhiteSpace = closingWhiteSpacePattern.matcher(text);
            if (matcherClosingWhiteSpace.find()) {
                closingWhiteSpaceStart = matcherClosingWhiteSpace.start(1);

        Matcher matcherDuplicateWhiteSpace = duplicateWhiteSpacePattern.matcher(text);
        Set<Integer> duplicateWhiteSpace = new HashSet<Integer>();
        while (matcherDuplicateWhiteSpace.find()) {
            // remove all white space barring the first
            // NOTE(review): the loop body is missing from this excerpt.
            for (int i = matcherDuplicateWhiteSpace.start() + 1; i < matcherDuplicateWhiteSpace.end(); i++) {

        StringBuilder sb = new StringBuilder();
        // i starts at currentIndex, an index into this.getText(); j indexes into text.
        int i = currentIndex;
        for (int j = 0; j < text.length(); j++) {
            // A character is kept only when it is outside the opening white space,
            // the closing white space and the duplicate white space positions.
            boolean appendLetter = false;
            if (j < openingWhiteSpaceEnd) {
                // do nothing
            } else if (j >= closingWhiteSpaceStart) {
                // do nothing
            } else if (duplicateWhiteSpace.contains(j)) {
                // do nothing
            } else {
                appendLetter = true;

            if (j >= leftOverTextLength) {
                // if we're past the leftovers and onto the new stuff
                // NOTE(review): the statement guarded by the next if is missing from this excerpt.
                if (appendLetter)

                // Carry over the original text segment recorded at index i, keyed by
                // the current length of the output.
                if (this.getOriginalTextSegments().containsKey(i))
                    sentence.getOriginalTextSegments().put(sb.length(), this.getOriginalTextSegments().get(i));


            // NOTE(review): the statement guarded by this if is missing from this excerpt.
            if (appendLetter)

        if (LOG.isTraceEnabled()) {
            LOG.trace("sentence.setText |" + sentence.getText() + "|");


        // Copy every recorded newline location to the sentence.
        for (Entry<Integer, Integer> newlineLocation : this.newlines.entrySet()) {
            sentence.addNewline(newlineLocation.getKey(), newlineLocation.getValue());


        // The next piece starts right after this boundary.
        currentIndex = sentenceBoundary + 1;

    return sentences;

From source file:fr.eurecom.nerd.core.proxy.ExtractivClient.java

/**
 * Sends {@code text} to the Extractiv endpoint, iterates over the parsed
 * documents and their entities, and locates each entity label inside
 * {@code text} with a word-boundary regular expression.
 *
 * NOTE(review): this excerpt is truncated. Closing braces and several
 * statements (for example the body of the {@code containsAtIndex} check and
 * whatever is done with {@code extraction}) are not visible here.
 *
 * @param text       the text to analyse and in which offsets are searched
 * @param serviceKey key passed to {@code getExtractivProcessString}
 * @param otype      ontology type passed through to the NERD type mapper
 * @return the list of entities collected in {@code result}
 */
private List<TEntity> parse(String text, String serviceKey, OntologyType otype) {
    List<TEntity> result = new LinkedList<TEntity>();
    URI endpoint;
    try {
        endpoint = new URI(EXTRACTIV_SERVER_LOCATION);
        HttpMethodBase extractivRequest = getExtractivProcessString(endpoint, text, serviceKey);
        InputStream extractivResults = fetchHttpRequest(extractivRequest);
        Readable jsonReadable = new InputStreamReader(extractivResults);
        ExtractivJSONParser jsonParser = new ExtractivJSONParser(jsonReadable);

        // Number of times each entity label has been seen so far in the response.
        Map<String, Integer> map = new HashMap<String, Integer>();
        for (Document document : jsonParser)
            for (com.extractiv.Entity item : document.getEntities()) {
                String label = item.asString();
                String type = item.getType();
                String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString();
                // First link if any, otherwise the literal string "null".
                String uri = (item.getLinks().size() > 0) ? item.getLinks().get(0) : "null";
                //                    Integer startChar = item.getOffset();
                //                    Integer endChar = startChar + item.getCharLength();
                //                    TEntity extraction = new TEntity(label, type, uri, nerdType, 
                //                    startChar, endChar, confidence, SOURCE); 
                //                    result.add(extraction);

                // Compute the start and end character offsets of the entity within the text.
                Integer startchar = null, endchar = null;
                // Increment the occurrence counter for this label (starting at 1).
                if (map.containsKey(label)) {
                    int value = map.get(label);
                    map.put(label, new Integer(value + 1));
                } else
                    map.put(label, new Integer(1));

                try {
                    // NOTE(review): label is concatenated unquoted, so regex metacharacters
                    // in it are interpreted as pattern syntax; an invalid pattern ends up in
                    // the PatternSyntaxException handler below.
                    Pattern p = Pattern.compile("\\b" + label + "\\b");
                    Matcher m = p.matcher(text);
                    // Advance through the matches up to the current occurrence count; the
                    // offsets of the last match found are the ones kept.
                    for (int j = 0; j < map.get(label) && m.find(); j++) {
                        startchar = m.start(0);
                        endchar = m.end(0);
                        if (containsAtIndex(result, startchar, endchar))
                        // NOTE(review): the statement guarded by this if is missing from the excerpt.

                    // A fixed confidence of 0.5 is used for every entity.
                    Double confidence = 0.5;

                    // An entity is only built when a match was found in the text.
                    if (startchar != null && endchar != null) {
                        TEntity extraction = new TEntity(label, type, uri, nerdType.toString(), startchar,
                                endchar, confidence, SOURCE);

                } catch (PatternSyntaxException eregex) {
    } catch (URISyntaxException e) {
    } catch (BadInputException e) {
    } catch (FileNotFoundException e) {

    return result;

From source file:com.dreamlinx.automation.DINRelay.java

/**
 * Creates an HttpClient to communicate with the DIN relay.
 * @throws MalformedURLException
 * @throws HttpException
 * @throws IOException
 */
// NOTE(review): this excerpt is truncated; the closing braces of the ifs, the
// while loop and the method are not visible.
private void setupHttpClient() throws MalformedURLException, HttpException, IOException {
    httpClient = new HttpClient();

    // Fetch the page at the relay's address; any status other than 200 is an error.
    GetMethod getMethod = new GetMethod("http://" + ipAddress);
    int result = httpClient.executeMethod(getMethod);
    if (result != 200) {
        throw new HttpException(result + " - " + getMethod.getStatusText());

    String response = getMethod.getResponseBodyAsString();

    // Look for the name="Challenge" value="..." attribute pair in the response.
    // NOTE(review): ".*" is greedy, so the match runs to the last '"' on the same line.
    String regex = "name=\"Challenge\" value=\".*\"";
    Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(response);
    String challenge = "";
    // If there are several matches, the last one wins.
    while (matcher.find()) {
        int start = matcher.start(0);
        int end = matcher.end(0);
        // Skip the 24 characters of name="Challenge" value=" and drop the closing quote.
        challenge = response.substring(start + 24, end - 1);

    // The password sent is the MD5 of challenge + username + password + challenge.
    String md5Password = challenge + username + password + challenge;
    md5Password = toMD5(md5Password);

    PostMethod postMethod = new PostMethod("http://" + ipAddress + "/login.tgi");
    postMethod.addParameter("Username", username);
    postMethod.addParameter("Password", md5Password);

    result = httpClient.executeMethod(postMethod);
    if (result != 200) {
        throw new HttpException(result + " - " + postMethod.getStatusText());

From source file:ch.sourcepond.maven.release.pom.VersionTransferWriter.java

public void close() throws IOException {
    final Matcher matcher = VERSION_PATTERN.matcher(toString());
    final Matcher originalMatcher = VERSION_PATTERN.matcher(original);
    int originalIdx = 0;
    int startIdx = 0;

    while (find(matcher, originalMatcher, originalIdx)) {
        final String newVersion = matcher.group(VERSION_VALUE);
        startIdx = originalMatcher.start(VERSION_VALUE);
        original.replace(startIdx, originalMatcher.end(VERSION_VALUE), newVersion);
        originalIdx = startIdx + newVersion.length();
    }//from  w w w . ja  va2 s  .co  m

    try (final Writer writer = new BufferedWriter(new FileWriter(file))) {