Example usage for java.util.regex Matcher start

Introduction

On this page you can find example usages of the start method of java.util.regex.Matcher.

Prototype

public int start(String name)

Source Link

Document

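Returns the start index of the subsequence captured by the given named-capturing group during the previous match operation. Note that most of the examples below call the overload start(int group), which takes a group index instead of a group name; that overload returns -1 when the group exists in the pattern but did not take part in the match.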

Usage

From source file:com.streamsets.pipeline.stage.processor.fieldmask.FieldMaskProcessor.java

/**
 * Masks the string value of a field using the regular expression of the given configuration.
 *
 * <p>When the whole value matches the pattern registered for {@code fieldMaskConfig.regex},
 * a string of {@code MASK_CHAR} with the same length as the value is built, and the characters
 * covered by every capturing group listed in {@code regexToGroupsToShowMap} are restored at their
 * original positions. When the value does not match, it is returned unmasked.
 *
 * @param field the field whose string value is masked
 * @param fieldMaskConfig the mask configuration; its {@code regex} is the key used to look up both
 *     the compiled pattern and the set of groups to reveal
 * @return the masked value, or the original value if the pattern does not match it
 */
@VisibleForTesting
String regExMask(Field field, FieldMaskConfig fieldMaskConfig) {
    String value = field.getValueAsString();
    Matcher matcher = regExToPatternMap.get(fieldMaskConfig.regex).matcher(value);
    if (!matcher.matches()) {
        return value;
    }

    // Create a masked string of the same length as the original string.
    StringBuilder resultString = new StringBuilder(value.length());
    for (int i = 0; i < value.length(); i++) {
        resultString.append(MASK_CHAR);
    }

    // For each group that needs to be shown, put the original characters back at those positions.
    Set<Integer> groupsToShow = regexToGroupsToShowMap.get(fieldMaskConfig.regex);
    if (groupsToShow != null && !groupsToShow.isEmpty()) {
        int groupCount = matcher.groupCount();
        for (int i = 1; i <= groupCount; i++) {
            if (!groupsToShow.contains(i)) {
                continue;
            }
            int groupStart = matcher.start(i);
            // A group that did not take part in the match (optional group, unused alternative)
            // reports start == -1 and group == null; there is nothing to reveal for it, and
            // passing -1 to StringBuilder.replace would throw.
            if (groupStart < 0) {
                continue;
            }
            resultString.replace(groupStart, matcher.end(i), matcher.group(i));
        }
    }
    return resultString.toString();
}

From source file:com.github.gekoh.yagen.ddl.CreateDDL.java

/**
 * Rewrites the first match of {@code DROP_TABLE_PATTERN} in a statement so that the object name is
 * replaced by the result of {@code getNameAndIfExistsWhenSupported}.
 *
 * <p>The text before group 1 (or before group 2 when group 1 did not match) and the text after
 * group 3 (or after group 2 when group 3 did not match) are kept unchanged.
 *
 * @param dialect the dialect passed on to {@code getNameAndIfExistsWhenSupported}
 * @param sql the statement to rewrite
 * @param name the object name to use, or {@code null} to use the name captured by group 2
 * @return the rewritten statement, or {@code sql} itself when the pattern is not found
 */
private static String getIfExistsDropStatement(Dialect dialect, String sql, String name) {
    Matcher dropMatcher = DROP_TABLE_PATTERN.matcher(sql);
    if (!dropMatcher.find()) {
        return sql;
    }

    // Everything in front of the replaced span.
    int headEnd = dropMatcher.start(1);
    if (headEnd < 0) {
        headEnd = dropMatcher.start(2);
    }
    StringBuilder rewritten = new StringBuilder();
    rewritten.append(sql.substring(0, headEnd));

    // The (possibly overridden) object name, decorated for the dialect.
    String objectName = (name != null) ? name : dropMatcher.group(2);
    rewritten.append(getNameAndIfExistsWhenSupported(dialect, objectName));

    // Everything behind the replaced span.
    int tailStart = dropMatcher.end(3);
    if (tailStart < 0) {
        tailStart = dropMatcher.end(2);
    }
    rewritten.append(sql.substring(tailStart));

    return rewritten.toString();
}

From source file:io.kahu.hawaii.util.logger.DefaultLogManager.java

/**
 * Replaces password values held in the logging context with {@code MASKED_PASSWORD}.
 *
 * <p>Four groups of context fields are processed in turn, as named by the configuration:
 * URL fields (query-string parameters), parameter fields (a {@code JSONObject}), header fields
 * (a {@code JSONArray} of strings) and body fields (a string, JSON or otherwise). A field whose
 * value is absent or one of the JSON null markers is skipped; a field whose value has an
 * unexpected type is replaced entirely by {@code MASKED_PASSWORD}.
 *
 * @param config supplies the field names to inspect and the password parameter names/patterns
 * @throws JSONException declared for the JSON accessors used while rewriting the values
 */
void maskPasswords(final LoggingConfiguration config) throws JSONException {
    // Look at all fields that may contain a URL, and mask passwords in
    // query string parameters
    for (String field : config.getUrlFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof String)) {
            // If it's not a string, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }

        // It's a String so iterate over the QS parameters
        String url = (String) value;
        // p1 is the separator in front of the current parameter: '?' first, then each '&'
        int p1 = url.indexOf('?');
        boolean changed = false;
        while (p1 >= 0) {
            // p2 is the next '=' after the separator; the name lies between p1 and p2
            // NOTE(review): the search for '=' is not bounded by the next '&', so a parameter
            // without a value would pick up the '=' of a later parameter - confirm this is acceptable
            int p2 = url.indexOf('=', p1 + 1);
            if (p2 < 0) {
                break;
            }
            String qsParam = url.substring(p1 + 1, p2);
            // p3 is the end of the value: the next '&' or the end of the string
            int p3 = url.indexOf('&', p2 + 1);
            if (p3 < 0) {
                p3 = url.length();
            }
            String qsValue = url.substring(p2 + 1, p3);
            if (config.getPasswordParameters().contains(qsParam)) {
                // splice the mask in place of the value; url is rebuilt, p1 still points
                // in front of the rewritten parameter
                qsValue = MASKED_PASSWORD;
                url = url.substring(0, p2 + 1) + qsValue + url.substring(p3);
                changed = true;
            }
            // advance to the next parameter separator in the (possibly rewritten) url
            p1 = url.indexOf('&', p1 + 1);
        }
        // only write the field back when something was actually masked
        if (changed) {
            putContext(field, url);
        }
    }

    // Next, look at all parameter objects, which are String -> List of
    // String mappings
    for (String field : config.getParameterFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof JSONObject)) {
            // If it's not a JSONObject, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }

        // It's a JSONObject, so discover and nuke all possible password
        // fields
        JSONObject params = (JSONObject) value;
        boolean changed = false;
        for (String key : config.getPasswordParameters()) {
            if (params.has(key)) {
                // the whole value list is replaced by a one-element array holding the mask
                JSONArray a = new JSONArray();
                a.put(MASKED_PASSWORD);
                params.put(key, a);
                changed = true;
            }
        }
        if (changed) {
            putContext(field, params);
        }
    }

    // Next, look at all request/response headers, which are stored as an
    // array of strings
    for (String field : config.getHeaderFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof JSONArray)) {
            // If it's not a JSONArray, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }

        // It's a JSONArray, so discover and nuke all possible password
        // headers
        JSONArray headers = (JSONArray) value;
        boolean changed = false;
        for (String key : config.getPasswordParameters()) {
            for (int i = 0; i < headers.length(); i++) {
                String header = headers.getString(i);
                // the comparison is a case-sensitive prefix match on "<name>:"
                if (header.startsWith(key + ":")) {
                    headers.put(i, key + ": " + MASKED_PASSWORD);
                    changed = true;
                }
            }
        }
        if (changed) {
            putContext(field, headers);
        }
    }

    // Finally, look at all body fields for password patterns
    for (String field : config.getBodyFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof String)) {
            // If it's not a string, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }

        // It's a String, so apply all password patterns to mask the
        // passwords
        String body = (String) value;
        boolean changed = false;
        if (looksLikeJson(body)) {
            // JSON bodies are handled by field name; a null result leaves the context untouched
            body = removePasswordFieldsFromJsonBody(body, Arrays.asList(config.getBodyPasswordFields()));
            changed = (body != null);
        } else {
            for (Pattern pattern : config.getBodyPasswordPatterns()) {
                Matcher m = pattern.matcher(body);
                // i is the position in body from which the next search starts
                int i = 0;
                while (m.find(i)) {
                    // group 1 of the pattern is the password text; replace just that span
                    body = body.substring(0, m.start(1)) + MASKED_PASSWORD + body.substring(m.end(1));
                    // resume after the end of this match, adjusted for the length change
                    // caused by swapping group 1 for the mask
                    i = m.end() - m.group(1).length() + MASKED_PASSWORD.length();
                    // body changed, so a new matcher over the new string is required
                    m = pattern.matcher(body);
                    changed = true;
                }
            }
        }
        if (changed) {
            putContext(field, body);
        }
    }
}

From source file:com.github.gekoh.yagen.ddl.CreateDDL.java

/**
 * Inserts the definitions of missing audit columns into a create-table statement.
 *
 * <p>The insertion point is the start of group {@code TBL_PATTERN_IDX_AFTER_COL_DEF} of the first
 * {@code TBL_PATTERN} match. Every entry of {@code AUDIT_COLUMNS} that is not yet contained in
 * {@code columns} is appended there, prefixed by {@code ", "}, and is also added to
 * {@code columns}.
 *
 * @param dialect the dialect passed on to {@code formatColumn}
 * @param sqlCreate the create statement to extend
 * @param columns names of the columns already present; updated with the audit columns added here
 * @return the extended statement, or {@code sqlCreate} itself when the pattern is not found
 */
private static String addAuditColumns(Dialect dialect, String sqlCreate, Set<String> columns) {
    Matcher tableMatcher = TBL_PATTERN.matcher(sqlCreate);
    if (!tableMatcher.find()) {
        return sqlCreate;
    }

    // Position right behind the existing column definitions.
    int insertAt = tableMatcher.start(TBL_PATTERN_IDX_AFTER_COL_DEF);

    StringBuilder ddl = new StringBuilder(sqlCreate.substring(0, insertAt));
    for (String auditColumn : AUDIT_COLUMNS) {
        if (columns.contains(auditColumn)) {
            continue;
        }
        ddl.append(", ");
        ddl.append(formatColumn(dialect, AUDIT_COLUMN_DEFINITION.get(auditColumn),
                Constants.USER_NAME_LEN, null, null));
        columns.add(auditColumn);
    }
    ddl.append(sqlCreate.substring(insertAt));

    return ddl.toString();
}

From source file:com.vladsch.idea.multimarkdown.editor.MultiMarkdownPreviewEditor.java

/**
 * Wraps rendered HTML in a preview {@code <body>} and rewrites selected tags for the preview.
 *
 * <p>A header (wiki page title or file name, optionally linked to the GitHub URL of the document)
 * is emitted first. The HTML is then scanned with one alternation regex; text between matches is
 * copied through unchanged and each matched tag is either copied or replaced by a variant carrying
 * a CSS class (table rows, horizontal rules, del/kbd/var spans, list items, task list items).
 *
 * @param html the rendered HTML fragment to wrap
 * @return the complete body markup for the preview
 */
protected String makeHtmlPage(String html) {
    VirtualFile file = FileDocumentManager.getInstance().getFile(document);
    // scan for <table>, </table>, <tr>, </tr> and other tags we modify, this could be done with a custom plugin to pegdown but
    // then it would be more trouble to get un-modified HTML.
    String regex = "(<table>|<thead>|<tbody>|<tr>|<hr/>|<del>|</del>|</p>|<kbd>|</kbd>|<var>|</var>";//|<code>|</code>";
    // output is pre-sized to 1.25x the input because replacements mostly add text
    StringBuilder result = new StringBuilder(html.length() + (html.length() >> 2));

    // opening/closing anchor around the page title; both empty when no GitHub URL is available
    String gitHubHref = MultiMarkdownPathResolver.getGitHubDocumentURL(project, document, !isWikiDocument);
    String gitHubClose = "";
    if (gitHubHref == null) {
        gitHubHref = "";
    } else {
        gitHubHref = "<a href=\"" + gitHubHref + "\" name=\"wikipage\" id=\"wikipage\">";
        gitHubClose = "</a>";
    }
    if (isWikiDocument) {
        // wiki pages get the file name (without extension, dashes as spaces) as a first-child <h1>
        result.append("<body class=\"multimarkdown-wiki-preview\">\n<div class=\"content\">\n");
        result.append("" + "<h1 class=\"first-child\">").append(gitHubHref)
                .append(escapeHtml(file == null ? "" : file.getNameWithoutExtension().replace('-', ' ')))
                .append(gitHubClose).append("</h1>\n").append("");
    } else {
        // other documents get a page-header div with the full file name, followed by a rule
        result.append("<body class=\"multimarkdown-preview\">\n<div class=\"content\">\n"
                + "<div class=\"page-header\">").append(gitHubHref)
                .append(escapeHtml(file == null ? "" : file.getName().replace('-', ' '))).append(gitHubClose)
                .append("</div>\n").append("<div class=\"hr\"></div>\n").append("");
        // for now nothing
        // <h1> is only matched for non-wiki documents, so the first one can be marked first-child
        regex += "|<h1>";
    }

    // the plain "<li>...<p>" alternative goes last so the more specific task-list forms win
    String regexTail = "|<li>\\n*\\s*<p>";
    // NOTE(review): isDarkTheme is assigned but not read anywhere in this method
    boolean isDarkTheme = isDarkTheme();
    boolean taskLists = isTaskLists();

    if (taskLists) {
        regex += "|<li class=\"task-list-item\">\\n*\\s*<p>|<br\\s*/?>|<li class=\"task-list-item\">|<li>\\[(?:x|X)\\]\\s*|<li>\\[ \\]\\s*|<li>\\n*\\s*<p>\\[x\\]\\s*|<li>\\n*\\s*<p>\\[ \\]\\s*";
    }
    regex += regexTail;
    regex += ")";

    // NOTE(review): matching is case-insensitive but the equals() checks below are case-sensitive,
    // so a tag in different case is matched and then falls through to the final else branches
    Pattern p = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    Matcher m = p.matcher(html);
    // end of the previous match; html between lastPos and the next match is copied verbatim
    int lastPos = 0;
    // number of <tr> seen in the current table, used for first/odd/even row classes
    int rowCount = 0;
    // NOTE(review): isOrderedList and listDepth are declared but not used in this method
    boolean[] isOrderedList = new boolean[20];
    int listDepth = -1;
    boolean firstChildH1 = !isWikiDocument;

    while (m.find()) {
        String found = m.group();
        if (lastPos < m.start(0)) {
            result.append(html.substring(lastPos, m.start(0)));
        }

        if (found.equals("</p>")) {
            result.append(found);
        } else if (found.startsWith("<br")) {
            // normalize every <br> variant and add a line break in the output
            result.append("<br/>\n");
        } else if (found.equals("<table>")) {
            // a new table restarts the row numbering
            rowCount = 0;
            result.append(found);
        } else if (found.equals("<thead>")) {
            result.append(found);
        } else if (found.equals("<tbody>")) {
            result.append(found);
        } else if (found.equals("/>")) {
            // NOTE(review): no alternative of the regex above produces a bare "/>", so this
            // branch looks unreachable
            result.append(">");
        } else if (found.equals("<tr>")) {
            rowCount++;
            result.append("<tr class=\"")
                    .append(rowCount == 1 ? "first-child" : (rowCount & 1) != 0 ? "odd-child" : "even-child")
                    .append("\">");
        } else if (found.equals("<hr/>")) {
            result.append("<div class=\"hr\">&nbsp;</div>");
        } else if (found.equals("<h1>")) {
            // only the first <h1> of a non-wiki document is tagged first-child
            result.append(firstChildH1 ? "<h1 class=\"first-child\">" : "<h1>");
            firstChildH1 = false;
        } else if (found.equals("<del>")) {
            result.append("<span class=\"del\">");
        } else if (found.equals("</del>")) {
            result.append("</span>");
        } else if (found.equals("<kbd>")) {
            result.append("<span class=\"kbd\">");
        } else if (found.equals("</kbd>")) {
            result.append("</span>");
        } else if (found.equals("<code>")) {
            // NOTE(review): <code>/</code> are commented out of the regex, so the two code
            // branches are currently never taken
            result.append("<span class=\"code\">");
        } else if (found.equals("</code>")) {
            result.append("</span>");
        } else if (found.equals("<var>")) {
            result.append("<span class=\"var\">");
        } else if (found.equals("</var>")) {
            result.append("</span>");
        } else {
            // remaining matches are list-item forms; drop the trailing whitespace the regex consumed
            found = found.trim();
            if (taskLists && found.equals("<li>[x]")) {
                result.append("<li class=\"dtask\">");
            } else if (taskLists && found.equals("<li>[X]")) {
                result.append("<li class=\"dtask\">");
            } else if (taskLists && found.equals("<li>[ ]")) {
                result.append("<li class=\"dtask\">");
            } else if (taskLists && found.equals("<li class=\"task-list-item\">")) {
                result.append("<li class=\"taski\">");
            } else {
                // here we have <li>\n*\s*<p>, need to strip out \n*\s* so we can match them easier
                String foundWithP = found;
                foundWithP = foundWithP.replaceAll("<li>\\n*\\s*<p>", "<li><p>");
                found = foundWithP.replaceAll("<li class=\"task-list-item\">\\n*\\s*<p>",
                        "<li class=\"task\"><p>");
                found = found.trim();
                // NOTE(review): the replaceAll above rewrites the task-list form to
                // <li class="task"><p>, so the equals check against
                // <li class="task-list-item"><p> below only matches when the input had no
                // whitespace between the tags and the replacement still applied - verify
                if (found.equals("<li><p>")) {
                    result.append("<li class=\"p\"><p class=\"p\">");
                } else if (taskLists && found.equals("<li><p>[x]")) {
                    result.append("<li class=\"dtaskp\"><p class=\"p\">");
                } else if (taskLists && found.equals("<li><p>[ ]")) {
                    result.append("<li class=\"dtaskp\"><p class=\"p\">");
                } else if (taskLists && found.equals("<li class=\"task-list-item\"><p>")) {
                    result.append("<li class=\"taskp\"><p class=\"p\">");
                } else {
                    // anything unrecognized is passed through (trimmed and normalized)
                    result.append(found);
                }
            }
        }

        lastPos = m.end(0);
    }

    // copy whatever follows the last match
    if (lastPos < html.length()) {
        result.append(html.substring(lastPos));
    }

    result.append("\n</div>\n</body>\n");
    return result.toString();
}

From source file:com.icesoft.faces.component.style.OutputStyleRenderer.java

/**
 * Renders the style sheet link of an {@code OutputStyle} component and, for non-default browser
 * types, an additional link to a browser-specific variant of the same style sheet.
 *
 * <p>For an href ending in {@code CSS_EXTENTION} the variant name is built by putting the browser
 * extension behind the part of the href that precedes the trailing extension. For a
 * {@code javax.faces.resource} URL the extension is inserted behind the resource base name, and the
 * link is only added when the resource handler can create that resource.
 *
 * <p>Any exception raised while rendering is logged and not propagated.
 *
 * @param facesContext the current faces context
 * @param uiComponent the component to render; cast to {@code OutputStyle}
 * @throws IOException declared by the renderer contract
 */
public void encodeEnd(FacesContext facesContext, UIComponent uiComponent) throws IOException {
    validateParameters(facesContext, uiComponent, OutputStyle.class);
    try {
        DOMContext domContext = DOMContext.attachDOMContext(facesContext, uiComponent);
        if (!domContext.isInitialized()) {
            OutputStyle outputStyle = (OutputStyle) uiComponent;
            Element styleEle = buildCssElement(domContext);
            String href = outputStyle.getHref();
            styleEle.setAttribute(HTML.HREF_ATTR, getResourceURL(facesContext, href));
            domContext.setRootNode(styleEle);
            int browserType = browserType(facesContext, uiComponent);
            if (browserType != DEFAULT_TYPE) {
                if (href.endsWith(CSS_EXTENTION)) {
                    // The extension to strip is the trailing one. indexOf would return the first
                    // occurrence and cut the href short when the extension text also appears
                    // earlier in the path (e.g. "/skins.css/main.css").
                    int i = href.lastIndexOf(CSS_EXTENTION);
                    if (i > 0) {
                        String start = href.substring(0, i);
                        Element ieStyleEle = buildCssElement(domContext);
                        // Later checks override earlier ones; IE is the fallback.
                        String extention = IE_EXTENTION;
                        if (browserType == SAFARI) {
                            extention = SAFARI_EXTENTION;
                        }
                        if (browserType == DT) {
                            extention = DT_EXTENTION;
                        }
                        if (browserType == IE_7) {
                            extention = IE_7_EXTENTION;
                        }
                        if (browserType == IE_8) {
                            extention = IE_8_EXTENSION;
                        }
                        if (browserType == SAFARI_MOBILE) {
                            extention = SAFARI_MOBILE_EXTENTION;
                        }
                        if (browserType == OPERA) {
                            extention = OPERA_EXTENTION;
                        }
                        if (browserType == OPERA_MOBILE) {
                            extention = OPERA_MOBILE_EXTENTION;
                        }
                        String browserSpecificFilename = useSpecific(facesContext, start, extention);
                        if (browserSpecificFilename != null) {
                            // W3C spec: To make a style sheet preferred, set the rel attribute to "stylesheet" and name the style sheet with the title attribute
                            ieStyleEle.setAttribute(HTML.TITLE_ATTR, extention);
                            String hrefURL = CoreUtils.resolveResourceURL(facesContext,
                                    browserSpecificFilename);
                            ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL);
                            styleEle.getParentNode().appendChild(ieStyleEle);
                        }
                    } else {
                        throw new RuntimeException("OutputStyle file attribute is too short. "
                                + "Needs at least one character before .css. Current Value is [" + href + "]");
                    }
                } else {
                    // Groups: 1 = resource name incl. ".css", 2 = resource base name,
                    // 4 = value of the "ln" (library) query parameter.
                    Matcher matcher = Pattern
                            .compile(".*javax\\.faces\\.resource/((.*)\\.css)(\\..*)?\\?ln=([^&]*)(&.*|$)")
                            .matcher(href);
                    if (matcher.matches()) {
                        Element ieStyleEle = buildCssElement(domContext);
                        String extension = browserType >= 0 && browserType < extensions.length
                                ? extensions[browserType]
                                : IE_EXTENTION;
                        ieStyleEle.setAttribute(HTML.TITLE_ATTR, extension);
                        // Group 0 is the whole href, so end(2) is directly usable as insert position.
                        String hrefURL = new StringBuffer(matcher.group(0)).insert(matcher.end(2), extension)
                                .toString();
                        ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL);
                        // Within group 1 the base name starts at 0, so its length is the insert position.
                        String resourceName = new StringBuffer(matcher.group(1))
                                .insert(matcher.end(2) - matcher.start(2), extension).toString();
                        Resource resource = facesContext.getApplication().getResourceHandler()
                                .createResource(resourceName, matcher.group(4));
                        if (resource != null) {
                            styleEle.getParentNode().appendChild(ieStyleEle);
                        }
                    }
                }
            }

        }
        domContext.stepOver();
    } catch (Exception e) {
        log.error("Error in OutputStyleRenderer", e);
    }
}

From source file:com.edgenius.wiki.render.filter.MacroFilter.java

/**
 * Finds macro regions in the input, pairs the start and end markup of paired macros, and
 * registers the macros with the group processor currently on top of the stack.
 *
 * <p>For a paired macro with {@code isProcessEmbedded()} the method recurses into the text
 * between the start and end markup. A group processor created for such a macro is pushed onto
 * {@code stack} for the duration of the recursion and moved to {@code processors} afterwards.
 *
 * @param initPos offset added to every region position found in {@code input}
 * @param input the text to scan
 * @param stack group processors currently open; the last element is the current parent
 * @param processors receives each group processor once its content has been processed
 */
private void checkGroup(final int initPos, CharSequence input, final LinkedList<GroupProcessor> stack,
        List<GroupProcessor> processors) {
    // one region per recognized macro markup, in the order the markup appears in the input
    final List<Region> pairRegions = new ArrayList<Region>();

    singleMacroProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher result) {
            String macroName = result.group(1);
            if (macroName != null && !macroName.startsWith("$")) {
                Macro macro = macroMgr.getMacro(macroName);

                if (macro != null) {
                    // IMPORTANT: Macro.isPaired() is deliberately not checked here; single macros are
                    // put into pairRegions too, because processing order must follow the physical order
                    // in the input text, for example {table}{cell}...{rowdiv}: rowdiv is single and
                    // must come after cell
                    int start = result.start(0);
                    int end = result.end(0);
                    Region pair = new Region(start, end);
                    //no parameter, then mark as unknown, otherwise, must be a start macro
                    if (StringUtils.isBlank(result.group(2))) {
                        pair.setKey(MACRO_REGION_KEY_UNKNOWN);
                    } else {
                        pair.setKey(MACRO_REGION_KEY_START);
                    }

                    //just for temporary to remember the macro name...
                    pair.setContent(macroName);
                    // add to the list
                    pairRegions.add(pair);
                }
            }
        }
    });

    int size = pairRegions.size();
    if (size > 0) {
        StringBuffer inputBuf = new StringBuffer(input);
        for (int idx = 0; idx < size; idx++) {
            Region reg = pairRegions.get(idx);
            // the region content holds the macro name stored by the visitor above
            Macro macro = macroMgr.getMacro(reg.getContent());
            if (macro.isPaired()) {
                // nesting depth of same-named start markup seen while searching for the end
                int deep = 0;
                Region pair = null;
                //looking for pairs...
                for (int chIdx = idx + 1; chIdx < size; chIdx++) {
                    Region next = pairRegions.get(chIdx);
                    if (StringUtils.equalsIgnoreCase(reg.getContent(), next.getContent())) {
                        //start is unknown (no attribute), then end must be unknown
                        if (MACRO_REGION_KEY_UNKNOWN.equals(reg.getKey())
                                && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) {
                            //matched
                            pair = next;
                            // skip all inner regions - they are handled by the recursive call below;
                            // note this advances the OUTER loop index
                            idx = chIdx;
                            break;
                        }

                        if (MACRO_REGION_KEY_START.equals(reg.getKey())
                                && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) {
                            if (deep == 0) {
                                //matched;
                                pair = next;
                                // skip all inner regions - they are handled by the recursive call below;
                                // note this advances the OUTER loop index
                                idx = chIdx;
                                break;
                            } else {
                                //just another inner same name macro matched, deep minus 1
                                deep--;
                            }
                        }
                        if (MACRO_REGION_KEY_START.equals(next.getKey())) {
                            // another start markup of the same macro: one more nesting level
                            deep++;
                        }
                    }
                }
                // a matching end markup was found
                if (pair != null) {
                    // absolute positions: whole macro [start, end), inner content [contentStart, contentEnd)
                    int start = initPos + reg.getStart();
                    int end = initPos + pair.getEnd();
                    int contentStart = initPos + reg.getEnd();
                    int contentEnd = initPos + pair.getStart();

                    GroupProcessor currProcessor = stack.size() == 0 ? null : stack.getLast();
                    if (currProcessor != null) {
                        currProcessor.adoptChild(macro, start, end);
                    }

                    if (macro.isProcessEmbedded() && (end > start)) {
                        // NOTE(review): hasChildren() is compared against null, so it presumably
                        // returns an object rather than a boolean - confirm its contract
                        if (macro.hasChildren() != null) {
                            stack.add(((GroupProcessorMacro) macro).newGroupProcessor(macro, start, end));
                        }
                        // recurse into the content; positions are converted back to be relative to input
                        checkGroup(contentStart,
                                inputBuf.subSequence(contentStart - initPos, contentEnd - initPos), stack,
                                processors);
                        if (macro.hasChildren() != null) {
                            //pop the current one, means it is a completed GroupProcessor
                            processors.add(stack.removeLast());
                        }
                    }
                }
            } else {
                //single macro - directly detect if it is child
                GroupProcessor currProcessor = stack.size() == 0 ? null : stack.getLast();
                if (currProcessor != null) {
                    currProcessor.adoptChild(macro, initPos + reg.getStart(), initPos + reg.getEnd());
                }
            }
        }
    }
}

From source file:cn.dreampie.resource.LessSource.java

/**
 * Resolves the import statements found in {@code normalizedContent}.
 *
 * <p>An imported resource without a {@code .less}, {@code .lss} or {@code .css} ending gets
 * {@code .less} appended. The import type is group 3 of the match when present, otherwise the
 * file extension. Only imports of type {@code less} are processed: the first import of a resource
 * is registered in {@code imports} and replaced via {@code includeImportedContent}; a repeated
 * import is cut out of the content. After every change the content is rescanned from the start.
 *
 * @throws IOException declared for loading the imported resources
 */
private void resolveImports() throws IOException {
    Matcher importMatcher = IMPORT_PATTERN.matcher(normalizedContent);
    while (importMatcher.find()) {
        String importedResource = importMatcher.group(5);
        if (!importedResource.matches(".*\\.(le?|c)ss$")) {
            importedResource = importedResource + ".less";
        }

        String importType = importMatcher.group(3);
        if (importType == null) {
            importType = importedResource.substring(importedResource.lastIndexOf(".") + 1);
        }
        if (!importType.equals("less")) {
            // other import types stay in the content untouched
            continue;
        }

        logger.debug("Importing %s", importedResource);

        if (imports.containsKey(importedResource)) {
            // already imported once: just remove this import statement (group 1)
            String beforeImport = normalizedContent.substring(0, importMatcher.start(1));
            String afterImport = normalizedContent.substring(importMatcher.end(1));
            normalizedContent = beforeImport + afterImport;
        } else {
            LessSource importedLessSource = new LessSource(getImportedResource(importedResource));
            imports.put(importedResource, importedLessSource);
            normalizedContent = includeImportedContent(importedLessSource, importMatcher);
        }

        // the content changed, so the old matcher is stale; rescan the new content
        importMatcher = IMPORT_PATTERN.matcher(normalizedContent);
    }
}

From source file:tr.edu.gsu.nerwip.recognition.internal.modelless.subee.Subee.java

/**
 * Takes advantage of hyperlinks in the text, in order
 * to detect entities. Most of the time, in a Wikipedia
 * article, the hyperlink is defined only for the very
 * first occurrence of the entity. For this reason,
 * an additional processing is required to find the possible
 * other occurrences (cf. {@link #processOccurrences(Article, List)}).
 * <p>
 * For each hyperlink whose text passes {@code StringTools.hasInitial}, the
 * last path segment of the link target is used as the Wikipedia title to look
 * up Freebase types. When a type is found, an entity is built for the link text
 * and, if acronyms are enabled, possibly a second one for a parenthesized
 * acronym inside or right after the link text.
 *
 * @param article
 *       Processed article.
 * @return
 *       The list of entities detected by this method.
 *
 * @throws ParserException
 *       Problem while parsing the hyperlinks.
 * @throws ClientProtocolException
 *       Problem while accessing Freebase.
 * @throws ParseException
 *       Problem while accessing Freebase.
 * @throws IOException
 *       Problem while accessing Freebase.
 * @throws org.json.simple.parser.ParseException
 *       Problem while accessing Freebase.
 */
private List<AbstractEntity<?>> processHyperlinks(Article article) throws ParserException,
        ClientProtocolException, ParseException, IOException, org.json.simple.parser.ParseException {
    logger.increaseOffset();
    List<AbstractEntity<?>> result = new ArrayList<AbstractEntity<?>>();

    // parse linked text to automatically get hyperlink list
    logger.log("Get hyperlink list");
    String linkedText = article.getLinkedText();
    // the linked text is wrapped in paragraph tags before parsing
    Parser parser = new Parser(TAG_PAR_START + linkedText + TAG_PAR_END);
    NodeList linkList = parser.parse(new TagNameFilter(TAG_LINK));
    // number of markup characters preceding the current link in the parsed text;
    // starts with the added opening tag and grows with every link processed
    int offset = TAG_PAR_START.length();

    // process each hyperlink
    logger.log("Process each hyperlink");
    logger.increaseOffset();
    for (int i = 0; i < linkList.size(); i++) {
        LinkTag linkTag = (LinkTag) linkList.elementAt(i);
        String valueStr = linkTag.getLinkText();
        int length = valueStr.length();
        // full HTML of the link, markup included
        String test = linkTag.toHtml();
        logger.log("Hyperlink '" + test + "'");

        // get type from Freebase
        EntityType type = null;
        // only process strings with uppercase initial
        if (StringTools.hasInitial(valueStr)) {
            String hyperlink = linkTag.getLink();
            // the Wikipedia title is the last path segment of the link target
            String[] linkParts = hyperlink.split("/");
            String lastPart = linkParts[linkParts.length - 1];
            String wikipediaTitle = URLDecoder.decode(lastPart, "UTF-8"); //TODO we may take advantage of this to automatically detect the type
            String wikipediaTitleEscaped = FbCommonTools.escapeMqlKey(wikipediaTitle); //TODO or this
            logger.log("Wikipedia title: " + wikipediaTitle);
            logger.log("Escaped Wikipedia title: " + wikipediaTitleEscaped);
            // use only the notable type
            if (notableType) {
                String possibleType = FbTypeTools.getNotableType(wikipediaTitleEscaped);
                if (possibleType == null)
                    logger.log("No notable Freebase type found for \"" + valueStr + "\"");
                else {
                    List<String> possibleTypes = new ArrayList<String>();
                    possibleTypes.add(possibleType);
                    type = retrieveEntityType(possibleTypes);
                }
            }
            // use all available types
            // (also the fallback when the notable type gave no usable entity type)
            if (type == null) {
                List<String> possibleTypes = FbTypeTools.getAllTypes(wikipediaTitleEscaped);
                logger.log("Possible types: " + possibleTypes.toString());
                if (possibleTypes.isEmpty())
                    logger.log("WARNING: no Freebase type found at all for \"" + valueStr + "\"");
                else
                    type = retrieveEntityType(possibleTypes);
            }
        }

        // set up the entity position
        // the tag position is converted by subtracting the markup seen so far
        int startPos = linkTag.getStartPosition() - offset;
        int endPos = startPos + length;
        // account for this link's own markup: its HTML length minus its text length
        offset = offset + test.length() - length;
        //debug
        //String text = article.getRawText();
        //String valueStr2 = text.substring(startPos,endPos);
        //boolean test2 = valueStr.equals(valueStr2);
        //if(!test2)
        //   System.out.println("ERROR: entity and article do not match (position problem)");

        // no type: we can't create the entity
        if (type == null) {
            logger.log("WARNING: no entity was created, because no type could be identified for \"" + valueStr
                    + "\"");
        }
        // otherwise, we try
        else { // ignore if purely numerical
            if (StringTools.hasNoLetter(valueStr))
                logger.log("The string is only numerical (no letters) so no entity is created for " + valueStr);

            // ignore if recognized as a location/organization but actually a demonym
            else if (discardDemonyms && (type == EntityType.LOCATION || type == EntityType.ORGANIZATION)
                    && DEMONYMS.contains(valueStr))
                logger.log("The string is in the demonym list, so no entity is created for " + valueStr);

            else {
                //debug
                //if(valueStr.equalsIgnoreCase("Irish"))
                //   System.out.print("");

                // possibly look for an acronym
                if (useAcronyms) { // only organization and locations have relevant acronyms
                                   // (for a person, acronyms usually correspond to titles or awards)
                    if (type == EntityType.ORGANIZATION || type == EntityType.LOCATION) { // check if there's an acronym inside the entity name itself
                        // parenthesized text without lowercase letters at the very end of the link text
                        Pattern r = Pattern.compile("\\([^\\(a-z]+?\\)$"); // must be in uppercase
                        Matcher m = r.matcher(valueStr);
                        if (m.find()) { // create an additional entity (acronym) with the same type
                            // the pattern has no capturing group, so groupCount() is 0 and
                            // group/start/end(last) refer to the whole match, parentheses included
                            int last = m.groupCount();
                            String acro = m.group(last);
                            int l = acro.length();
                            // strip the surrounding parentheses
                            acro = acro.substring(1, l - 1);
                            int s = startPos + m.start(last) + 1;
                            int e = startPos + m.end(last) - 1;
                            if (!StringTools.hasNoLetter(acro)) {
                                //debug
                                //String valueStr3 = text.substring(s,e);
                                //boolean test3 = acro.equals(valueStr3);
                                //if(!test3)
                                //   System.out.println("ERROR: entity acronym and article do not match (position problem)");
                                AbstractEntity<?> entity = AbstractEntity.build(type, s, e,
                                        RecognizerName.SUBEE, acro);
                                result.add(entity);
                                logger.log("Creation of an extra entity (acronym) " + entity);
                            }
                            // remove the acronym from the original string
                            valueStr = valueStr.substring(0, valueStr.length() - l).trim();
                            endPos = startPos + valueStr.length();
                        }
                        // check if there's an acronym right after the entity
                        else {
                            r = Pattern.compile("\\([^\\(a-z]+?\\)"); // must be in uppercase
                            m = r.matcher(linkedText);
                            // search starts behind the closing tag of the link, converted to a
                            // position in linkedText by removing the added opening paragraph tag
                            if (m.find(linkTag.getEndTag().getEndPosition() - TAG_PAR_START.length())) { // possibly create an additional entity (acronym) with the same type
                                // as above: no capturing group, so this is the whole match
                                int last = m.groupCount();
                                String acro = m.group(last);
                                acro = acro.substring(1, acro.length() - 1);
                                int s = m.start(last) - 1 - (offset - TAG_PAR_END.length()) + 1; // actually <a/> and not <p/>, but same length...
                                // the acronym must be right after the original entity
                                if (s == endPos + 2 && !StringTools.hasNoLetter(acro)) {
                                    int e = m.end(last) - 1 - (offset - TAG_PAR_END.length()) - 1;
                                    //debug
                                    //String valueStr3 = text.substring(s,e);
                                    //boolean test3 = acro.equals(valueStr3);
                                    //if(!test3)
                                    //   System.out.println("ERROR: entity acronym and article do not match (position problem)");
                                    AbstractEntity<?> entity = AbstractEntity.build(type, s, e,
                                            RecognizerName.SUBEE, acro);
                                    result.add(entity);
                                    logger.log("Creation of an extra entity (acronym) " + entity);
                                }
                            }
                        }
                    }
                }

                // create the entity
                // (valueStr and endPos may have been shortened above when an inner acronym was removed)
                AbstractEntity<?> entity = AbstractEntity.build(type, startPos, endPos, RecognizerName.SUBEE,
                        valueStr);
                result.add(entity);
                logger.log("Creation of the entity " + entity);
            }
        }
    }
    logger.decreaseOffset();

    logger.decreaseOffset();
    return result;
}

From source file:com.akop.bach.parser.XboxLiveParser.java

/**
 * Removes the part of an icon URL that is captured by group 1 of {@code PATTERN_LOADBAL_ICON}.
 *
 * @param loadBalIcon the icon URL to convert; may be {@code null}
 * @return {@code null} for a {@code null} input, the input itself when the pattern is not found,
 *     otherwise the input without the text matched by group 1
 */
public static String getStandardIcon(String loadBalIcon) {
    if (loadBalIcon == null) {
        return null;
    }

    Matcher iconMatcher = PATTERN_LOADBAL_ICON.matcher(loadBalIcon);
    if (iconMatcher.find()) {
        // keep what is in front of and behind the captured span
        String head = loadBalIcon.substring(0, iconMatcher.start(1));
        String tail = loadBalIcon.substring(iconMatcher.end(1));
        return head + tail;
    }

    return loadBalIcon;
}