List of usage examples for java.util.regex.Matcher.start
public int start()
public int start(int group)
public int start(String name)
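As a quick orientation before the project examples below, here is a minimal, self-contained sketch (class, pattern, and input are illustrative and not taken from any of the projects) showing that start(int group) returns the index of the first character of the given capturing group in the input, while end(int group) returns the offset after its last character:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MatcherStartDemo {
    public static void main(String[] args) {
        // Group 1 captures the user name, group 2 the domain of an e-mail address.
        Pattern pattern = Pattern.compile("(\\w+)@([\\w.]+)");
        Matcher matcher = pattern.matcher("contact: alice@example.com");

        if (matcher.find()) {
            // start()/end() without arguments refer to the whole match (group 0).
            System.out.println("match starts at " + matcher.start() + ", ends at " + matcher.end());
            // start(1)/end(1) delimit the first capturing group ("alice").
            System.out.println("group 1: [" + matcher.start(1) + ", " + matcher.end(1) + ") = "
                    + matcher.group(1));
            // start(2)/end(2) delimit the second capturing group ("example.com").
            System.out.println("group 2: [" + matcher.start(2) + ", " + matcher.end(2) + ") = "
                    + matcher.group(2));
        }
    }
}

Like group(int), start(int) throws IllegalStateException if no match has been attempted or the previous attempt failed, and returns -1 for a group that exists in the pattern but did not participate in the match, which is why the CreateDDL example below guards its calls with checks such as matcher.start(1) >= 0.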
From source file:com.streamsets.pipeline.stage.processor.fieldmask.FieldMaskProcessor.java
@VisibleForTesting
String regExMask(Field field, FieldMaskConfig fieldMaskConfig) {
    String value = field.getValueAsString();
    Matcher matcher = regExToPatternMap.get(fieldMaskConfig.regex).matcher(value);
    if (matcher.matches()) {
        int groupCount = matcher.groupCount();
        // create a masked string of the same length as the original string
        StringBuilder resultString = new StringBuilder();
        for (int i = 0; i < value.length(); i++) {
            resultString.append(MASK_CHAR);
        }
        // for each group that needs to be shown, replace the masked string with the original
        // string characters at those positions
        Set<Integer> groupsToShow = regexToGroupsToShowMap.get(fieldMaskConfig.regex);
        if (groupsToShow != null && !groupsToShow.isEmpty()) {
            for (int i = 1; i <= groupCount; i++) {
                if (groupsToShow.contains(i)) {
                    resultString.replace(matcher.start(i), matcher.end(i), matcher.group(i));
                }
            }
        }
        return resultString.toString();
    }
    return field.getValueAsString();
}
From source file:com.github.gekoh.yagen.ddl.CreateDDL.java
private static String getIfExistsDropStatement(Dialect dialect, String sql, String name) {
    Matcher matcher = DROP_TABLE_PATTERN.matcher(sql);
    if (matcher.find()) {
        StringBuilder res = new StringBuilder();
        res.append(sql.substring(0, matcher.start(1) >= 0 ? matcher.start(1) : matcher.start(2)))
                .append(getNameAndIfExistsWhenSupported(dialect, name == null ? matcher.group(2) : name))
                .append(sql.substring(matcher.end(3) >= 0 ? matcher.end(3) : matcher.end(2)));
        sql = res.toString();
    }
    return sql;
}
From source file:io.kahu.hawaii.util.logger.DefaultLogManager.java
void maskPasswords(final LoggingConfiguration config) throws JSONException {
    // Look at all fields that may contain a URL, and mask passwords in
    // query string parameters
    for (String field : config.getUrlFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof String)) {
            // If it's not a string, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }
        // It's a String so iterate over the QS parameters
        String url = (String) value;
        int p1 = url.indexOf('?');
        boolean changed = false;
        while (p1 >= 0) {
            int p2 = url.indexOf('=', p1 + 1);
            if (p2 < 0) {
                break;
            }
            String qsParam = url.substring(p1 + 1, p2);
            int p3 = url.indexOf('&', p2 + 1);
            if (p3 < 0) {
                p3 = url.length();
            }
            String qsValue = url.substring(p2 + 1, p3);
            if (config.getPasswordParameters().contains(qsParam)) {
                qsValue = MASKED_PASSWORD;
                url = url.substring(0, p2 + 1) + qsValue + url.substring(p3);
                changed = true;
            }
            p1 = url.indexOf('&', p1 + 1);
        }
        if (changed) {
            putContext(field, url);
        }
    }

    // Next, look at all parameter objects, which are String -> List of
    // String mappings
    for (String field : config.getParameterFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof JSONObject)) {
            // If it's not a JSONObject, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }
        // It's a JSONObject, so discover and nuke all possible password
        // fields
        JSONObject params = (JSONObject) value;
        boolean changed = false;
        for (String key : config.getPasswordParameters()) {
            if (params.has(key)) {
                JSONArray a = new JSONArray();
                a.put(MASKED_PASSWORD);
                params.put(key, a);
                changed = true;
            }
        }
        if (changed) {
            putContext(field, params);
        }
    }

    // Next, look at all request/response headers, which are stored as an
    // array of strings
    for (String field : config.getHeaderFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof JSONArray)) {
            // If it's not a JSONArray, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }
        // It's a JSONArray, so discover and nuke all possible password
        // headers
        JSONArray headers = (JSONArray) value;
        boolean changed = false;
        for (String key : config.getPasswordParameters()) {
            for (int i = 0; i < headers.length(); i++) {
                String header = headers.getString(i);
                if (header.startsWith(key + ":")) {
                    headers.put(i, key + ": " + MASKED_PASSWORD);
                    changed = true;
                }
            }
        }
        if (changed) {
            putContext(field, headers);
        }
    }

    // Finally, look at all body fields for password patterns
    for (String field : config.getBodyFields()) {
        Object value = getContext(field);
        if (value == null || value == JSONObject.NULL || value == JSONObject.EXPLICIT_NULL) {
            continue;
        }
        if (!(value instanceof String)) {
            // If it's not a string, just play it safe and replace the
            // entire parameter
            putContext(field, MASKED_PASSWORD);
            continue;
        }
        // It's a String, so apply all password patterns to mask the
        // passwords
        String body = (String) value;
        boolean changed = false;
        if (looksLikeJson(body)) {
            body = removePasswordFieldsFromJsonBody(body, Arrays.asList(config.getBodyPasswordFields()));
            changed = (body != null);
        } else {
            for (Pattern pattern : config.getBodyPasswordPatterns()) {
                Matcher m = pattern.matcher(body);
                int i = 0;
                while (m.find(i)) {
                    body = body.substring(0, m.start(1)) + MASKED_PASSWORD + body.substring(m.end(1));
                    i = m.end() - m.group(1).length() + MASKED_PASSWORD.length();
                    m = pattern.matcher(body);
                    changed = true;
                }
            }
        }
        if (changed) {
            putContext(field, body);
        }
    }
}
From source file:com.github.gekoh.yagen.ddl.CreateDDL.java
private static String addAuditColumns(Dialect dialect, String sqlCreate, Set<String> columns) {
    Matcher matcher = TBL_PATTERN.matcher(sqlCreate);
    if (matcher.find()) {
        StringBuilder sb = new StringBuilder(
                sqlCreate.substring(0, matcher.start(TBL_PATTERN_IDX_AFTER_COL_DEF)));
        for (String auditColumn : AUDIT_COLUMNS) {
            if (!columns.contains(auditColumn)) {
                sb.append(", ").append(formatColumn(dialect, AUDIT_COLUMN_DEFINITION.get(auditColumn),
                        Constants.USER_NAME_LEN, null, null));
                columns.add(auditColumn);
            }
        }
        sb.append(sqlCreate.substring(matcher.start(TBL_PATTERN_IDX_AFTER_COL_DEF)));
        sqlCreate = sb.toString();
    }
    return sqlCreate;
}
From source file:com.vladsch.idea.multimarkdown.editor.MultiMarkdownPreviewEditor.java
protected String makeHtmlPage(String html) {
    VirtualFile file = FileDocumentManager.getInstance().getFile(document);
    // scan for <table>, </table>, <tr>, </tr> and other tags we modify, this could be done with a
    // custom plugin to pegdown but then it would be more trouble to get un-modified HTML.
    String regex = "(<table>|<thead>|<tbody>|<tr>|<hr/>|<del>|</del>|</p>|<kbd>|</kbd>|<var>|</var>"; //|<code>|</code>";
    StringBuilder result = new StringBuilder(html.length() + (html.length() >> 2));
    String gitHubHref = MultiMarkdownPathResolver.getGitHubDocumentURL(project, document, !isWikiDocument);
    String gitHubClose = "";
    if (gitHubHref == null) {
        gitHubHref = "";
    } else {
        gitHubHref = "<a href=\"" + gitHubHref + "\" name=\"wikipage\" id=\"wikipage\">";
        gitHubClose = "</a>";
    }
    if (isWikiDocument) {
        result.append("<body class=\"multimarkdown-wiki-preview\">\n<div class=\"content\">\n");
        result.append("" + "<h1 class=\"first-child\">").append(gitHubHref)
                .append(escapeHtml(file == null ? "" : file.getNameWithoutExtension().replace('-', ' ')))
                .append(gitHubClose).append("</h1>\n").append("");
    } else {
        result.append("<body class=\"multimarkdown-preview\">\n<div class=\"content\">\n"
                + "<div class=\"page-header\">").append(gitHubHref)
                .append(escapeHtml(file == null ? "" : file.getName().replace('-', ' '))).append(gitHubClose)
                .append("</div>\n").append("<div class=\"hr\"></div>\n").append("");
        // for now nothing
        regex += "|<h1>";
    }
    String regexTail = "|<li>\\n*\\s*<p>";
    boolean isDarkTheme = isDarkTheme();
    boolean taskLists = isTaskLists();
    if (taskLists) {
        regex += "|<li class=\"task-list-item\">\\n*\\s*<p>|<br\\s*/?>|<li class=\"task-list-item\">|<li>\\[(?:x|X)\\]\\s*|<li>\\[ \\]\\s*|<li>\\n*\\s*<p>\\[x\\]\\s*|<li>\\n*\\s*<p>\\[ \\]\\s*";
    }
    regex += regexTail;
    regex += ")";
    Pattern p = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    Matcher m = p.matcher(html);
    int lastPos = 0;
    int rowCount = 0;
    boolean[] isOrderedList = new boolean[20];
    int listDepth = -1;
    boolean firstChildH1 = !isWikiDocument;
    while (m.find()) {
        String found = m.group();
        if (lastPos < m.start(0)) {
            result.append(html.substring(lastPos, m.start(0)));
        }
        if (found.equals("</p>")) {
            result.append(found);
        } else if (found.startsWith("<br")) {
            result.append("<br/>\n");
        } else if (found.equals("<table>")) {
            rowCount = 0;
            result.append(found);
        } else if (found.equals("<thead>")) {
            result.append(found);
        } else if (found.equals("<tbody>")) {
            result.append(found);
        } else if (found.equals("/>")) {
            result.append(">");
        } else if (found.equals("<tr>")) {
            rowCount++;
            result.append("<tr class=\"")
                    .append(rowCount == 1 ? "first-child" : (rowCount & 1) != 0 ? "odd-child" : "even-child")
                    .append("\">");
        } else if (found.equals("<hr/>")) {
            result.append("<div class=\"hr\"> </div>");
        } else if (found.equals("<h1>")) {
            result.append(firstChildH1 ? "<h1 class=\"first-child\">" : "<h1>");
            firstChildH1 = false;
        } else if (found.equals("<del>")) {
            result.append("<span class=\"del\">");
        } else if (found.equals("</del>")) {
            result.append("</span>");
        } else if (found.equals("<kbd>")) {
            result.append("<span class=\"kbd\">");
        } else if (found.equals("</kbd>")) {
            result.append("</span>");
        } else if (found.equals("<code>")) {
            result.append("<span class=\"code\">");
        } else if (found.equals("</code>")) {
            result.append("</span>");
        } else if (found.equals("<var>")) {
            result.append("<span class=\"var\">");
        } else if (found.equals("</var>")) {
            result.append("</span>");
        } else {
            found = found.trim();
            if (taskLists && found.equals("<li>[x]")) {
                result.append("<li class=\"dtask\">");
            } else if (taskLists && found.equals("<li>[X]")) {
                result.append("<li class=\"dtask\">");
            } else if (taskLists && found.equals("<li>[ ]")) {
                result.append("<li class=\"dtask\">");
            } else if (taskLists && found.equals("<li class=\"task-list-item\">")) {
                result.append("<li class=\"taski\">");
            } else {
                // here we have <li>\n*\s*<p>, need to strip out \n*\s* so we can match them easier
                String foundWithP = found;
                foundWithP = foundWithP.replaceAll("<li>\\n*\\s*<p>", "<li><p>");
                found = foundWithP.replaceAll("<li class=\"task-list-item\">\\n*\\s*<p>", "<li class=\"task\"><p>");
                found = found.trim();
                if (found.equals("<li><p>")) {
                    result.append("<li class=\"p\"><p class=\"p\">");
                } else if (taskLists && found.equals("<li><p>[x]")) {
                    result.append("<li class=\"dtaskp\"><p class=\"p\">");
                } else if (taskLists && found.equals("<li><p>[ ]")) {
                    result.append("<li class=\"dtaskp\"><p class=\"p\">");
                } else if (taskLists && found.equals("<li class=\"task-list-item\"><p>")) {
                    result.append("<li class=\"taskp\"><p class=\"p\">");
                } else {
                    result.append(found);
                }
            }
        }
        lastPos = m.end(0);
    }
    if (lastPos < html.length()) {
        result.append(html.substring(lastPos));
    }
    result.append("\n</div>\n</body>\n");
    return result.toString();
}
From source file:com.icesoft.faces.component.style.OutputStyleRenderer.java
public void encodeEnd(FacesContext facesContext, UIComponent uiComponent) throws IOException {
    validateParameters(facesContext, uiComponent, OutputStyle.class);
    try {
        DOMContext domContext = DOMContext.attachDOMContext(facesContext, uiComponent);
        if (!domContext.isInitialized()) {
            OutputStyle outputStyle = (OutputStyle) uiComponent;
            Element styleEle = buildCssElement(domContext);
            String href = outputStyle.getHref();
            styleEle.setAttribute(HTML.HREF_ATTR, getResourceURL(facesContext, href));
            domContext.setRootNode(styleEle);
            int browserType = browserType(facesContext, uiComponent);
            if (browserType != DEFAULT_TYPE) {
                if (href.endsWith(CSS_EXTENTION)) {
                    int i = href.indexOf(CSS_EXTENTION);
                    if (i > 0) {
                        String start = href.substring(0, i);
                        Element ieStyleEle = buildCssElement(domContext);
                        String extention = IE_EXTENTION;
                        if (browserType == SAFARI) {
                            extention = SAFARI_EXTENTION;
                        }
                        if (browserType == DT) {
                            extention = DT_EXTENTION;
                        }
                        if (browserType == IE_7) {
                            extention = IE_7_EXTENTION;
                        }
                        if (browserType == IE_8) {
                            extention = IE_8_EXTENSION;
                        }
                        if (browserType == SAFARI_MOBILE) {
                            extention = SAFARI_MOBILE_EXTENTION;
                        }
                        if (browserType == OPERA) {
                            extention = OPERA_EXTENTION;
                        }
                        if (browserType == OPERA_MOBILE) {
                            extention = OPERA_MOBILE_EXTENTION;
                        }
                        String browserSpecificFilename = useSpecific(facesContext, start, extention);
                        if (browserSpecificFilename != null) {
                            // W3C spec: To make a style sheet preferred, set the rel attribute to
                            // "stylesheet" and name the style sheet with the title attribute
                            ieStyleEle.setAttribute(HTML.TITLE_ATTR, extention);
                            String hrefURL = CoreUtils.resolveResourceURL(facesContext, browserSpecificFilename);
                            ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL);
                            styleEle.getParentNode().appendChild(ieStyleEle);
                        }
                    } else {
                        throw new RuntimeException("OutputStyle file attribute is too short. "
                                + "Needs at least one character before .css. Current Value is [" + href + "]");
                    }
                } else {
                    Matcher matcher = Pattern
                            .compile(".*javax\\.faces\\.resource/((.*)\\.css)(\\..*)?\\?ln=([^&]*)(&.*|$)")
                            .matcher(href);
                    if (matcher.matches()) {
                        Element ieStyleEle = buildCssElement(domContext);
                        String extension = browserType >= 0 && browserType < extensions.length
                                ? extensions[browserType] : IE_EXTENTION;
                        ieStyleEle.setAttribute(HTML.TITLE_ATTR, extension);
                        String hrefURL = new StringBuffer(matcher.group(0)).insert(matcher.end(2), extension)
                                .toString();
                        ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL);
                        String resourceName = new StringBuffer(matcher.group(1))
                                .insert(matcher.end(2) - matcher.start(2), extension).toString();
                        Resource resource = facesContext.getApplication().getResourceHandler()
                                .createResource(resourceName, matcher.group(4));
                        if (resource != null) {
                            styleEle.getParentNode().appendChild(ieStyleEle);
                        }
                    }
                }
            }
        }
        domContext.stepOver();
    } catch (Exception e) {
        log.error("Error in OutputStyleRenderer", e);
    }
}
From source file:com.edgenius.wiki.render.filter.MacroFilter.java
private void checkGroup(final int initPos, CharSequence input, final LinkedList<GroupProcessor> stack,
        List<GroupProcessor> processors) {
    final List<Region> pairRegions = new ArrayList<Region>();
    singleMacroProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher result) {
            String macroName = result.group(1);
            if (macroName != null && !macroName.startsWith("$")) {
                Macro macro = macroMgr.getMacro(macroName);
                if (macro != null) {
                    // IMPORTANT: Macro.isPair() is not checked here; the region is always added to
                    // pairRegions for later processing. The processing order must stay consistent with
                    // the physical sequence in the input text: for example, in {table}{cell}...{rowdiv},
                    // rowdiv is single and must come after cell.
                    int start = result.start(0);
                    int end = result.end(0);
                    Region pair = new Region(start, end);
                    // no parameter: mark as unknown; otherwise it must be a start macro
                    if (StringUtils.isBlank(result.group(2))) {
                        pair.setKey(MACRO_REGION_KEY_UNKNOWN);
                    } else {
                        pair.setKey(MACRO_REGION_KEY_START);
                    }
                    // temporarily remember the macro name
                    pair.setContent(macroName);
                    // add to the list
                    pairRegions.add(pair);
                }
            }
        }
    });
    int size = pairRegions.size();
    if (size > 0) {
        StringBuffer inputBuf = new StringBuffer(input);
        for (int idx = 0; idx < size; idx++) {
            Region reg = pairRegions.get(idx);
            Macro macro = macroMgr.getMacro(reg.getContent());
            if (macro.isPaired()) {
                int deep = 0;
                Region pair = null;
                // looking for pairs...
                for (int chIdx = idx + 1; chIdx < size; chIdx++) {
                    Region next = pairRegions.get(chIdx);
                    if (StringUtils.equalsIgnoreCase(reg.getContent(), next.getContent())) {
                        // if the start is unknown (no attribute), then the end must be unknown as well
                        if (MACRO_REGION_KEY_UNKNOWN.equals(reg.getKey())
                                && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) {
                            // matched
                            pair = next;
                            // skip all internal nodes - they are handled by the embedded recursion
                            idx = chIdx;
                            break;
                        }
                        if (MACRO_REGION_KEY_START.equals(reg.getKey())
                                && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) {
                            if (deep == 0) {
                                // matched
                                pair = next;
                                // skip all internal nodes - they are handled by the embedded recursion
                                idx = chIdx;
                                break;
                            } else {
                                // just another inner macro with the same name matched; decrease depth
                                deep--;
                            }
                        }
                        if (MACRO_REGION_KEY_START.equals(next.getKey())) {
                            // got another start of the same macro, so increase depth
                            deep++;
                        }
                    }
                }
                // successfully found the pair
                if (pair != null) {
                    int start = initPos + reg.getStart();
                    int end = initPos + pair.getEnd();
                    int contentStart = initPos + reg.getEnd();
                    int contentEnd = initPos + pair.getStart();
                    GroupProcessor currProcessor = stack.size() == 0 ? null : stack.getLast();
                    if (currProcessor != null) {
                        currProcessor.adoptChild(macro, start, end);
                    }
                    if (macro.isProcessEmbedded() && (end > start)) {
                        if (macro.hasChildren() != null) {
                            stack.add(((GroupProcessorMacro) macro).newGroupProcessor(macro, start, end));
                        }
                        checkGroup(contentStart,
                                inputBuf.subSequence(contentStart - initPos, contentEnd - initPos), stack,
                                processors);
                        if (macro.hasChildren() != null) {
                            // pop the current one; it is now a completed GroupProcessor
                            processors.add(stack.removeLast());
                        }
                    }
                }
            } else {
                // single macro - directly check whether it is a child
                GroupProcessor currProcessor = stack.size() == 0 ? null : stack.getLast();
                if (currProcessor != null) {
                    currProcessor.adoptChild(macro, initPos + reg.getStart(), initPos + reg.getEnd());
                }
            }
        }
    }
}
From source file:cn.dreampie.resource.LessSource.java
private void resolveImports() throws IOException {
    Matcher importMatcher = IMPORT_PATTERN.matcher(normalizedContent);
    while (importMatcher.find()) {
        String importedResource = importMatcher.group(5);
        importedResource = importedResource.matches(".*\\.(le?|c)ss$") ? importedResource
                : importedResource + ".less";
        String importType = importMatcher.group(3) == null
                ? importedResource.substring(importedResource.lastIndexOf(".") + 1)
                : importMatcher.group(3);
        if (importType.equals("less")) {
            logger.debug("Importing %s", importedResource);
            if (!imports.containsKey(importedResource)) {
                LessSource importedLessSource = new LessSource(getImportedResource(importedResource));
                imports.put(importedResource, importedLessSource);
                normalizedContent = includeImportedContent(importedLessSource, importMatcher);
                importMatcher = IMPORT_PATTERN.matcher(normalizedContent);
            } else {
                normalizedContent = normalizedContent.substring(0, importMatcher.start(1))
                        + normalizedContent.substring(importMatcher.end(1));
                importMatcher = IMPORT_PATTERN.matcher(normalizedContent);
            }
        }
    }
}
From source file:tr.edu.gsu.nerwip.recognition.internal.modelless.subee.Subee.java
/**
 * Takes advantage of hyperlinks in the text, in order
 * to detect entities. Most of the time, in a Wikipedia
 * article, the hyperlink is defined only for the very
 * first occurrence of the entity. For this reason,
 * an additional processing is required to find the possible
 * other occurrences (cf. {@link #processOccurrences(Article, List)}).
 *
 * @param article
 *        Processed article.
 * @return
 *        The list of entities detected by this method.
 *
 * @throws ParserException
 *         Problem while parsing the hyperlinks.
 * @throws ClientProtocolException
 *         Problem while accessing Freebase.
 * @throws ParseException
 *         Problem while accessing Freebase.
 * @throws IOException
 *         Problem while accessing Freebase.
 * @throws org.json.simple.parser.ParseException
 *         Problem while accessing Freebase.
 */
private List<AbstractEntity<?>> processHyperlinks(Article article) throws ParserException,
        ClientProtocolException, ParseException, IOException, org.json.simple.parser.ParseException {
    logger.increaseOffset();
    List<AbstractEntity<?>> result = new ArrayList<AbstractEntity<?>>();

    // parse linked text to automatically get hyperlink list
    logger.log("Get hyperlink list");
    String linkedText = article.getLinkedText();
    Parser parser = new Parser(TAG_PAR_START + linkedText + TAG_PAR_END);
    NodeList linkList = parser.parse(new TagNameFilter(TAG_LINK));
    int offset = TAG_PAR_START.length();

    // process each hyperlink
    logger.log("Process each hyperlink");
    logger.increaseOffset();
    for (int i = 0; i < linkList.size(); i++) {
        LinkTag linkTag = (LinkTag) linkList.elementAt(i);
        String valueStr = linkTag.getLinkText();
        int length = valueStr.length();
        String test = linkTag.toHtml();
        logger.log("Hyperlink '" + test + "'");

        // get type from Freebase
        EntityType type = null;
        // only process strings with uppercase initial
        if (StringTools.hasInitial(valueStr)) {
            String hyperlink = linkTag.getLink();
            String[] linkParts = hyperlink.split("/");
            String lastPart = linkParts[linkParts.length - 1];
            String wikipediaTitle = URLDecoder.decode(lastPart, "UTF-8"); //TODO we may take advantage of this to automatically detect the type
            String wikipediaTitleEscaped = FbCommonTools.escapeMqlKey(wikipediaTitle); //TODO or this
            logger.log("Wikipedia title: " + wikipediaTitle);
            logger.log("Escaped Wikipedia title: " + wikipediaTitleEscaped);

            // use only the notable type
            if (notableType) {
                String possibleType = FbTypeTools.getNotableType(wikipediaTitleEscaped);
                if (possibleType == null)
                    logger.log("No notable Freebase type found for \"" + valueStr + "\"");
                else {
                    List<String> possibleTypes = new ArrayList<String>();
                    possibleTypes.add(possibleType);
                    type = retrieveEntityType(possibleTypes);
                }
            }
            // use all available types
            if (type == null) {
                List<String> possibleTypes = FbTypeTools.getAllTypes(wikipediaTitleEscaped);
                logger.log("Possible types: " + possibleTypes.toString());
                if (possibleTypes.isEmpty())
                    logger.log("WARNING: no Freebase type found at all for \"" + valueStr + "\"");
                else
                    type = retrieveEntityType(possibleTypes);
            }
        }

        // set up the entity position
        int startPos = linkTag.getStartPosition() - offset;
        int endPos = startPos + length;
        offset = offset + test.length() - length;

        //debug
        //String text = article.getRawText();
        //String valueStr2 = text.substring(startPos,endPos);
        //boolean test2 = valueStr.equals(valueStr2);
        //if(!test2)
        //    System.out.println("ERROR: entity and article do not match (position problem)");

        // no type: we can't create the entity
        if (type == null) {
            logger.log("WARNING: no entity was created, because no type could be identified for \"" + valueStr + "\"");
        }
        // otherwise, we try
        else {
            // ignore if purely numerical
            if (StringTools.hasNoLetter(valueStr))
                logger.log("The string is only numerical (no letters) so no entity is created for " + valueStr);
            // ignore if recognized as a location/organization but actually a demonym
            else if (discardDemonyms && (type == EntityType.LOCATION || type == EntityType.ORGANIZATION)
                    && DEMONYMS.contains(valueStr))
                logger.log("The string is in the demonym list, so no entity is created for " + valueStr);
            else {
                //debug
                //if(valueStr.equalsIgnoreCase("Irish"))
                //    System.out.print("");

                // possibly look for an acronym
                if (useAcronyms) {
                    // only organization and locations have relevant acronyms
                    // (for a person, acronyms usually correspond to titles or awards)
                    if (type == EntityType.ORGANIZATION || type == EntityType.LOCATION) {
                        // check if there's an acronym inside the entity name itself
                        Pattern r = Pattern.compile("\\([^\\(a-z]+?\\)$"); // must be in uppercase
                        Matcher m = r.matcher(valueStr);
                        if (m.find()) {
                            // create an additional entity (acronym) with the same type
                            int last = m.groupCount();
                            String acro = m.group(last);
                            int l = acro.length();
                            acro = acro.substring(1, l - 1);
                            int s = startPos + m.start(last) + 1;
                            int e = startPos + m.end(last) - 1;
                            if (!StringTools.hasNoLetter(acro)) {
                                //debug
                                //String valueStr3 = text.substring(s,e);
                                //boolean test3 = acro.equals(valueStr3);
                                //if(!test3)
                                //    System.out.println("ERROR: entity acronym and article do not match (position problem)");
                                AbstractEntity<?> entity = AbstractEntity.build(type, s, e, RecognizerName.SUBEE, acro);
                                result.add(entity);
                                logger.log("Creation of an extra entity (acronym) " + entity);
                            }
                            // remove the acronym from the original string
                            valueStr = valueStr.substring(0, valueStr.length() - l).trim();
                            endPos = startPos + valueStr.length();
                        }
                        // check if there's an acronym right after the entity
                        else {
                            r = Pattern.compile("\\([^\\(a-z]+?\\)"); // must be in uppercase
                            m = r.matcher(linkedText);
                            if (m.find(linkTag.getEndTag().getEndPosition() - TAG_PAR_START.length())) {
                                // possibly create an additional entity (acronym) with the same type
                                int last = m.groupCount();
                                String acro = m.group(last);
                                acro = acro.substring(1, acro.length() - 1);
                                int s = m.start(last) - 1 - (offset - TAG_PAR_END.length()) + 1; // actually <a/> and not <p/>, but same length...
                                // the acronym must be right after the original entity
                                if (s == endPos + 2 && !StringTools.hasNoLetter(acro)) {
                                    int e = m.end(last) - 1 - (offset - TAG_PAR_END.length()) - 1;
                                    //debug
                                    //String valueStr3 = text.substring(s,e);
                                    //boolean test3 = acro.equals(valueStr3);
                                    //if(!test3)
                                    //    System.out.println("ERROR: entity acronym and article do not match (position problem)");
                                    AbstractEntity<?> entity = AbstractEntity.build(type, s, e, RecognizerName.SUBEE, acro);
                                    result.add(entity);
                                    logger.log("Creation of an extra entity (acronym) " + entity);
                                }
                            }
                        }
                    }
                }

                // create the entity
                AbstractEntity<?> entity = AbstractEntity.build(type, startPos, endPos, RecognizerName.SUBEE, valueStr);
                result.add(entity);
                logger.log("Creation of the entity " + entity);
            }
        }
    }
    logger.decreaseOffset();
    logger.decreaseOffset();

    return result;
}
From source file:com.akop.bach.parser.XboxLiveParser.java
public static String getStandardIcon(String loadBalIcon) {
    if (loadBalIcon == null)
        return null;

    Matcher m;
    if (!(m = PATTERN_LOADBAL_ICON.matcher(loadBalIcon)).find())
        return loadBalIcon;

    String replacement = loadBalIcon.substring(0, m.start(1)) + loadBalIcon.substring(m.end(1));
    return replacement;
}