List of usage examples for java.util.regex.Matcher.start
public int start(String name)
From source file:com.chiorichan.factory.parsers.BasicParser.java
public String runParser(String source) throws Exception { if (source == null || source.isEmpty()) return ""; Matcher m1 = p1.matcher(source); Matcher m2 = p2.matcher(source); while (m1.find() && m2.find()) { String[] args = m1.group(1).split("[ ]?,[ ]?"); String[] args2 = new String[args.length + 1]; args2[0] = m1.group(0);/* w w w .j a va2s . c o m*/ for (int i = 0; i < args.length; i++) args2[i + 1] = StringFunc.trimAll(args[i].trim(), '"'); String result = resolveMethod(args2); if (result == null) result = ""; source = new StringBuilder(source).replace(m2.start(1), m2.end(1), result).toString(); // We have to reset the matcher since the source changes with each loop m1 = p1.matcher(source); m2 = p2.matcher(source); } return source; }
From source file:com.joliciel.talismane.tokeniser.filters.TokenRegexFilterImpl.java
@Override public Set<TokenPlaceholder> apply(String text) { Set<TokenPlaceholder> placeholders = new HashSet<TokenPlaceholder>(); Matcher matcher = this.getPattern().matcher(text); int lastStart = -1; while (matcher.find()) { int start = matcher.start(groupIndex); if (start > lastStart) { int end = matcher.end(groupIndex); String newText = RegexUtils.getReplacement(replacement, text, matcher); TokenPlaceholder placeholder = this.tokeniserFilterService.getTokenPlaceholder(start, end, newText, regex);//ww w . j a v a 2 s . co m placeholder.setPossibleSentenceBoundary(this.possibleSentenceBoundary); for (String key : attributes.keySet()) placeholder.addAttribute(key, attributes.get(key)); placeholders.add(placeholder); } lastStart = start; } return placeholders; }
From source file:mobi.jenkinsci.server.core.net.ProxyUtil.java
/**
 * Replaces every image reference matched by {@code imgLinkPattern} in {@code resultString}
 * with whatever {@code retrieveImage} returns for it.
 *
 * <p>References for which {@code isAlreadyDataEncoded} returns {@code true} are left untouched.
 * When {@code retrieveImage} throws, a warning is logged and nothing is written in place of
 * that reference.
 *
 * @param userAgent passed through to {@code retrieveImage}
 * @param pluginName passed through to {@code retrieveImage}
 * @param imgLinkPattern pattern locating image references
 * @param index capture group of {@code imgLinkPattern} holding the image path
 * @param url URL of the page being processed; used for logging and passed to {@code retrieveImage}
 * @param resultString the text to rewrite
 * @return the rewritten text
 */
private String resolveImages(final String userAgent, final String pluginName, final Pattern imgLinkPattern,
        final int index, final String url, final String resultString) {
    log.debug("Resolving images for URL " + url);
    final StringBuilder outString = new StringBuilder();

    // Two cursors are required: searchFrom always moves past the match just seen, while
    // copiedUpTo only moves when a reference was actually replaced. The original used a single
    // cursor that was not advanced for already-encoded images, so find() returned the very same
    // match again and the loop never terminated.
    int copiedUpTo = 0;
    int searchFrom = 0;

    final Matcher matcher = imgLinkPattern.matcher(resultString);
    while (matcher.find(searchFrom)) {
        final int start = matcher.start(index);
        final int end = matcher.end(index);
        final String imagePath = matcher.group(index);
        searchFrom = end;

        if (isAlreadyDataEncoded(imagePath)) {
            // Leave it in place; it is copied verbatim with the next chunk or with the tail.
            continue;
        }

        outString.append(resultString.substring(copiedUpTo, start));
        try {
            outString.append(retrieveImage(userAgent, pluginName, url, imagePath));
        } catch (final Exception e) {
            log.warn("Cannot retrieve image '" + imagePath + "'", e);
        }
        copiedUpTo = end;
    }

    outString.append(resultString.substring(copiedUpTo));
    log.debug(outString.length() + " Base64 of images included for URL " + url);
    return outString.toString();
}
From source file:ai.susi.mind.SusiAction.java
/**
 * Action descriptions are templates for data content. Strings may refer to arguments from
 * a thought deduction using variable templates. I.e. "$name$" inside an action string would
 * refer to a data entity in a thought argument which has the name "name". Applying the
 * Action to a thought will instantiate such variable templates and produce a new String
 * attribute named "expression".
 *
 * <p>Depending on the render type, this method rewrites attributes of {@code this.json} in
 * place: "expression" (and possibly "type") for answer/self actions, "query" for websearch,
 * "link" and "text" for anchor, and "latitude", "longitude" and "zoom" for map actions.
 *
 * @param thoughts an argument from previously applied inferences
 * @param mind used to re-evaluate self-referring parts of the answer and "self" actions
 * @param client passed through to {@code mind.react}
 * @return the action with the attribute "expression" instantiated by unification of the thought with the action
 */
public SusiAction execution(SusiArgument thoughts, SusiMind mind, String client) {
    if ((this.getRenderType() == RenderType.answer || this.getRenderType() == RenderType.self) && this.json.has("phrases")) {
        // pick one of the phrases at random and fill in its variables from the thoughts
        // NOTE(review): random.nextInt(0) throws if getPhrases() is empty -- confirm it cannot be
        ArrayList<String> a = getPhrases();
        String phrase = a.get(random.nextInt(a.size()));
        String expression = thoughts.unify(phrase);
        if (expression != null) {
            // transform the answer according to the data
            // this is the final chance that we can add another thought according to a memorizing skill in the answer string
            Matcher m;
            // self-referrer: evaluate contents from the answer expression as recursion, i.e. susi is asked again.
            // Group 1 plus one character on each side is replaced by the reaction
            // (presumably the surrounding delimiters -- confirm against the self_referrer pattern).
            while (new TimeoutMatcher(m = self_referrer.matcher(expression)).matches()) {
                String observation = m.group(1);
                expression = expression.substring(0, m.start(1) - 1) + mind.react(observation, client, new SusiThought()) + expression.substring(m.end(1) + 1);
            }
            // assignments set variables from the result expressions. These can be visible or invisible.
            // visible: the text up to the end of group 1 is kept, everything up to the end of group 2 is cut
            while (new TimeoutMatcher(m = visible_assignment.matcher(expression)).matches()) {
                String observation = m.group(1);
                String variable = m.group(2);
                expression = expression.substring(0, m.end(1)) + expression.substring(m.end(2));
                // write the variable v as side-effect into the thoughts argument
                thoughts.think(new SusiThought().addObservation(variable, observation));
            }
            // blind: everything from one character before group 1 up to the end of group 2 is cut
            while (new TimeoutMatcher(m = blind_assignment.matcher(expression)).matches()) {
                String observation = m.group(1);
                String variable = m.group(2);
                expression = expression.substring(0, m.start(1) - 1) + expression.substring(m.end(2));
                // write the variable v as side-effect into the thoughts argument
                thoughts.think(new SusiThought().addObservation(variable, observation));
            }
            // find a response type: self-recursion or answer
            if (this.getRenderType() == RenderType.answer) {
                // the expression is answered to the communication partner
                this.json.put("expression", expression);
            }
            if (this.getRenderType() == RenderType.self) {
                // recursive call susi with the answer
                expression = mind.react(expression, client, new SusiThought());
                this.json.put("expression", expression);
                this.phrasesCache = null; // important, otherwise the expression is not recognized
                // patch the render type: from here on this action is reported as an answer
                this.json.put("type", RenderType.answer.name());
                this.renderTypeCache = RenderType.answer;
            }
        }
    }
    // the remaining render types only need their string attributes unified with the thoughts
    if (this.getRenderType() == RenderType.websearch && this.json.has("query")) {
        this.json.put("query", thoughts.unify(getStringAttr("query")));
    }
    if (this.getRenderType() == RenderType.anchor && this.json.has("link") && this.json.has("text")) {
        this.json.put("link", thoughts.unify(getStringAttr("link")));
        this.json.put("text", thoughts.unify(getStringAttr("text")));
    }
    if (this.getRenderType() == RenderType.map && this.json.has("latitude") && this.json.has("longitude") && this.json.has("zoom")) {
        this.json.put("latitude", thoughts.unify(getStringAttr("latitude")));
        this.json.put("longitude", thoughts.unify(getStringAttr("longitude")));
        this.json.put("zoom", thoughts.unify(getStringAttr("zoom")));
    }
    return this;
}
From source file:de.escalon.hypermedia.spring.PartialUriTemplate.java
/** * Creates a new {@link PartialUriTemplate} using the given template string. * * @param template must not be {@literal null} or empty. *///from w w w. j a v a2 s. co m public PartialUriTemplate(String template) { Assert.hasText(template, "Template must not be null or empty!"); Matcher matcher = VARIABLE_REGEX.matcher(template); // first group is the variable start without leading {: "", "/", "?", "#", // second group is the comma-separated name list without the trailing } of the variable int endOfPart = 0; while (matcher.find()) { // 0 is the current match, i.e. the entire variable expression int startOfPart = matcher.start(0); // add part before current match if (endOfPart < startOfPart) { final String partWithoutVariables = template.substring(endOfPart, startOfPart); final StringTokenizer stringTokenizer = new StringTokenizer(partWithoutVariables, "?", true); boolean inQuery = false; while (stringTokenizer.hasMoreTokens()) { final String token = stringTokenizer.nextToken(); if ("?".equals(token)) { inQuery = true; } else { if (!inQuery) { urlComponents.add(token); } else { urlComponents.add("?" 
+ token); } variableIndices.add(Collections.<Integer>emptyList()); } } } endOfPart = matcher.end(0); // add current match as part final String variablePart = template.substring(startOfPart, endOfPart); urlComponents.add(variablePart); // collect variablesInPart and track for each part which variables it contains // group(1) is the variable head without the leading { TemplateVariable.VariableType type = TemplateVariable.VariableType.from(matcher.group(1)); // group(2) is the String[] names = matcher.group(2).split(","); List<Integer> variablesInPart = new ArrayList<Integer>(); for (String name : names) { TemplateVariable variable = new TemplateVariable(name, type); variablesInPart.add(variables.size()); variables.add(variable); variableNames.add(name); } variableIndices.add(variablesInPart); } // finish off remaining part if (endOfPart < template.length()) { urlComponents.add(template.substring(endOfPart)); variableIndices.add(Collections.<Integer>emptyList()); } }
From source file:io.sugo.grok.api.Grok.java
/**
 * Matches the given <tt>text</tt> against the compiled named regex and wraps the outcome in a
 * {@link Match}.
 *
 * @param text : Single line of log
 * @return {@code Match.EMPTY} when no regex has been compiled or {@code text} is blank;
 *         otherwise a new {@link Match}, which is only populated (subject, grok, matcher,
 *         start and end of the whole match) when the regex was found in {@code text}
 */
public Match match(String text) {
    if (compiledNamedRegex == null || StringUtils.isBlank(text)) {
        return Match.EMPTY;
    }

    final Matcher matcher = compiledNamedRegex.matcher(text);
    final Match result = new Match();
    if (!matcher.find()) {
        // Nothing found: hand back the fresh, unpopulated Match.
        return result;
    }

    result.setSubject(text);
    result.setGrok(this);
    result.setMatch(matcher);
    result.setStart(matcher.start(0));
    result.setEnd(matcher.end(0));
    return result;
}
From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java
protected void processMeta(CharSequence cs) { Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs); String name = null;//from ww w. j ava 2 s . co m String httpEquiv = null; String content = null; while (attr.find()) { int valueGroup = (attr.start(12) > -1) ? 12 : (attr.start(13) > -1) ? 13 : 14; CharSequence value = cs.subSequence(attr.start(valueGroup), attr.end(valueGroup)); if (attr.group(1).equalsIgnoreCase("name")) { name = value.toString(); } else if (attr.group(1).equalsIgnoreCase("http-equiv")) { httpEquiv = value.toString(); } else if (attr.group(1).equalsIgnoreCase("content")) { content = value.toString(); } // TODO: handle other stuff } TextUtils.recycleMatcher(attr); // Look for the 'robots' meta-tag if ("robots".equalsIgnoreCase(name) && content != null) { if (getHonorRobots()) { String contentLower = content.toLowerCase(); if ((contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0)) { // if 'nofollow' or 'none' is specified and we // are honoring robots, end html extraction logger.fine("HTML extraction skipped due to robots meta-tag for: " + source); cancelFurtherExtraction(); return; } } } else if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) { String refreshUri = content.substring(content.indexOf("=") + 1); try { Link refreshLink = new Link(source, UURIFactory.getInstance(base, refreshUri), Link.elementContext("meta", httpEquiv), Link.REFER_HOP); next.addLast(refreshLink); } catch (URIException e) { extractErrorListener.noteExtractError(e, source, refreshUri); } } }
From source file:me.ryanhamshire.griefprevention.DataStore.java
/** * Returns a list with all links contained in the input */// w w w.j a va 2 s . c om public static List<String> extractUrls(String text) { List<String> containedUrls = new ArrayList<String>(); String urlRegex = "((https?|ftp|gopher|telnet|file):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?\\+-=\\\\\\.&]*)"; Pattern pattern = Pattern.compile(urlRegex, Pattern.CASE_INSENSITIVE); Matcher urlMatcher = null; try { urlMatcher = pattern.matcher(text); } catch (Throwable t) { return containedUrls; } while (urlMatcher.find()) { containedUrls.add(text.substring(urlMatcher.start(0), urlMatcher.end(0))); } return containedUrls; }
From source file:com.github.hateoas.forms.affordance.PartialUriTemplate.java
/** * Creates a new {@link PartialUriTemplate} using the given template string. * * @param template must not be {@literal null} or empty. *///from www. j a v a 2s . com public PartialUriTemplate(String template) { Assert.hasText(template, "Template must not be null or empty!"); Matcher matcher = VARIABLE_REGEX.matcher(template); // first group is the variable start without leading {: "", "/", "?", "#", // second group is the comma-separated name list without the trailing } of the variable int endOfPart = 0; while (matcher.find()) { // 0 is the current match, i.e. the entire variable expression int startOfPart = matcher.start(0); // add part before current match if (endOfPart < startOfPart) { final String partWithoutVariables = template.substring(endOfPart, startOfPart); final StringTokenizer stringTokenizer = new StringTokenizer(partWithoutVariables, "?", true); boolean inQuery = false; while (stringTokenizer.hasMoreTokens()) { final String token = stringTokenizer.nextToken(); if ("?".equals(token)) { inQuery = true; } else { if (!inQuery) { urlComponents.add(token); } else { urlComponents.add("?" 
+ token); } variableIndices.add(Collections.<Integer>emptyList()); } } } endOfPart = matcher.end(0); // add current match as part final String variablePart = template.substring(startOfPart, endOfPart); urlComponents.add(variablePart); // collect variablesInPart and track for each part which variables it contains // group(1) is the variable head without the leading { TemplateVariable.VariableType type = TemplateVariable.VariableType.from(matcher.group(1)); // group(2) are the variable names String[] names = matcher.group(2).split(","); List<Integer> variablesInPart = new ArrayList<Integer>(); for (String name : names) { TemplateVariable variable = new TemplateVariable(name, type); variablesInPart.add(variables.size()); variables.add(variable); variableNames.add(name); } variableIndices.add(variablesInPart); } // finish off remaining part if (endOfPart < template.length()) { urlComponents.add(template.substring(endOfPart)); variableIndices.add(Collections.<Integer>emptyList()); } }
From source file:com.nttec.everychan.ui.downloading.HtmlBuilder.java
private String fixComment(String comment) { comment = comment.replaceAll("(?i)<aibquote>", "<span class=\"unkfunc\">") .replaceAll("(?i)</aibquote>", "</span>").replaceAll("(?i)<aibspoiler>", "<span class=\"spoiler\">") .replaceAll("(?i)</aibspoiler>", "</span>"); Matcher m = A_HREF_PATTERN.matcher(comment); if (!m.find()) return comment; StringBuffer sb = new StringBuffer(); do {//from ww w . j a v a 2 s. c om String group = m.group(); String found = m.group(1); int oldPos = m.start(1) - m.start(); int oldLen = found.length(); String url; if (found.startsWith("#")) { try { String thisThreadUrl = chan.buildUrl(pageModel); int i = thisThreadUrl.indexOf('#'); if (i != -1) thisThreadUrl = thisThreadUrl.substring(0, i); String postNumber = chan.parseUrl(thisThreadUrl + found).postNumber; url = "#" + postNumber != null ? postNumber : pageModel.threadNumber; } catch (Exception e) { url = found; } } else { url = chan.fixRelativeUrl(found); try { UrlPageModel linkModel = chan.parseUrl(url); if (ChanModels.hashUrlPageModel(linkModel).equals(ChanModels.hashUrlPageModel(pageModel))) { url = "#" + linkModel.postNumber; } } catch (Exception e) { /* ignore */ } } m.appendReplacement(sb, url.equals(found) ? group : (group.substring(0, oldPos) + url + group.substring(oldPos + oldLen))); } while (m.find()); m.appendTail(sb); return sb.toString(); }