List of usage examples for java.util.regex.Matcher.start()
public int start()
From source file:com.thero.framework.util.AntPathStringMatcher.java
private Pattern createPattern(String pattern) { StringBuilder patternBuilder = new StringBuilder(); Matcher m = GLOB_PATTERN.matcher(pattern); int end = 0;// w ww . j av a2s . c o m while (m.find()) { patternBuilder.append(quote(pattern, end, m.start())); String match = m.group(); if ("?".equals(match)) { patternBuilder.append('.'); } else if ("*".equals(match)) { patternBuilder.append(".*"); } else if (match.startsWith("{") && match.endsWith("}")) { int colonIdx = match.indexOf(':'); if (colonIdx == -1) { patternBuilder.append(DEFAULT_VARIABLE_PATTERN); variableNames.add(m.group(1)); } else { String variablePattern = match.substring(colonIdx + 1, match.length() - 1); patternBuilder.append('('); patternBuilder.append(variablePattern); patternBuilder.append(')'); String variableName = match.substring(1, colonIdx); variableNames.add(variableName); } } end = m.end(); } patternBuilder.append(quote(pattern, end, pattern.length())); return Pattern.compile(patternBuilder.toString()); }
From source file:de.ist.clonto.webwiki.InfoboxParser.java
public List<Information> parse(String text) { String pagetext = replaceHTMLComments(text); List<Information> setlist = new ArrayList<Information>(); Pattern pattern = Pattern.compile("\\{\\{\\s*infobox", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.DOTALL); Matcher matcher = pattern.matcher(pagetext); while (matcher.find()) { int begin = matcher.start(); int bracketnr = 2; int end = begin + matcher.group().length(); while (end < pagetext.length()) { switch (pagetext.charAt(end)) { case '}': bracketnr--;//from www. j a va 2 s . co m break; case '{': bracketnr++; break; } if (bracketnr == 0) { break; } end++; } String infobox = pagetext.substring(begin, end); Information info = parseSet(infobox); setlist.add(info); } return setlist; }
From source file:io.wcm.tooling.netbeans.sightly.completion.classLookup.MemberLookupCompleter.java
@Override public int indexOfStartCharacter(char[] line) { //TODO: change to check if there is a data-sly-use before the class final String text = String.copyValueOf(line); final Matcher matcher = LAST_DOLLAR_CURLYBRACE.matcher(text); if (matcher.find()) { final int beginIndex = matcher.start(); final Matcher variableMatcher = LAST_VARIABLE.matcher(text); if (variableMatcher.find(beginIndex)) { return variableMatcher.start() - 1; }/*w w w . j a va 2 s . c o m*/ } return -1; }
From source file:alluxio.cli.fs.command.StatCommand.java
/**
 * Renders the value of the {@code f} option by substituting every match of
 * {@code FORMAT_PATTERN} with the value returned by {@code getField}.
 *
 * <p>Text between matches, and any text after the last match, is copied unchanged.
 *
 * @param cl the parsed command line; its {@code f} option supplies the format string
 * @param status the status object passed through to {@code getField}
 * @return the rendered output
 */
private String formatOutput(CommandLine cl, URIStatus status) {
    final String format = cl.getOptionValue('f');
    final int length = format.length();
    final StringBuilder rendered = new StringBuilder();
    final Matcher fieldMatcher = FORMAT_PATTERN.matcher(format);

    int cursor = 0;
    while (cursor < length) {
        if (!fieldMatcher.find(cursor)) {
            break;
        }
        // Copy the literal text preceding this match (nothing when the match starts at the cursor).
        rendered.append(format, cursor, fieldMatcher.start());
        rendered.append(getField(fieldMatcher, status));
        cursor = fieldMatcher.end();
    }
    if (cursor < length) {
        // Literal text after the last match.
        rendered.append(format, cursor, length);
    }
    return rendered.toString();
}
From source file:edu.cmu.lti.f12.hw2.hw2_team01.passage.CorrectedPassageCandidateFinder.java
public List<PassageCandidate> extractPassages(String[] keyterms) { String[] paragraphs = text.split("<p>"); int start = 0, end = 0; List<PassageSpan> matchedSpans = new ArrayList<PassageSpan>(); List<PassageCandidate> passageList = new ArrayList<PassageCandidate>(); // System.out.println("starting paragraph spans"); for (String paragraph : paragraphs) { start = end + 3;/*w ww. j av a 2 s . co m*/ end = start + paragraph.length(); // System.out.println("cleaning text..."); String cleanText = Jsoup.parse(paragraph).text().replaceAll("([\177-\377\0-\32]*)", ""); int totalKeyterms = 0; for (String keyterm : keyterms) { // System.out.println("matching keyterms..."); Pattern p = Pattern.compile(keyterm); Matcher mClean = p.matcher(cleanText); while (mClean.find()) { PassageSpan match = new PassageSpan(mClean.start(), mClean.end()); matchedSpans.add(match); totalMatches++; } if (!matchedSpans.isEmpty()) { // matchingSpans.add(matchedSpans); totalKeyterms++; } try { BioPassageCandidate pc = new BioPassageCandidate(docId, start, end, null); pc.keytermMatches = totalKeyterms; pc.addSpans(matchedSpans); pc.setText(paragraph); pcMap.put(pc, totalKeyterms); // pc.setProbablity(getScore(pc,totalmatches)); passageList.add(pc); } catch (AnalysisEngineProcessException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } //System.out.println("ranking passages..."); // rank passage candidate for (PassageCandidate pc : passageList) { pc.setProbablity((float) getScore(pc, totalMatches)); } // System.out.println("ranking results"); Collections.sort(passageList, new PassageCandidateComparator()); // System.out.println("returning results"); return passageList; }
From source file:org.openmeetings.app.data.flvrecord.converter.FlvExplorerConverter.java
private FlvDimension getFlvDimension(String txt) throws Exception { Pattern p = Pattern.compile("\\d{2,4}(x)\\d{2,4}"); Matcher matcher = p.matcher(txt); while (matcher.find()) { String foundResolution = txt.substring(matcher.start(), matcher.end()); String[] resultions = foundResolution.split("x"); return new FlvDimension(Integer.valueOf(resultions[0]).intValue(), Integer.valueOf(resultions[1]).intValue()); }// w w w .java 2 s.c o m return null; }
From source file:com.norconex.importer.handler.transformer.impl.StripAfterTransformer.java
@Override protected void transformStringContent(String reference, StringBuilder content, ImporterMetadata metadata, boolean parsed, boolean partialContent) { if (stripAfterRegex == null) { LOG.error("No regular expression provided."); return;//from w w w .j av a 2 s .c om } int flags = Pattern.DOTALL | Pattern.UNICODE_CASE; if (!caseSensitive) { flags = flags | Pattern.CASE_INSENSITIVE; } Pattern pattern = Pattern.compile(stripAfterRegex, flags); Matcher match = pattern.matcher(content); if (match.find()) { if (inclusive) { content.delete(match.start(), content.length()); } else { content.delete(match.end(), content.length()); } } }
From source file:com.nuxeo.intranet.jenkins.web.JenkinsJobsActions.java
/**
 * Converts a job comment to HTML and parses JIRA issues to turn them into
 * links.
 *
 * <p>Every newline is replaced with {@code <br />} followed by a newline. When a JIRA
 * URL and at least one project key are supplied, each occurrence of
 * {@code <project>-<digits>} (matched case-insensitively on word boundaries) is
 * replaced with the result of {@code getJiraUrlTag}.
 *
 * <p>NOTE(review): the comment text itself is not HTML-escaped, and the project keys
 * are inserted into the regular expression unquoted - confirm both are acceptable
 * for the callers' inputs.
 *
 * @param toConvert the raw comment; may be {@code null}
 * @param jiraURL the JIRA URL handed to {@code getJiraUrlTag}; when blank, no links
 *        are generated
 * @param jiraProjects the JIRA project keys to recognise; when {@code null} or empty,
 *        no links are generated
 * @return the converted comment, or {@code null} when {@code toConvert} is
 *         {@code null}
 */
public String getConvertedJobComment(String toConvert, String jiraURL, String[] jiraProjects) {
    if (toConvert == null) {
        return null;
    }
    if (StringUtils.isBlank(jiraURL) || jiraProjects == null || jiraProjects.length == 0) {
        return toConvert.replace("\n", "<br />\n");
    }

    String regexp = "\\b(" + StringUtils.join(jiraProjects, "|") + ")-\\d+\\b";
    Matcher m = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE).matcher(toConvert);

    // A StringBuilder avoids re-copying the accumulated result on every match.
    StringBuilder res = new StringBuilder(toConvert.length());
    int lastIndex = 0;
    while (m.find()) {
        res.append(toConvert, lastIndex, m.start());
        res.append(getJiraUrlTag(jiraURL, m.group(0)));
        lastIndex = m.end();
    }
    // Remaining text after the last issue key (the whole comment when nothing matched).
    res.append(toConvert, lastIndex, toConvert.length());

    return res.toString().replace("\n", "<br />\n");
}
From source file:org.esigate.parser.Parser.java
/** * Parses all the CharSequence.//from www . j a va 2 s . c o m * * @param in * The CharSequence to parse * @param out * The Writable to write the result to * @throws IOException * @throws HttpErrorPage */ public void parse(CharSequence in, Appendable out) throws IOException, HttpErrorPage { ParserContextImpl ctx = new ParserContextImpl(out, httpRequest, httpResponse); Matcher matcher = pattern.matcher(in); int currentPosition = 0; while (matcher.find()) { String tag = matcher.group(); ctx.characters(in, currentPosition, matcher.start()); currentPosition = matcher.end(); if (ctx.isCurrentTagEnd(tag)) { // check if this is the end tag for current element LOG.info("Processing end tag {}", tag); ctx.endElement(tag); } else { // if not, it is an opening tag for a new element LOG.info("Processing start tag {}", tag); ElementType type = null; for (ElementType t : elementTypes) { if (t.isStartTag(tag)) { type = t; break; } } Element element = type.newInstance(); ctx.startElement(type, element, tag); if (type.isSelfClosing(tag)) { ctx.endElement(tag); } } } // we reached the end of input ctx.characters(in, currentPosition, in.length()); }
From source file:info.novatec.testit.livingdoc.html.BulletListFilter.java
/**
 * Rewrites the content around each match of {@code ELEMENT_BOUNDARY}.
 *
 * <p>Non-blank text between matches (and after the last match) is passed through
 * {@code span(...)}. A match is copied to the output only when its second capturing
 * group is not blank; otherwise it is dropped.
 *
 * @param content the content to process
 * @return the rewritten content
 */
@Override
public String process(String content) {
    Matcher boundary = Pattern.compile(ELEMENT_BOUNDARY).matcher(content);
    StringBuilder result = new StringBuilder();

    int cursor = 0;
    while (boundary.find()) {
        String before = content.substring(cursor, boundary.start());
        if (!StringUtils.isBlank(before)) {
            result.append(span(before));
        }
        if (!StringUtils.isBlank(boundary.group(2))) {
            result.append(boundary.group());
        }
        cursor = boundary.end();
    }

    // Whatever follows the last boundary.
    String tail = content.substring(cursor);
    if (!StringUtils.isBlank(tail)) {
        result.append(span(tail));
    }
    return result.toString();
}