List of usage examples for java.util.regex.Matcher.end
public int end(String name)
From source file:de.micromata.genome.gwiki.utils.StringUtils.java
/**
 * Replaces the text captured by one group in every match of a pattern.
 *
 * <p>For each match, the span captured by {@code group} is handed to {@code replacer} and
 * substituted by the value it returns. All other text, including the remainder of each
 * match, is copied unchanged.
 *
 * @param text the input text; returned unchanged when {@code isEmpty(text)} is true
 * @param p the pattern to search for
 * @param group index of the capturing group whose text is replaced
 * @param replacer callback that maps the captured text to its replacement
 * @return the text with every captured span replaced
 */
public static String replace(String text, Pattern p, int group,
        CallableX1<String, String, RuntimeException> replacer) {
    if (isEmpty(text) == true) {
        return text;
    }
    StringBuilder sb = new StringBuilder();
    Matcher m = p.matcher(text);
    int lastIdx = 0;
    while (m.find() == true) {
        int idx = m.start(group);
        if (idx < 0) {
            // The group did not take part in this match (e.g. an optional group);
            // Matcher reports -1 and there is nothing to replace.
            continue;
        }
        int eidx = m.end(group);
        if (idx > lastIdx) {
            sb.append(text.substring(lastIdx, idx));
        }
        sb.append(replacer.call(text.substring(idx, eidx)));
        lastIdx = eidx;
    }
    if (lastIdx < text.length()) {
        sb.append(text.substring(lastIdx, text.length()));
    }
    return sb.toString();
}
From source file:dk.netarkivet.harvester.harvesting.extractor.ExtractorJS.java
public static long considerStrings(CrawlURI curi, CharSequence cs, CrawlController controller, boolean handlingJSFile) { long foundLinks = 0; Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs); while (strings.find()) { CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2)); if (UriUtils.isLikelyUriJavascriptContextLegacy(subsequence)) { String string = subsequence.toString(); string = StringEscapeUtils.unescapeJavaScript(string); string = UriUtils.speculativeFixup(string, curi.getUURI()); foundLinks++;/* ww w .ja v a 2 s . c o m*/ try { if (handlingJSFile) { curi.createAndAddLinkRelativeToVia(string, Link.JS_MISC, Link.SPECULATIVE_HOP); } else { curi.createAndAddLinkRelativeToBase(string, Link.JS_MISC, Link.SPECULATIVE_HOP); } } catch (URIException e) { // There may not be a controller (e.g. If we're being run // by the extractor tool). if (controller != null) { controller.logUriError(e, curi.getUURI(), string); } else { LOGGER.info(curi + ", " + string + ": " + e.getMessage()); } } } else { foundLinks += considerStrings(curi, subsequence, controller, handlingJSFile); } } TextUtils.recycleMatcher(strings); return foundLinks; }
From source file:com.smartitengineering.cms.api.common.MediaType.java
public static MediaType fromString(String mediaType) { if (StringUtils.isBlank(mediaType)) { return MediaType.WILDCARD; }//from ww w . ja v a2s . co m MediaType cacheResult = parseResult.get(mediaType); if (cacheResult != null) { return cacheResult; } Matcher matcher = MEDIA_TYPE_REGEX.matcher(mediaType); if (matcher.matches()) { String type = matcher.group(1); String subtype = matcher.group(2); final int endOfSubtype = matcher.end(2); Map<String, String> params = new HashMap<String, String>(); if (endOfSubtype < mediaType.length()) { String paramsStr = mediaType.substring(endOfSubtype); if (StringUtils.isNotBlank(paramsStr)) { String[] parameters = paramsStr.split(";"); for (String parameter : parameters) { final int indexOfEquals = parameter.indexOf('='); if (indexOfEquals >= 0) { params.put(parameter.substring(0, indexOfEquals), indexOfEquals < parameter.length() - 1 ? parameter.substring(indexOfEquals + 1) : ""); } } } } final MediaType result = new MediaType(type, subtype, params); parseResult.put(mediaType, result); return result; } else { return MediaType.WILDCARD; } }
From source file:com.cyberway.issue.crawler.extractor.ExtractorJS.java
public static long considerStrings(CrawlURI curi, CharSequence cs, CrawlController controller, boolean handlingJSFile) { long foundLinks = 0; Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs); while (strings.find()) { CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2)); Matcher uri = TextUtils.getMatcher(STRING_URI_DETECTOR, subsequence); if (uri.matches()) { String string = uri.group(); string = speculativeFixup(string, curi); foundLinks++;/*w w w. j a v a2 s . c o m*/ try { if (handlingJSFile) { curi.createAndAddLinkRelativeToVia(string, Link.JS_MISC, Link.SPECULATIVE_HOP); } else { curi.createAndAddLinkRelativeToBase(string, Link.JS_MISC, Link.SPECULATIVE_HOP); } } catch (URIException e) { // There may not be a controller (e.g. If we're being run // by the extractor tool). if (controller != null) { controller.logUriError(e, curi.getUURI(), string); } else { LOGGER.info(curi + ", " + string + ": " + e.getMessage()); } } } else if (subsequence.toString().startsWith("/")) { try { curi.createAndAddLinkRelativeToBase(subsequence.toString(), Link.JS_MISC, Link.NAVLINK_HOP); } catch (URIException e) { if (controller != null) { controller.logUriError(e, curi.getUURI(), subsequence); } else { LOGGER.info(curi + ", " + subsequence + ": " + e.getMessage()); } } } else { foundLinks += considerStrings(curi, subsequence, controller, handlingJSFile); } TextUtils.recycleMatcher(uri); } TextUtils.recycleMatcher(strings); return foundLinks; }
From source file:com.github.feribg.audiogetter.helpers.Utils.java
/** * Extract a list of urls from a string/*from ww w.jav a 2 s.c om*/ * Retrieved from: http://www.java-tutorial.ch/core-java-tutorial/extract-urls-using-java-regular-expressions * * @param value * @return */ public static List<String> extractUrls(String value) { List<String> result = new ArrayList<String>(); String urlPattern = "((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?\\+-=\\\\\\.&]*)"; Pattern p = Pattern.compile(urlPattern, Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(value); while (m.find()) { result.add(value.substring(m.start(0), m.end(0))); } return result; }
From source file:de.pixida.logtest.logreaders.PatternMatchingsStripper.java
public static String strip(final Matcher matcher, final String value) { if (matcher.groupCount() == 0) { return value; }//from w w w .j a v a 2 s. c om // Remove optional matches which were empty and filter nested matches final List<Pair<Integer, Integer>> realMatches = new ArrayList<>(); int lastMostOuterMatchEnd = -1; for (int i = 1; i <= matcher.groupCount(); i++) { if (matcher.start(i) != -1) { if (matcher.end(i) <= lastMostOuterMatchEnd) { continue; } lastMostOuterMatchEnd = matcher.end(i); realMatches.add(Pair.of(matcher.start(i), matcher.end(i))); } } if (realMatches.isEmpty()) { return value; } // Removal final StringBuilder sb = new StringBuilder(value.substring(0, realMatches.get(0).getLeft())); for (int i = 0; i < realMatches.size(); i++) { // Assumption: Substring before start of match was already appended if (i + 1 < realMatches.size()) { sb.append(value.substring(realMatches.get(i).getRight(), realMatches.get(i + 1).getLeft())); } else { sb.append(value.substring(realMatches.get(i).getRight())); } } return sb.toString(); }
From source file:de.micromata.genome.gwiki.page.search.expr.SearchUtils.java
@Deprecated public static String sampleToHtmlNew(String text, List<String> words) { String ap = Pattern.quote("<!--KW:XXX-->"); String ep = Pattern.quote("<!--KW-->"); // TODO gwiki geht nicht mit umlauten, da words normalisiert sind. // StringBuilder sb = new StringBuilder(); for (String w : words) { String nw = NormalizeUtils.normalize(w); String app = StringUtils.replace(ap, "XXX", nw); String reg = app + "(.+?)" + ep; Pattern p = Pattern.compile(reg); Matcher m = p.matcher(text); while (m.find() == true) { int start = m.start(1); int end = m.end(1); String t = m.group(1); text = text.substring(0, start) + "<b><strong><big>" + t + "</big></strong></b>" + text.substring(end); }//from w w w . j ava2 s . c o m // text = StringUtils.replace(text, w, "<b><strong><big>" + w + "</big></strong></b>"); } return text; }
From source file:de.unidue.inf.is.ezdl.gframedl.utils.HighlightingUtils.java
/**
 * Wraps group 1 of every match found by {@code matcher} in a {@code <font>} element with
 * the given colors.
 *
 * <p>A match is skipped when the last {@code "<"} before it comes after the last
 * {@code ">"} before it. Text outside the highlighted spans is copied through, and every
 * copied piece is passed to {@code escape(String)} when {@code escape} is true.
 *
 * @param htmlString the text to highlight; null yields an empty string
 * @param escape whether copied text is passed through {@code escape(String)}
 * @param highlightStrings the terms being highlighted; when empty, or a single empty string,
 *                         {@code htmlString} is returned unchanged
 * @param matcher matcher whose group 1 offsets are applied to {@code htmlString}
 * @param foregroundColor value for the {@code color} attribute
 * @param backgroundColor value for the {@code bgcolor} attribute
 * @return the highlighted text
 */
private static String highlight(String htmlString, boolean escape, List<String> highlightStrings,
        Matcher matcher, String foregroundColor, String backgroundColor) {
    if (htmlString == null) {
        return "";
    }
    boolean nothingToHighlight = highlightStrings.isEmpty()
            || (highlightStrings.size() == 1 && highlightStrings.get(0).isEmpty());
    if (nothingToHighlight) {
        return htmlString;
    }

    StringBuilder out = new StringBuilder();
    int cursor = 0;
    while (matcher.find()) {
        int from = matcher.start(1);
        int to = matcher.end(1);
        String before = htmlString.substring(0, from);
        int lastClose = before.lastIndexOf(">");
        int lastOpen = before.lastIndexOf("<");
        boolean outsideTag = lastClose > lastOpen || (lastClose == -1 && lastOpen == -1);
        if (!outsideTag) {
            continue;
        }
        String gap = htmlString.substring(cursor, from);
        out.append(escape ? escape(gap) : gap);
        out.append("<font bgcolor=\"" + backgroundColor + "\" color=\"" + foregroundColor + "\">");
        String hit = htmlString.substring(from, to);
        out.append(escape ? escape(hit) : hit);
        out.append("</font>");
        cursor = to;
    }

    String tail = htmlString.substring(cursor, htmlString.length());
    out.append(escape ? escape(tail) : tail);
    return out.toString();
}
From source file:com.puppycrawl.tools.checkstyle.checks.javadoc.JavadocUtils.java
/** * Gets validTags from a given piece of Javadoc. * @param cmt//from w w w. ja v a 2 s . co m * the Javadoc comment to process. * @param tagType * the type of validTags we're interested in * @return all standalone validTags from the given javadoc. */ public static JavadocTags getJavadocTags(TextBlock cmt, JavadocTagType tagType) { final String[] text = cmt.getText(); final List<JavadocTag> tags = Lists.newArrayList(); final List<InvalidJavadocTag> invalidTags = Lists.newArrayList(); Pattern blockTagPattern = Pattern.compile("/\\*{2,}\\s*@(\\p{Alpha}+)\\s"); for (int i = 0; i < text.length; i++) { final String s = text[i]; final Matcher blockTagMatcher = blockTagPattern.matcher(s); if ((tagType == JavadocTagType.ALL || tagType == JavadocTagType.BLOCK) && blockTagMatcher.find()) { final String tagName = blockTagMatcher.group(1); String content = s.substring(blockTagMatcher.end(1)); if (content.endsWith("*/")) { content = content.substring(0, content.length() - 2); } final int line = cmt.getStartLineNo() + i; int col = blockTagMatcher.start(1) - 1; if (i == 0) { col += cmt.getStartColNo(); } if (JavadocTagInfo.isValidName(tagName)) { tags.add(new JavadocTag(line, col, tagName, content.trim())); } else { invalidTags.add(new InvalidJavadocTag(line, col, tagName)); } } // No block tag, so look for inline validTags else if (tagType == JavadocTagType.ALL || tagType == JavadocTagType.INLINE) { // Match Javadoc text after comment characters final Pattern commentPattern = Pattern.compile("^\\s*(?:/\\*{2,}|\\*+)\\s*(.*)"); final Matcher commentMatcher = commentPattern.matcher(s); final String commentContents; final int commentOffset; // offset including comment characters if (commentMatcher.find()) { commentContents = commentMatcher.group(1); commentOffset = commentMatcher.start(1) - 1; } else { commentContents = s; // No leading asterisks, still valid commentOffset = 0; } final Pattern tagPattern = Pattern.compile(".*?\\{@(\\p{Alpha}+)\\s+(.*?)\\}"); final Matcher 
tagMatcher = tagPattern.matcher(commentContents); while (tagMatcher.find()) { final String tagName = tagMatcher.group(1); final String tagValue = tagMatcher.group(2).trim(); final int line = cmt.getStartLineNo() + i; int col = commentOffset + tagMatcher.start(1) - 1; if (i == 0) { col += cmt.getStartColNo(); } if (JavadocTagInfo.isValidName(tagName)) { tags.add(new JavadocTag(line, col, tagName, tagValue)); } else { invalidTags.add(new InvalidJavadocTag(line, col, tagName)); } // else Error: Unexpected match count for inline Javadoc // tag! } } blockTagPattern = Pattern.compile("^\\s*\\**\\s*@(\\p{Alpha}+)\\s"); } return new JavadocTags(tags, invalidTags); }
From source file:com.puppycrawl.tools.checkstyle.utils.JavadocUtils.java
/** * Gets validTags from a given piece of Javadoc. * @param textBlock/*from w w w.j a v a2 s. c om*/ * the Javadoc comment to process. * @param tagType * the type of validTags we're interested in * @return all standalone validTags from the given javadoc. */ public static JavadocTags getJavadocTags(TextBlock textBlock, JavadocTagType tagType) { final String[] text = textBlock.getText(); final List<JavadocTag> tags = Lists.newArrayList(); final List<InvalidJavadocTag> invalidTags = Lists.newArrayList(); Pattern blockTagPattern = Pattern.compile("/\\*{2,}\\s*@(\\p{Alpha}+)\\s"); for (int i = 0; i < text.length; i++) { final String textValue = text[i]; final Matcher blockTagMatcher = blockTagPattern.matcher(textValue); if ((tagType == JavadocTagType.ALL || tagType == JavadocTagType.BLOCK) && blockTagMatcher.find()) { final String tagName = blockTagMatcher.group(1); String content = textValue.substring(blockTagMatcher.end(1)); if (content.endsWith("*/")) { content = content.substring(0, content.length() - 2); } final int line = textBlock.getStartLineNo() + i; int col = blockTagMatcher.start(1) - 1; if (i == 0) { col += textBlock.getStartColNo(); } if (JavadocTagInfo.isValidName(tagName)) { tags.add(new JavadocTag(line, col, tagName, content.trim())); } else { invalidTags.add(new InvalidJavadocTag(line, col, tagName)); } } // No block tag, so look for inline validTags else if (tagType == JavadocTagType.ALL || tagType == JavadocTagType.INLINE) { lookForInlineTags(textBlock, i, tags, invalidTags); } blockTagPattern = Pattern.compile("^\\s*\\**\\s*@(\\p{Alpha}+)\\s"); } return new JavadocTags(tags, invalidTags); }