List of usage examples for java.util.regex.Matcher.start
public int start(String name)
From source file:com.cyberway.issue.crawler.extractor.ExtractorJS.java
public static long considerStrings(CrawlURI curi, CharSequence cs, CrawlController controller, boolean handlingJSFile) { long foundLinks = 0; Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs); while (strings.find()) { CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2)); Matcher uri = TextUtils.getMatcher(STRING_URI_DETECTOR, subsequence); if (uri.matches()) { String string = uri.group(); string = speculativeFixup(string, curi); foundLinks++;/*from w ww . j av a 2s .co m*/ try { if (handlingJSFile) { curi.createAndAddLinkRelativeToVia(string, Link.JS_MISC, Link.SPECULATIVE_HOP); } else { curi.createAndAddLinkRelativeToBase(string, Link.JS_MISC, Link.SPECULATIVE_HOP); } } catch (URIException e) { // There may not be a controller (e.g. If we're being run // by the extractor tool). if (controller != null) { controller.logUriError(e, curi.getUURI(), string); } else { LOGGER.info(curi + ", " + string + ": " + e.getMessage()); } } } else if (subsequence.toString().startsWith("/")) { try { curi.createAndAddLinkRelativeToBase(subsequence.toString(), Link.JS_MISC, Link.NAVLINK_HOP); } catch (URIException e) { if (controller != null) { controller.logUriError(e, curi.getUURI(), subsequence); } else { LOGGER.info(curi + ", " + subsequence + ": " + e.getMessage()); } } } else { foundLinks += considerStrings(curi, subsequence, controller, handlingJSFile); } TextUtils.recycleMatcher(uri); } TextUtils.recycleMatcher(strings); return foundLinks; }
From source file:de.pixida.logtest.logreaders.PatternMatchingsStripper.java
public static String strip(final Matcher matcher, final String value) { if (matcher.groupCount() == 0) { return value; }//from w w w . j a v a 2 s. c om // Remove optional matches which were empty and filter nested matches final List<Pair<Integer, Integer>> realMatches = new ArrayList<>(); int lastMostOuterMatchEnd = -1; for (int i = 1; i <= matcher.groupCount(); i++) { if (matcher.start(i) != -1) { if (matcher.end(i) <= lastMostOuterMatchEnd) { continue; } lastMostOuterMatchEnd = matcher.end(i); realMatches.add(Pair.of(matcher.start(i), matcher.end(i))); } } if (realMatches.isEmpty()) { return value; } // Removal final StringBuilder sb = new StringBuilder(value.substring(0, realMatches.get(0).getLeft())); for (int i = 0; i < realMatches.size(); i++) { // Assumption: Substring before start of match was already appended if (i + 1 < realMatches.size()) { sb.append(value.substring(realMatches.get(i).getRight(), realMatches.get(i + 1).getLeft())); } else { sb.append(value.substring(realMatches.get(i).getRight())); } } return sb.toString(); }
From source file:com.github.feribg.audiogetter.helpers.Utils.java
/**
 * Pattern used by {@link #extractUrls(String)}; compiled once because {@link Pattern} is
 * immutable and safe to share.
 *
 * NOTE(review): inside the character class, {@code \+-=} is a range from '+' to '=' and so also
 * admits ',' and '<'. This looks unintended, but the expression is kept unchanged so existing
 * results do not shift.
 */
private static final Pattern EXTRACT_URLS_PATTERN = Pattern.compile(
        "((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?\\+-=\\\\\\.&]*)", Pattern.CASE_INSENSITIVE);

/**
 * Extracts all http/https URLs from a string, in order of appearance.
 * Retrieved from: http://www.java-tutorial.ch/core-java-tutorial/extract-urls-using-java-regular-expressions
 *
 * @param value the text to scan; {@code null} is treated as containing no URLs
 * @return a new mutable list with every matched URL; empty if there is none
 */
public static List<String> extractUrls(String value) {
    List<String> result = new ArrayList<String>();
    if (value == null) {
        return result;
    }
    Matcher m = EXTRACT_URLS_PATTERN.matcher(value);
    while (m.find()) {
        result.add(m.group());
    }
    return result;
}
From source file:de.knowwe.visualization.util.Utils.java
public static String prepareLabel(String string) { // if (true) return string; String lb = LINE_BREAK;/*from w w w. j a v a 2 s. c o m*/ int length = string.length(); if (length < 13) return clean(string, lb); // find possible line break positions Set<Integer> possibleLBs = new TreeSet<>(); // possible line breaks are before the following chars: // _ >= <= = . ( [ and white spaces Matcher m = Pattern.compile("_|>=|<=|=|\\.|\\([^\\)]{1}|\\[[^\\]]{1}").matcher(string); while (m.find()) { possibleLBs.add(m.start(0)); } // line breaks at whitespace only if they are not in range of = or > or // < m = Pattern.compile("(?<=[^=<>]){3}( )(?=[^=<>]{3})").matcher(string); while (m.find()) { possibleLBs.add(m.start(1)); } if (possibleLBs.isEmpty()) return clean(string, lb); // add the line breaks were it makes sense List<Integer> desiredLBs = new LinkedList<>(); Set<Integer> addedLBs = new TreeSet<>(); // optimal length is determined by the length of the given String double optimalLength = (double) length / Math.sqrt(length / 5); for (int i = 1; i < string.length() / optimalLength; i++) { // having the line breaks on these position would be optimal desiredLBs.add((int) Math.round(i * optimalLength)); } //todo: remove creation of trailing linebreaks // try to find those possible line breaks that closest to the optimal // line breaks int d = 0; for (Integer desLB : desiredLBs) { int bestCandiadate = 0; // to avoid breaks for only a few chars at the end, we make // extra efforts for the last line break // we get the line break that produces the smallest variance // we should actually calculate the best break via variance for // all line breaks, but that seems rather complex and not yet // justified right now, since the current simple algorithm // already produces nice results if (d == desiredLBs.size() - 1) { double bestVar = Double.MAX_VALUE; for (Integer posLB : possibleLBs) { Set<Integer> temp = new TreeSet<>(addedLBs); temp.add(posLB); TreeSet<Integer> varianceCheck = new 
TreeSet<>(temp); varianceCheck.add(length); double variance = getVariance(varianceCheck); if (variance <= bestVar) { bestVar = variance; bestCandiadate = posLB; } } } // for all other breakpoints, just get the one closest to the // desired position else { for (Integer posLB : possibleLBs) { if (Math.abs(desLB - posLB) <= Math.abs(desLB - bestCandiadate)) { bestCandiadate = posLB; } } } if (bestCandiadate != 0 && bestCandiadate != length) { addedLBs.add(bestCandiadate); } d++; } // but in the line breaks StringBuilder labelBuilder = new StringBuilder(); List<String> split = new ArrayList<>(addedLBs.size() + 1); int last = 0; for (Integer addedLB : addedLBs) { split.add(string.substring(last, addedLB)); last = addedLB; } split.add(string.substring(last, string.length())); for (String s : split) { // clean the substrings labelBuilder.append(clean(s.trim(), lb)).append(lb); } String label = labelBuilder.toString(); return label; }
From source file:de.micromata.genome.gwiki.page.search.expr.SearchUtils.java
@Deprecated public static String sampleToHtmlNew(String text, List<String> words) { String ap = Pattern.quote("<!--KW:XXX-->"); String ep = Pattern.quote("<!--KW-->"); // TODO gwiki geht nicht mit umlauten, da words normalisiert sind. // StringBuilder sb = new StringBuilder(); for (String w : words) { String nw = NormalizeUtils.normalize(w); String app = StringUtils.replace(ap, "XXX", nw); String reg = app + "(.+?)" + ep; Pattern p = Pattern.compile(reg); Matcher m = p.matcher(text); while (m.find() == true) { int start = m.start(1); int end = m.end(1); String t = m.group(1); text = text.substring(0, start) + "<b><strong><big>" + t + "</big></strong></b>" + text.substring(end); }//from w w w.j a v a2 s . co m // text = StringUtils.replace(text, w, "<b><strong><big>" + w + "</big></strong></b>"); } return text; }
From source file:com.github.gekoh.yagen.util.FieldInfo.java
private static String addNamePrefixToAttributeOverride(String annotation, String prefix) { Matcher matcher = ATTR_OVERR_NAME.matcher(annotation); if (matcher.find()) { return annotation.substring(0, matcher.start(1)) + prefix + annotation.substring(matcher.start(1)); }//from ww w. java 2 s. co m throw new IllegalArgumentException("no AttributeOverride found in '" + annotation + "'"); }
From source file:com.ultrapower.eoms.common.plugin.ecside.tag.TagUtils.java
public final static String escapeExpression(String attribute,String escapeType){ String[] escapeRexs=(String[])escape_REX.get(escapeType); for (int i=0;i<escapeRexs.length;i++){ Pattern pattern = Pattern.compile(escapeRexs[i]); Matcher matcher = pattern.matcher(attribute); StringBuffer attributeBuffer=new StringBuffer(); int lastIndex=0; while (matcher.find()){ String pEsc=matcher.group(0); // String pEscType=matcher.group(1); String pEscValue=matcher.group(2); pEscValue=escapeString(pEscValue,escapeType); int start=matcher.start(0); attributeBuffer.append(attribute.substring(lastIndex, start)); attributeBuffer.append(pEscValue); lastIndex=start+pEsc.length(); }//from w w w.jav a 2s . c o m attributeBuffer.append(attribute.substring(lastIndex)); attribute=attributeBuffer.toString(); } return attribute; }
From source file:com.github.gekoh.yagen.util.FieldInfo.java
private static String formatAnnotation(Annotation annotation) { String a = annotation.toString(); StringBuilder result = new StringBuilder(); // wrap string value of attribute "name" into double quotes as needed for java code Matcher m = STRING_ATTR_PATTERN.matcher(a); int idx = 0;/*from w ww . ja v a 2 s. com*/ while (m.find(idx)) { result.append(a.substring(idx, m.start(2))); result.append("\"").append(escapeAttributeValue(m.group(2))).append("\""); result.append(a.substring(m.end(2), m.end())); idx = m.end(); } result.append(a.substring(idx)); a = result.toString(); result = new StringBuilder(); // remove empty attributes like (columnDefinition=) m = Pattern.compile("\\(?(,?\\s*[A-Za-z]*=)[,|\\)]").matcher(a); idx = 0; while (m.find(idx)) { result.append(a.substring(idx, m.start(1))); idx = m.end(1); } result.append(a.substring(idx)); // set nullable=true m = NULLABLE_PATTERN.matcher(result); idx = 0; while (m.find(idx)) { if (m.group(1).equals("false")) { result.replace(m.start(1), m.end(1), "true"); } idx = m.start(1) + 1; m = NULLABLE_PATTERN.matcher(result); } // set unique=false m = UNIQUE_PATTERN.matcher(result); idx = 0; while (m.find(idx)) { if (m.group(1).equals("true")) { result.replace(m.start(1), m.end(1), "false"); } idx = m.start(1) + 1; m = UNIQUE_PATTERN.matcher(result); } return result.toString().replaceAll("=\\[([^\\]]*)\\]", "={$1}"); }
From source file:com.googlecode.jtiger.modules.ecside.tag.TagUtils.java
public final static String escapeExpression(String attribute, String escapeType) { String[] escapeRexs = (String[]) escape_REX.get(escapeType); for (int i = 0; i < escapeRexs.length; i++) { Pattern pattern = Pattern.compile(escapeRexs[i]); Matcher matcher = pattern.matcher(attribute); StringBuffer attributeBuffer = new StringBuffer(); int lastIndex = 0; while (matcher.find()) { String pEsc = matcher.group(0); // String pEscType=matcher.group(1); String pEscValue = matcher.group(2); pEscValue = escapeString(pEscValue, escapeType); int start = matcher.start(0); attributeBuffer.append(attribute.substring(lastIndex, start)); attributeBuffer.append(pEscValue); lastIndex = start + pEsc.length(); }//from www . j a v a2 s . c o m attributeBuffer.append(attribute.substring(lastIndex)); attribute = attributeBuffer.toString(); } return attribute; }
From source file:de.unidue.inf.is.ezdl.gframedl.utils.HighlightingUtils.java
/**
 * Wraps every group-1 match of {@code matcher} in a {@code <font>} element with the given
 * colors, skipping matches that appear to lie inside an HTML tag (the closest preceding
 * {@code <} comes after the closest preceding {@code >}).
 *
 * @param htmlString the text to highlight; {@code null} yields an empty string
 * @param escape whether every emitted piece of {@code htmlString} is passed through
 *     {@code escape(...)} first
 * @param highlightStrings the search terms; if empty, or consisting of a single empty string,
 *     {@code htmlString} is returned unchanged
 * @param matcher the matcher whose group-1 offsets are used as indices into {@code htmlString}
 * @param foregroundColor value for the {@code color} attribute
 * @param backgroundColor value for the {@code bgcolor} attribute
 * @return the highlighted text
 */
private static String highlight(String htmlString, boolean escape, List<String> highlightStrings,
        Matcher matcher, String foregroundColor, String backgroundColor) {
    if (htmlString == null) {
        return "";
    }
    boolean nothingToHighlight = highlightStrings.isEmpty()
            || highlightStrings.size() == 1 && highlightStrings.get(0).isEmpty();
    if (nothingToHighlight) {
        return htmlString;
    }

    final String openTag =
            "<font bgcolor=\"" + backgroundColor + "\" color=\"" + foregroundColor + "\">";
    StringBuilder out = new StringBuilder();
    int copiedUpTo = 0;
    while (matcher.find()) {
        int hitStart = matcher.start(1);
        int hitEnd = matcher.end(1);
        String before = htmlString.substring(0, hitStart);
        int lastClose = before.lastIndexOf(">");
        int lastOpen = before.lastIndexOf("<");
        boolean insideTag = lastClose <= lastOpen && (lastClose != -1 || lastOpen != -1);
        if (insideTag) {
            // Leave the hit alone; it is emitted later as part of the unhighlighted text.
            continue;
        }

        String gap = htmlString.substring(copiedUpTo, hitStart);
        out.append(escape ? escape(gap) : gap);
        out.append(openTag);
        String hit = htmlString.substring(hitStart, hitEnd);
        out.append(escape ? escape(hit) : hit);
        out.append("</font>");
        copiedUpTo = hitEnd;
    }

    String rest = htmlString.substring(copiedUpTo, htmlString.length());
    out.append(escape ? escape(rest) : rest);
    return out.toString();
}