List of usage examples for the java.util.regex.Matcher.start() method
public int start()
From source file:sapience.injectors.stax.inject.StringBasedStaxStreamInjector.java
/**
 * Collects the namespace definitions found in an XML reference.
 *
 * <p>Each region of {@code xml} matched by {@code nsPattern} is cut in two: the text between its
 * first {@code ':'} and its last {@code '='} is taken as the prefix, and the text between its
 * first and last double quote is taken as the URI.
 *
 * @param xml XML reference carrying local namespace definitions
 * @return one {@link QName} per match, in match order, built as {@code new QName(uri, prefix)}
 *         (the prefix is therefore stored as the QName's local part); empty if nothing matches
 */
private List<QName> extractNamespaces(String xml) {
    List<QName> namespaces = new ArrayList<QName>();
    for (Matcher declarations = nsPattern.matcher(xml); declarations.find();) {
        String declaration = xml.substring(declarations.start(), declarations.end());

        int prefixFrom = declaration.indexOf(":") + 1;
        int prefixTo = declaration.lastIndexOf("=");
        String prefix = declaration.substring(prefixFrom, prefixTo);

        int uriFrom = declaration.indexOf("\"") + 1;
        int uriTo = declaration.lastIndexOf("\"");
        String uri = declaration.substring(uriFrom, uriTo);

        namespaces.add(new QName(uri, prefix));
    }
    return namespaces;
}
From source file:gate.tagger.tagme.TaggerTagMeWS.java
protected void annotateText(Document doc, AnnotationSet outputAS, long from, long to) { String text = ""; try {/* w ww . j ava 2 s. c o m*/ text = doc.getContent().getContent(from, to).toString(); } catch (InvalidOffsetException ex) { throw new GateRuntimeException("Unexpected offset exception, offsets are " + from + "/" + to); } // send the text to the service and get back the response //System.out.println("Annotating text: "+text); //System.out.println("Starting offset is "+from); // NOTE: there is a bug in the TagMe service which causes offset errors // if we use the tweet mode and there are certain patterns in the tweet. // The approach recommended by Francesco Piccinno is to replace those // patterns by spaces. if (getIsTweet()) { logger.debug("Text before cleaning: >>" + text + "<<"); // replace text = text.replaceAll(patternStringRT3, " "); text = text.replaceAll(patternStringRT2, " "); text = text.replaceAll(patternHashTag, " $1"); // now replace the remaining patterns by spaces StringBuilder sb = new StringBuilder(text); Matcher m = patternUrl.matcher(text); while (m.find()) { int start = m.start(); int end = m.end(); sb.replace(start, end, nSpaces(end - start)); } m = patternUser.matcher(text); while (m.find()) { int start = m.start(); int end = m.end(); sb.replace(start, end, nSpaces(end - start)); } text = sb.toString(); logger.debug("Text after cleaning: >>" + text + "<<"); } TagMeAnnotation[] tagmeAnnotations = getTagMeAnnotations(text); for (TagMeAnnotation tagmeAnn : tagmeAnnotations) { if (tagmeAnn.rho >= minrho) { FeatureMap fm = Factory.newFeatureMap(); fm.put("tagMeId", tagmeAnn.id); fm.put("title", tagmeAnn.title); fm.put("rho", tagmeAnn.rho); fm.put("spot", tagmeAnn.spot); fm.put("link_probability", tagmeAnn.link_probability); if (tagmeAnn.title == null) { throw new GateRuntimeException("Odd: got a null title from the TagMe service" + tagmeAnn); } else { fm.put("inst", "http://dbpedia.org/resource/" + recodeForDbp38(tagmeAnn.title)); } try { 
gate.Utils.addAnn(outputAS, from + tagmeAnn.start, from + tagmeAnn.end, getOutputAnnotationType(), fm); } catch (Exception ex) { System.err.println( "Got an exception in document " + doc.getName() + ": " + ex.getLocalizedMessage()); ex.printStackTrace(System.err); System.err.println("from=" + from + ", to=" + to + " TagMeAnn=" + tagmeAnn); } } } }
From source file:gtu.youtube.JavaYoutubeVideoUrlHandler.java
private String getParamStr(String baseUrlString, int start, String endPattern) { String tmpUrl = StringUtils.substring(baseUrlString, start); Pattern ptn = Pattern.compile(endPattern); Matcher mth = ptn.matcher(tmpUrl); int pos = -1; if (mth.find()) { pos = mth.start(); } else {// w w w . j a v a 2s . c o m pos = baseUrlString.length(); } String paramStr = StringUtils.substring(tmpUrl, 0, pos); return paramStr; }
From source file:com.haulmont.cuba.core.global.QueryTransformerRegex.java
/**
 * Adds a condition to the query held in {@code buffer} exactly as given.
 *
 * <p>The condition is wrapped in parentheses and prefixed with {@code " and "} when
 * {@code WHERE_PATTERN} matches after the entity clause, otherwise with {@code " where "}. It is
 * inserted one character before the first match of {@code LAST_CLAUSE_PATTERN} after the entity
 * clause, or at the end of the buffer when there is no such match. Every group-1 match of
 * {@code PARAM_PATTERN} in the condition is recorded in {@code addedParams}.
 *
 * @param where condition text to add
 */
@Override
public void addWhereAsIs(String where) {
    Matcher fromEntity = FROM_ENTITY_PATTERN.matcher(buffer);
    // Return value is not needed here; the call is kept because end() is read from this matcher below.
    findAlias(fromEntity);
    int searchFrom = fromEntity.end();

    Matcher trailingClause = LAST_CLAUSE_PATTERN.matcher(buffer);
    int insertPos = trailingClause.find(searchFrom) ? trailingClause.start() - 1 : buffer.length();

    boolean hasWhere = WHERE_PATTERN.matcher(buffer).find(searchFrom);
    StringBuilder condition = new StringBuilder(hasWhere ? " and " : " where ");
    condition.append("(").append(where).append(")");
    buffer.insert(insertPos, condition);

    for (Matcher params = PARAM_PATTERN.matcher(where); params.find();) {
        addedParams.add(params.group(1));
    }
}
From source file:com.jkoolcloud.tnt4j.streams.custom.parsers.ApacheAccessLogParser.java
/** * Makes log entry parsing RegEx from defined Apache access log configuration pattern string. * * @param apacheLogPattern//from w ww. ja v a 2s .c o m * Apache access log configuration pattern string * * @return regular expression string, or {@code null} if can't make RegEx string from defined Apache access log * configuration pattern string */ private String makeRegexPattern(String apacheLogPattern) { Pattern pattern = Pattern.compile(APACHE_LOG_CONFIG_TOKEN_REPLACEMENT_REGEX); Matcher matcher = pattern.matcher(apacheLogPattern); StringBuilder logRegexBuff = new StringBuilder(); int pos = 0; while (matcher.find()) { logRegexBuff.append(apacheLogPattern.substring(pos, matcher.start())); logRegexBuff.append(mapConfigTokenToRegex(matcher.group())); pos = matcher.end(); } if (pos < apacheLogPattern.length()) { logRegexBuff.append(apacheLogPattern.substring(pos, apacheLogPattern.length())); } String logRegex = logRegexBuff.toString().trim(); // return logRegex.isEmpty() ? null : "(?m)^" + logRegex; return logRegex.isEmpty() ? null : '^' + logRegex; // NON-NLS }
From source file:br.msf.commons.util.CharSequenceUtils.java
@SuppressWarnings("unchecked") public static List<MatchEntry> findPattern(final Pattern pattern, final CharSequence sequence) { if (pattern == null || isEmptyOrNull(sequence)) { return CollectionUtils.EMPTY_LIST; }//from www . jav a 2 s. c o m final Matcher matcher = pattern.matcher(sequence); final List<MatchEntry> occurrences = new ArrayList<MatchEntry>(); while (matcher.find()) { occurrences.add(new MatchEntry(matcher.start(), matcher.end())); } return occurrences; }
From source file:com.intuit.tank.tools.debugger.FindReplaceDialog.java
private void find(RSyntaxTextArea textArea) { try {//from ww w .j a va 2s. c o m int offset = currentLine < textArea.getLineCount() ? textArea.getLineStartOffset(currentLine) : 0; String searchTerm = tfSearchEditor.getText(); String text = textArea.getText(); int foundIndex = -1; int flags = (checkboxRegexp.isSelected() ? 0 : Pattern.LITERAL) | (checkboxMatchCase.isSelected() ? 0 : Pattern.CASE_INSENSITIVE); Pattern p = Pattern.compile(searchTerm, flags); Matcher matcher = p.matcher(text); matcher.region(offset, text.length()); if (matcher.find()) { foundIndex = matcher.start(); } else if (checkboxWrap.isSelected() && offset > 0) { matcher.region(0, offset); if (matcher.find()) { foundIndex = matcher.start(); } } if (foundIndex != -1) { int lineOfOffset = textArea.getLineOfOffset(foundIndex); // textArea.setActiveLineRange(lineOfOffset, lineOfOffset); textArea.setCurrentLine(lineOfOffset); // textArea.setCaretPosition(foundIndex + searchTerm.length()); parent.repaint(); parent.fireStepChanged(lineOfOffset); currentLine = lineOfOffset + 1; } else { JOptionPane.showMessageDialog(parent, "Search String not found."); } } catch (Exception e) { e.printStackTrace(); } }
From source file:com.haulmont.cuba.core.global.QueryTransformerRegex.java
@Override public void addWhere(String where) { Matcher entityMatcher = FROM_ENTITY_PATTERN.matcher(buffer); String alias = findAlias(entityMatcher); int insertPos = buffer.length(); Matcher lastClauseMatcher = LAST_CLAUSE_PATTERN.matcher(buffer); if (lastClauseMatcher.find(entityMatcher.end())) insertPos = lastClauseMatcher.start() - 1; StringBuilder sb = new StringBuilder(); Matcher whereMatcher = WHERE_PATTERN.matcher(buffer); int whereEnd = -1; boolean needOpenBracket = false; if (whereMatcher.find(entityMatcher.end())) { whereEnd = whereMatcher.end();/* w w w . j ava2s . c o m*/ Matcher orMatcher = OR_PATTERN.matcher(buffer); orMatcher.region(whereEnd + 1, insertPos); if (orMatcher.find()) { // surround with brackets if there is OR inside WHERE sb.append(")"); needOpenBracket = true; } sb.append(" and "); } else { sb.append(" where "); } sb.append("(").append(where); int idx; while ((idx = sb.indexOf(ALIAS_PLACEHOLDER)) >= 0) { sb.replace(idx, idx + ALIAS_PLACEHOLDER.length(), alias); } sb.append(")"); if (needOpenBracket) { buffer.insert(whereEnd + 1, "("); insertPos++; } buffer.insert(insertPos, sb); Matcher paramMatcher = PARAM_PATTERN.matcher(where); while (paramMatcher.find()) { addedParams.add(paramMatcher.group(1)); } }
From source file:com.google.acre.appengine.script.AppEngineAsyncUrlfetch.java
private Scriptable callback_result(AsyncRequest req, HTTPResponse res) { long waiting_time = System.currentTimeMillis() - req.start_time; URL furl = res.getFinalUrl(); if (furl == null) { furl = req.url;// ww w . j av a2 s .c o m } BrowserCompatSpecFactory bcsf = new BrowserCompatSpecFactory(); CookieSpec cspec = bcsf.newInstance(null); String protocol = furl.getProtocol(); boolean issecure = ("https".equals(protocol)); int port = furl.getPort(); if (port == -1) port = 80; CookieOrigin origin = new CookieOrigin(furl.getHost(), port, furl.getPath(), issecure); Context ctx = Context.getCurrentContext(); Scriptable out = ctx.newObject(_scope); Scriptable headers = ctx.newObject(_scope); Scriptable cookies = ctx.newObject(_scope); out.put("status", out, res.getResponseCode()); String response_body = null; try { response_body = new String(res.getContent(), getResponseEncoding(res)); out.put("body", out, response_body); } catch (java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } out.put("headers", out, headers); out.put("cookies", out, cookies); StringBuilder response_header_log = new StringBuilder(); for (HTTPHeader h : res.getHeaders()) { if (h.getName().equalsIgnoreCase("set-cookie")) { String set_cookie = h.getValue(); Matcher m = Pattern.compile("\\s*(([^,]|(,\\s*\\d))+)").matcher(set_cookie); while (m.find()) { Header ch = new BasicHeader("Set-Cookie", set_cookie.substring(m.start(), m.end())); try { List<Cookie> pcookies = cspec.parse(ch, origin); for (Cookie c : pcookies) { cookies.put(c.getName(), cookies, new AcreCookie(c).toJsObject(_scope)); } } catch (MalformedCookieException e) { // we've occasionally choked on cookie-set, // e.g. www.google.com returning expires=; expires=Mon, 01-Jan-1990 00:00:00 GMT; // no solution but at least log exactly what's happening. 
String cookiestring = ch.toString(); _logger.warn("urlfetch.response.async", "exception thrown on bad cookie " + cookiestring); throw new RuntimeException(e); } } } headers.put(h.getName(), headers, h.getValue()); response_header_log.append(h.getName() + ": " + h.getValue() + ", "); } boolean system = req.system; boolean log_to_user = req.log_to_user; String log_body = new String(); if (res.getResponseCode() != 200 && response_body != null) { log_body = response_body; } _logger.syslog4j("INFO", "urlfetch.response.async", "URL", furl.toString(), "Status", Integer.toString(res.getResponseCode()), "Headers", response_header_log, "Body", log_body); if (system && log_to_user) { _response.userlog4j("INFO", "urlfetch.response.async", "URL", furl.toString(), "Status", Integer.toString(res.getResponseCode()), "Headers", response_header_log); } _costCollector.collect((system) ? "asuc" : "auuc").collect((system) ? "asuw" : "auuw", waiting_time); return out; }
From source file:mobi.jenkinsci.server.core.net.ProxyUtil.java
/**
 * Replaces every {@code <script ... src="...">} element in a document with the text returned by
 * {@code retrieveJs} for its {@code src} value.
 *
 * @param userAgent    passed through to {@code retrieveJs}
 * @param pluginName   passed through to {@code retrieveJs}
 * @param url          URL of the document; used in log messages and passed to {@code retrieveJs}
 * @param resultString document text to process
 * @return the document text with each matched script element replaced
 * @throws IOException declared by this method; presumably propagated from {@code retrieveJs}
 */
private String resolveJs(final String userAgent, final String pluginName, final String url,
        final String resultString) throws IOException {
    log.debug("Resolving JavaScript for URL " + url);

    final Pattern scriptPattern = Pattern.compile(
            "<script>?[^>]*src=[\"\\']([^>\"\\']*)[\"\\']([^>]*/>|[^>]*>[ \r\n]*</script>)",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
    final Matcher scriptTags = scriptPattern.matcher(resultString);

    final StringBuilder inlined = new StringBuilder();
    int consumed = 0;
    while (scriptTags.find(consumed)) {
        // Text between the previous script element and this one is copied unchanged.
        inlined.append(resultString, consumed, scriptTags.start());

        final String scriptUrl = scriptTags.group(1);
        final String scriptText = retrieveJs(userAgent, pluginName, url, scriptUrl);
        inlined.append(scriptText);

        consumed = scriptTags.end();
    }
    inlined.append(resultString, consumed, resultString.length());

    log.debug(inlined.length() + " JavaScript chars included for URL " + url);
    return inlined.toString();
}