List of usage examples for java.util.regex Matcher start
public int start()
From source file:net.sf.logsaw.dialect.log4j.pattern.Log4JConversionPatternTranslator.java
@Override public List<ConversionRule> extractRules(String externalPattern) throws CoreException { // Find supported conversion characters Matcher m = EXTRACTION_PATTERN.matcher(externalPattern); List<ConversionRule> ret = new ArrayList<ConversionRule>(); while (m.find()) { String minWidthModifier = m.group(2); String maxWidthModifier = m.group(4); String conversionName = m.group(5); String conversionModifier = m.group(7); int minWidth = -1; // not specified if ((minWidthModifier != null) && (minWidthModifier.length() > 0)) { minWidth = Integer.parseInt(minWidthModifier); }/*from w ww . ja v a2 s. c o m*/ int maxWidth = -1; // not specified if ((maxWidthModifier != null) && (maxWidthModifier.length() > 0)) { maxWidth = Integer.parseInt(maxWidthModifier); } ConversionRule rule = new ConversionRule(); rule.setBeginIndex(m.start()); rule.setLength(m.end() - m.start()); rule.setMaxWidth(maxWidth); rule.setMinWidth(minWidth); rule.setPlaceholderName(conversionName); rule.setModifier(conversionModifier); if (conversionName.equals("n")) { rule.setLineBreak(true); } ret.add(rule); } return ret; }
From source file:com.twosigma.beaker.r.utils.RServerEvaluator.java
protected String fixSvgResults(String xml) { Pattern pat = Pattern.compile("<use xlink:href=\"#([^\"]+)\" x=\"([^\"]+)\" y=\"([^\"]+)\"/>"); xml = xml.replace("d=\"\"", ""); xml = xml.replace("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", ""); while (true) { Matcher matcher = pat.matcher(xml); if (!matcher.find()) { break; }// w w w. ja va 2 s . c o m String expansion = "<g transform=\"translate(" + matcher.group(2) + "," + matcher.group(3) + ")\">\n"; String glyph = matcher.group(1); int gi = xml.indexOf(glyph); int pathStart = xml.indexOf("<path", gi); int pathStop = xml.indexOf("/>", pathStart); String path = xml.substring(pathStart, pathStop + 2); expansion = expansion + path + "</g>\n"; xml = xml.substring(0, matcher.start()) + expansion + xml.substring(matcher.end()); } int defsStart = xml.indexOf("<defs>"); if (defsStart >= 0) { int defsStop = xml.indexOf("</defs>"); xml = xml.substring(0, defsStart) + xml.substring(defsStop + 7); } return xml; }
From source file:com.quancheng.saluki.core.grpc.router.internal.ConditionRouter.java
private Map<String, MatchPair> doParseRule(String rule) throws ParseException { Map<String, MatchPair> condition = new HashMap<String, MatchPair>(); if (StringUtils.isBlank(rule)) { return condition; }/*from w ww.j ava 2 s. c o m*/ // ???Key-Value MatchPair pair = null; // Value Set<String> values = null; final Matcher matcher = ROUTE_PATTERN.matcher(rule); while (matcher.find()) { // ?? String separator = matcher.group(1); String content = matcher.group(2); // ? if (separator == null || separator.length() == 0) { pair = new MatchPair(); condition.put(content, pair); } // KV else if ("&".equals(separator)) { if (condition.get(content) == null) { pair = new MatchPair(); condition.put(content, pair); } else { condition.put(content, pair); } } // KVValue else if ("=".equals(separator)) { if (pair == null) throw new ParseException("Illegal route rule \"" + rule + "\", The error char '" + separator + "' at index " + matcher.start() + " before \"" + content + "\".", matcher.start()); values = pair.matches; values.add(content); } // KVValue else if ("!=".equals(separator)) { if (pair == null) throw new ParseException("Illegal route rule \"" + rule + "\", The error char '" + separator + "' at index " + matcher.start() + " before \"" + content + "\".", matcher.start()); values = pair.mismatches; values.add(content); } // KVValue? else if (",".equals(separator)) { // ? if (values == null || values.size() == 0) throw new ParseException("Illegal route rule \"" + rule + "\", The error char '" + separator + "' at index " + matcher.start() + " before \"" + content + "\".", matcher.start()); values.add(content); } else { throw new ParseException("Illegal route rule \"" + rule + "\", The error char '" + separator + "' at index " + matcher.start() + " before \"" + content + "\".", matcher.start()); } } return condition; }
From source file:com.norconex.collector.http.url.impl.GenericLinkExtractor.java
private void extractLinks(String theContent, Referer referrer, Set<Link> links) { String content = theContent;//from w ww . j a va 2 s . co m // Get rid of <script> tags to eliminate possibly generated URLs. content = SCRIPT_PATTERN.matcher(content).replaceAll(""); //TODO eliminate URLs inside <!-- comments --> too? Matcher matcher = tagPattern.matcher(content); while (matcher.find()) { String tagName = matcher.group(1); String restOfTag = matcher.group(4); String attribs = tagAttribs.getString(tagName); //--- the body value of the tag is taken as URL --- if (StringUtils.isBlank(attribs)) { Pattern bodyPattern = getTagBodyPattern(tagName); Matcher bodyMatcher = bodyPattern.matcher(content); String url = null; if (bodyMatcher.find(matcher.start())) { url = bodyMatcher.group(1).trim(); url = toCleanAbsoluteURL(referrer, url); if (url == null) { continue; } Link link = new Link(url); if (keepReferrerData) { link.setReferrer(referrer.url); link.setTag(tagName); } links.add(link); } continue; } //--- a tag attribute has the URL --- String text = null; if (StringUtils.isBlank(restOfTag)) { continue; } if ("meta".equalsIgnoreCase(tagName)) { extractMetaRefresh(restOfTag, referrer, links); continue; } if ("a".equalsIgnoreCase(tagName)) { if (!ignoreNofollow && isNofollow(restOfTag)) { continue; } if (keepReferrerData) { Matcher textMatcher = A_TEXT_PATTERN.matcher(content); if (textMatcher.find(matcher.start())) { text = textMatcher.group(1).trim(); } } } Pattern p = Pattern.compile("(^|\\s)(" + attribs + ")\\s*=\\s*([\"'])([^\\<\\>]*?)\\3", PATTERN_FLAGS); Matcher urlMatcher = p.matcher(restOfTag); while (urlMatcher.find()) { String attribName = urlMatcher.group(2); String matchedUrl = urlMatcher.group(PATTERN_URL_GROUP); if (StringUtils.isBlank(matchedUrl)) { continue; } String[] urls = null; if ("object".equalsIgnoreCase(tagName)) { urls = StringUtils.split(matchedUrl, ' '); } else if ("applet".equalsIgnoreCase(tagName)) { urls = StringUtils.split(matchedUrl, ", "); } else { urls = new String[] { matchedUrl }; } for (String url : urls) { url = toCleanAbsoluteURL(referrer, url); if (url == null) { continue; } Link link = new Link(url); if (keepReferrerData) { link.setReferrer(referrer.url); link.setTag(tagName + "." + attribName); link.setText(text); } links.add(link); } } } }
From source file:com.ephesoft.dcma.tablefinder.share.DataTableService.java
/** * Sets column header span for a column. * //w ww . j av a 2 s .c o m * @param inputData {@link String} * @param columnHeaderPattern {@link String} * @param spanList {@link List}<{@link Span}> * @throws DCMAApplicationException {@link DCMAApplicationException} */ private final DataCarrier setColumnHeaderSpan(final String inputData, final String columnHeaderPattern, final List<Span> spanList) throws DCMAApplicationException { List<Span> bestMatchSpans = null; String value = null; float bestConfidence = 0; Coordinates bestMatchSpanCoordinates = null; if (EphesoftStringUtil.isNullOrEmpty(inputData)) { LOGGER.warn("Invalid input character sequence of line."); } else { if (EphesoftStringUtil.isNullOrEmpty(columnHeaderPattern)) { LOGGER.warn("Invalid input pattern sequence."); } else { // Compile and use regular expression final CharSequence inputStr = inputData; final Pattern pattern = Pattern.compile(columnHeaderPattern); final Matcher matcher = pattern.matcher(inputStr); float previousConfidence = 0; final List<Coordinates> coordinatesList = new ArrayList<Coordinates>(); while (matcher.find()) { // Get all groups for this match for (int i = 0; i <= matcher.groupCount(); i++) { final String groupString = matcher.group(i); if (groupString != null) { final int startIndex = matcher.start(); final int endIndex = startIndex + groupString.trim().length(); List<Span> matchedSpans = PatternMatcherUtil.getMatchedSpans(spanList, startIndex, endIndex); String headerValue = PatternMatcherUtil.getMatchedSpansValue(matchedSpans); if (!EphesoftStringUtil.isNullOrEmpty(headerValue)) { final float confidence = (groupString.length() * CommonConstants.DEFAULT_MAXIMUM_CONFIDENCE) / headerValue.length(); LOGGER.info("Matched Value : ", groupString, " ,Confidence : ", confidence); if (confidence > previousConfidence) { bestMatchSpans = matchedSpans; bestConfidence = confidence; value = headerValue; previousConfidence = confidence; coordinatesList.clear(); for (Span span : matchedSpans) { coordinatesList.add(span.getCoordinates()); } bestMatchSpanCoordinates = HocrUtil.getRectangleCoordinates(coordinatesList); } LOGGER.info(groupString); } } } } } } DataCarrier dataCarrier = new DataCarrier(bestMatchSpans, bestConfidence, value, bestMatchSpanCoordinates); return dataCarrier; }
From source file:com.hurence.logisland.processor.mailer.MailerProcessor.java
/** * This parses the HTML template to/*from ww w. ja v a2s . c o m*/ * - get the list of needed parameters (${xxx} parameters in the template, so get the foo, bar etc variables) * - create a usable template using the MessageFormat system (have strings with ${0}, ${1} instead of ${foo}, ${bar}) * @param htmlTemplate */ private void prepareTemplateAndParameters(String htmlTemplate) { Pattern pattern = Pattern.compile("\\$\\{(.+?)}"); // Detect ${...} sequences Matcher matcher = pattern.matcher(htmlTemplate); // To construct a new template with ${0}, ${1} fields .. StringBuilder buffer = new StringBuilder(); int previousStart = 0; int currentParameterIndex = 0; // Loop through the parameters in the template while (matcher.find()) { String parameter = matcher.group(1); String stringBeforeCurrentParam = htmlTemplate.substring(previousStart, matcher.start()); // Add string before parameter buffer.append(stringBeforeCurrentParam); // Replace parameter with parameter index buffer.append("{" + currentParameterIndex + "}"); // Save current parameter name in the list parameterNames.add(parameter); previousStart = matcher.end(); currentParameterIndex++; } // Add string after the last parameter String stringAfterLastParam = htmlTemplate.substring(previousStart); buffer.append(stringAfterLastParam); // Create the HTML form htmlForm = new MessageFormat(buffer.toString()); }
From source file:com.microsoft.gittf.core.tasks.CheckinHeadCommitTask.java
/** * Parses the comments of the commits to list the mentioned work items *//* w w w . java 2 s . c o m*/ private WorkItemCheckinInfo[] getWorkItems(final TaskProgressMonitor progressMonitor, final String commitComment, final boolean isLastCommit) throws Exception { List<WorkItemCheckinInfo> workItemsCheckinInfo = new ArrayList<WorkItemCheckinInfo>(); if (mentions) { final String REGEX = "(\\s|^)#\\d+(\\s|$)(#\\d+(\\s|$))*"; //$NON-NLS-1$ if (commitComment != null && commitComment.length() > 0) { final Pattern pattern = Pattern.compile(REGEX); // get a matcher object final Matcher patternMatcher = pattern.matcher(commitComment); while (patternMatcher.find()) { final String workItemIDREGEX = "#\\d+"; //$NON-NLS-1$ final Pattern workItemIDPattern = Pattern.compile(workItemIDREGEX); final String workItemIDString = commitComment.substring(patternMatcher.start(), patternMatcher.end()); final Matcher workItemIDMatcher = workItemIDPattern.matcher(workItemIDString); while (workItemIDMatcher.find()) { final WorkItem workitem = getWorkItem(progressMonitor, workItemIDString.substring(workItemIDMatcher.start(), workItemIDMatcher.end())); if (workitem != null) { final WorkItemCheckinInfo workItemCheckinInfo = new WorkItemCheckinInfo(workitem, CheckinWorkItemAction.ASSOCIATE); if (!workItemsCheckinInfo.contains(workItemCheckinInfo)) { workItemsCheckinInfo.add(workItemCheckinInfo); } } } } } } if (isLastCommit) { // If there were no work items in the comments if (workItemsCheckinInfo.isEmpty()) { return workItems; } for (final WorkItemCheckinInfo workItem : workItems) { if (!workItemsCheckinInfo.contains(workItem)) { workItemsCheckinInfo.add(workItem); } } } return workItemsCheckinInfo.toArray(new WorkItemCheckinInfo[workItemsCheckinInfo.size()]); }
From source file:de.geeksfactory.opacclient.apis.Pica.java
protected DetailledItem parse_result(String html) { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url);/* w w w . j a va 2s. c o m*/ DetailledItem result = new DetailledItem(); for (Element a : doc.select("a[href*=PPN")) { Map<String, String> hrefq = getQueryParamsFirst(a.absUrl("href")); String ppn = hrefq.get("PPN"); result.setId(ppn); break; } // GET COVER if (doc.select("td.preslabel:contains(ISBN) + td.presvalue").size() > 0) { Element isbnElement = doc.select("td.preslabel:contains(ISBN) + td.presvalue").first(); String isbn = ""; for (Node child : isbnElement.childNodes()) { if (child instanceof TextNode) { isbn = ((TextNode) child).text().trim(); break; } } result.setCover(ISBNTools.getAmazonCoverURL(isbn, true)); } // GET TITLE AND SUBTITLE String titleAndSubtitle; Element titleAndSubtitleElem = null; String titleRegex = ".*(Titel|Aufsatz|Zeitschrift|Gesamttitel" + "|Title|Article|Periodical|Collective\\stitle" + "|Titre|Article|P.riodique|Titre\\sg.n.ral).*"; String selector = "td.preslabel:matches(" + titleRegex + ") + td.presvalue"; if (doc.select(selector).size() > 0) { titleAndSubtitleElem = doc.select(selector).first(); titleAndSubtitle = titleAndSubtitleElem.text().trim(); int slashPosition = Math.min(titleAndSubtitle.indexOf("/"), titleAndSubtitle.indexOf(":")); String title; if (slashPosition > 0) { title = titleAndSubtitle.substring(0, slashPosition).trim(); String subtitle = titleAndSubtitle.substring(slashPosition + 1).trim(); result.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle)); } else { title = titleAndSubtitle; } result.setTitle(title); } else { result.setTitle(""); } // Details int line = 0; Elements lines = doc.select("td.preslabel + td.presvalue"); if (titleAndSubtitleElem != null) { lines.remove(titleAndSubtitleElem); } for (Element element : lines) { Element titleElem = element.firstElementSibling(); String detail = ""; if (element.select("div").size() > 1 && element.select("div").text().equals(element.text())) { boolean first = true; for (Element div : element.select("div")) { if (!div.text().replace("\u00a0", " ").trim().equals("")) { if (!first) { detail += "\n" + div.text().replace("\u00a0", " ").trim(); } else { detail += div.text().replace("\u00a0", " ").trim(); first = false; } } } } else { detail = element.text().replace("\u00a0", " ").trim(); } String title = titleElem.text().replace("\u00a0", " ").trim(); if (element.select("hr").size() > 0) // after the separator we get the copies { break; } if (detail.length() == 0 && title.length() == 0) { line++; continue; } if (title.contains(":")) { title = title.substring(0, title.indexOf(":")); // remove colon } result.addDetail(new Detail(title, detail)); if (element.select("a").size() == 1 && !element.select("a").get(0).text().trim().equals("")) { String url = element.select("a").first().absUrl("href"); if (!url.startsWith(opac_url)) { result.addDetail(new Detail(stringProvider.getString(StringProvider.LINK), url)); } } line++; } line++; // next line after separator // Copies Copy copy = new Copy(); String location = ""; // reservation info will be stored as JSON JSONArray reservationInfo = new JSONArray(); while (line < lines.size()) { Element element = lines.get(line); if (element.select("hr").size() == 0) { Element titleElem = element.firstElementSibling(); String detail = element.text().trim(); String title = titleElem.text().replace("\u00a0", " ").trim(); if (detail.length() == 0 && title.length() == 0) { line++; continue; } if (title.contains("Standort") || title.contains("Vorhanden in") || title.contains("Location")) { location += detail; } else if (title.contains("Sonderstandort")) { location += " - " + detail; } else if (title.contains("Systemstelle") || title.contains("Subject")) { copy.setDepartment(detail); } else if (title.contains("Fachnummer") || title.contains("locationnumber")) { copy.setLocation(detail); } else if (title.contains("Signatur") || title.contains("Shelf mark")) { copy.setShelfmark(detail); } else if (title.contains("Anmerkung")) { location += " (" + detail + ")"; } else if (title.contains("Link")) { result.addDetail(new Detail(title.replace(":", "").trim(), detail)); } else if (title.contains("Status") || title.contains("Ausleihinfo") || title.contains("Ausleihstatus") || title.contains("Request info")) { // Find return date Pattern pattern = Pattern.compile("(till|bis) (\\d{2}-\\d{2}-\\d{4})"); Matcher matcher = pattern.matcher(detail); if (matcher.find()) { DateTimeFormatter fmt = DateTimeFormat.forPattern("dd-MM-yyyy").withLocale(Locale.GERMAN); try { copy.setStatus(detail.substring(0, matcher.start() - 1).trim()); copy.setReturnDate(fmt.parseLocalDate(matcher.group(2))); } catch (IllegalArgumentException e) { e.printStackTrace(); copy.setStatus(detail); } } else { copy.setStatus(detail); } // Get reservation info if (element.select("a:has(img[src*=inline_arrow])").size() > 0) { Element a = element.select("a:has(img[src*=inline_arrow])").first(); boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*"); JSONObject reservation = new JSONObject(); try { reservation.put("multi", multipleCopies); reservation.put("link", _extract_url(a.absUrl("href"))); reservation.put("desc", location); reservationInfo.put(reservation); } catch (JSONException e1) { e1.printStackTrace(); } result.setReservable(true); } } } else { copy.setBranch(location); result.addCopy(copy); location = ""; copy = new Copy(); } line++; } if (copy.notEmpty()) { copy.setBranch(location); result.addCopy(copy); } if (reservationInfo.length() == 0) { // No reservation info found yet, because we didn't find any copies. // If there is a reservation link somewhere in the rows we interpreted // as details, we still want to use it. if (doc.select("td a:has(img[src*=inline_arrow])").size() > 0) { Element a = doc.select("td a:has(img[src*=inline_arrow])").first(); boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*"); JSONObject reservation = new JSONObject(); try { reservation.put("multi", multipleCopies); reservation.put("link", _extract_url(a.attr("href"))); reservation.put("desc", location); reservationInfo.put(reservation); } catch (JSONException e1) { e1.printStackTrace(); } result.setReservable(true); } } result.setReservation_info(reservationInfo.toString()); // Volumes if (doc.select("a[href^=FAM?PPN=]").size() > 0) { String href = doc.select("a[href^=FAM?PPN=]").attr("href"); String ppn = getQueryParamsFirst(href).get("PPN"); Map<String, String> data = new HashMap<>(); data.put("ppn", ppn); result.setVolumesearch(data); } return result; }
From source file:com.healthmarketscience.jackcess.ImportUtil.java
/** * Splits the given line using the given delimiter pattern and quote * character. May read additional lines for quotes spanning newlines. */// w w w.j av a2 s . co m private static Object[] splitLine(String line, Pattern delim, char quote, BufferedReader in, int numColumns) throws IOException { List<String> tokens = new ArrayList<String>(); StringBuilder sb = new StringBuilder(); Matcher m = delim.matcher(line); int idx = 0; while (idx < line.length()) { if (line.charAt(idx) == quote) { // find quoted value sb.setLength(0); ++idx; while (true) { int endIdx = line.indexOf(quote, idx); if (endIdx >= 0) { sb.append(line, idx, endIdx); ++endIdx; if ((endIdx < line.length()) && (line.charAt(endIdx) == quote)) { // embedded quote sb.append(quote); // keep searching idx = endIdx + 1; } else { // done idx = endIdx; break; } } else { // line wrap sb.append(line, idx, line.length()); sb.append(LINE_SEPARATOR); idx = 0; line = in.readLine(); if (line == null) { throw new EOFException("Missing end of quoted value " + sb); } } } tokens.add(sb.toString()); // skip next delim idx = (m.find(idx) ? m.end() : line.length()); } else if (m.find(idx)) { // next unquoted value tokens.add(line.substring(idx, m.start())); idx = m.end(); } else { // trailing token tokens.add(line.substring(idx)); idx = line.length(); } } return tokens.toArray(new Object[Math.max(tokens.size(), numColumns)]); }
From source file:eu.semlibproject.annotationserver.restapis.APIHelper.java
/** * Prepare a SPARQL query to be executed on the internal SPARQL end-point * //from w ww. j a v a 2s.c om * @param query the SPARQL query * @param annotationsIDs the list of all annotations ID in a Notebooks * @return the prepared query */ public String prepareQueryForNotebooksSPARQLEndPoint(String query, List<String> annotationsIDs) { String froms = ""; String fromNameds = ""; for (String annID : annotationsIDs) { String annotationGraph = Annotation.getGraphURIFromID(annID); String itemGraph = Annotation.getItemsGraphURIFormID(annID); froms += "FROM <" + annotationGraph + "> FROM <" + itemGraph + "> "; fromNameds += "FROM NAMED <" + annotationGraph + "> FROM NAMED <" + itemGraph + "> "; } String finalFroms = " " + froms + fromNameds; // Remove any existing FROM and FROM NAMED from the original query int startIndex = -1; Pattern regExPattern = Pattern.compile("(FROM <.+>\\s*|FROM NAMED <.+>\\s*)+", Pattern.CASE_INSENSITIVE); Matcher matcher = regExPattern.matcher(query); if (matcher.find()) { startIndex = matcher.start(); String cleanQuery = matcher.replaceAll(""); StringBuilder finalQuery = new StringBuilder(cleanQuery); finalQuery.insert(startIndex, finalFroms); return finalQuery.toString(); } else { int indexOfWhere = query.toLowerCase().indexOf("where"); StringBuilder strBuilder = new StringBuilder(query); strBuilder.insert(indexOfWhere, finalFroms); return strBuilder.toString(); } }