List of usage examples for java.util.regex.Matcher.end()
public int end()
From source file:com.streamsets.pipeline.lib.parser.log.ExtendedFormatParser.java
@Override public Map<String, Field> parseLogLine(StringBuilder logLine) throws DataParserException { Map<String, Field> map = new HashMap<>(); // Parse headers Matcher m = HEADER_PATTERN.matcher(logLine); int counter = 0; int index = 0; int headerCount = 1; while (counter < headerCount && m.find()) { String val = logLine.substring(index, m.start()); if (counter == 0) { Field formatVersion = getExtFormatVersion(val); map.put(formatType.label + "Version", formatVersion); headerCount = getNumHeaderFields(formatType, formatVersion); } else {//from w ww .j a v a2 s . co m map.put(getHeaderFieldName(counter), Field.create(val)); } index = m.end(); counter++; } if (counter < headerCount) { throw new DataParserException(Errors.LOG_PARSER_12, formatName, headerCount, counter); } // For LEEF 2.0, there is an optional field in the header, so we check for it, and // advance the index, if necessary, to get to the start of the extensions char attrSeparator = getExtensionAttrSeparator(m, index, logLine); if (!m.hitEnd()) { index = m.end(); } // Calls to trim() will strip off whitespace, but if format is LEEF 2.0 and a custom // delimiter is being used, we need to offset the start index of extension keys int offset = 0; if (!Character.isWhitespace(attrSeparator)) { offset = 1; } // Process extensions Map<String, Field> extMap = new HashMap<>(); Map<String, String> labelMap = new HashMap<>(); String ext = logLine.substring(index); m = EXT_PATTERN.matcher(ext); index = 0; String key = null; String value; while (m.find()) { if (key == null) { key = ext.substring(index, m.start()); index = m.end(); if (!m.find()) { break; } } // Regex will search for unescaped '=' character to find the split between keys // and values. 
We'll need to figure out where the separator is to determine the // end of the value, and then go back for the next KV pair value = ext.substring(index, m.start()); index = m.end(); int lastSepIndex = value.lastIndexOf(attrSeparator); if (lastSepIndex > 0) { String temp = value.substring(0, lastSepIndex).trim(); putLabelIntoAppropriateMap(labelMap, extMap, key, temp); key = value.substring(lastSepIndex + offset).trim(); } } value = ext.substring(index); // Build a map of Label extensions to apply later putLabelIntoAppropriateMap(labelMap, extMap, key, value); // Apply the labels to custom fields for (Map.Entry<String, String> label : labelMap.entrySet()) { if (extMap.containsKey(label.getKey())) { Field field = extMap.remove(label.getKey()); extMap.put(label.getValue(), field); } } map.put("extensions", Field.create(extMap)); return map; }
From source file:com.nttec.everychan.chans.cirno.MikubaReader.java
/**
 * Rewrites post references inside a comment and returns the resulting text.
 *
 * <p>Every {@code POST_REFERENCE} match whose captured number differs from the number of
 * the first buffered post and is contained in {@code postsNumBuf} has that number replaced
 * by the first post's number. Copying resumes after the end of the whole match.
 * The passed buffer is used as scratch space and is always left empty.
 *
 * @param commentBuffer buffer holding the comment text; emptied by this call
 * @return the rewritten comment, or the original text when nothing was replaced
 */
private String fixPostRefs(StringBuilder commentBuffer) {
    final String original = commentBuffer.toString();
    commentBuffer.setLength(0);

    if (postsBuf == null || postsBuf.size() == 0) {
        return original;
    }

    Matcher refs = POST_REFERENCE.matcher(original);
    boolean more = refs.find();
    if (!more) {
        return original;
    }

    String threadNum = postsBuf.get(0).number;
    boolean changed = false;
    int copiedUpTo = 0;
    while (more) {
        String referenced = refs.group(1);
        // Copy everything up to the captured number, then emit the (possibly rewritten) number.
        commentBuffer.append(original, copiedUpTo, refs.start(1));
        copiedUpTo = refs.end();

        boolean rewrite = !referenced.equals(threadNum) && postsNumBuf.contains(referenced);
        if (rewrite) {
            changed = true;
        }
        commentBuffer.append(rewrite ? threadNum : referenced);

        more = refs.find();
    }
    commentBuffer.append(original, copiedUpTo, original.length());

    String result = changed ? commentBuffer.toString() : original;
    commentBuffer.setLength(0);
    return result;
}
From source file:de.tudarmstadt.lt.lm.service.UimaStringProvider.java
/**
 * Lazily splits {@code text} into consecutive chunks.
 *
 * <p>When {@code aggressiveness} is at least {@code patterns.length}, the text is cut into
 * fixed chunks of at most 1000 characters. Otherwise the pattern at index
 * {@code aggressiveness} is applied and each chunk ends one character before the end of a
 * match; the text after the last match forms the final chunk.
 *
 * <p>Both returned iterators honour the {@link Iterator} contract: {@code hasNext()} is
 * idempotent and {@code next()} throws {@link java.util.NoSuchElementException} once exhausted.
 *
 * @param text the text to split
 * @param aggressiveness index into {@code patterns}, or a larger value for fixed-size chunks
 * @return an iterator over the chunks; {@code remove()} is unsupported
 */
public Iterator<String> applySplitHeuristic(final String text, int aggressiveness) {
    if (aggressiveness >= patterns.length) {
        return new Iterator<String>() {
            private int position = 0;

            @Override
            public boolean hasNext() {
                return position < text.length();
            }

            @Override
            public String next() {
                if (!hasNext()) {
                    throw new java.util.NoSuchElementException();
                }
                int chunkEnd = Math.min(position + 1000, text.length());
                String chunk = text.substring(position, chunkEnd);
                position = chunkEnd;
                return chunk;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("remove() is not supported");
            }
        };
    }

    final Matcher m = patterns[aggressiveness].matcher(text);
    return new Iterator<String>() {
        private int last = 0;
        /** True when {@code found} already describes the upcoming element. */
        private boolean lookedAhead = false;
        /** Result of the most recent look-ahead {@code find()}. */
        private boolean found = false;
        /** Set once {@code find()} has failed, so the matcher is never queried again. */
        private boolean matcherExhausted = false;

        @Override
        public boolean hasNext() {
            if (!lookedAhead) {
                // Advance the matcher at most once per element, no matter how often
                // hasNext() is called before next().
                found = !matcherExhausted && m.find();
                if (!found) {
                    matcherExhausted = true;
                }
                lookedAhead = true;
            }
            return found || last < text.length();
        }

        @Override
        public String next() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            // The last character of the match belongs to the following chunk.
            int current = found ? m.end() - 1 : text.length();
            String res = text.substring(last, current);
            last = current;
            lookedAhead = false;
            return res;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove() is not supported");
        }
    };
}
From source file:hudson.Util.java
/** * Replaces the occurrence of '$key' by <tt>resolver.get('key')</tt>. * * <p>// w w w . ja va2s .c om * Unlike shell, undefined variables are left as-is (this behavior is the same as Ant.) */ public static String replaceMacro(String s, VariableResolver<String> resolver) { if (s == null) { return null; } int idx = 0; while (true) { Matcher m = VARIABLE.matcher(s); if (!m.find(idx)) { return s; } String key = m.group().substring(1); // escape the dollar sign or get the key to resolve String value; if (key.charAt(0) == '$') { value = "$"; } else { if (key.charAt(0) == '{') { key = key.substring(1, key.length() - 1); } value = resolver.resolve(key); } if (value == null) { idx = m.end(); // skip this } else { s = s.substring(0, m.start()) + value + s.substring(m.end()); idx = m.start() + value.length(); } } }
From source file:gate.creole.splitter.RegexSentenceSplitter.java
@Override public void execute() throws ExecutionException { interrupted = false;// ww w.jav a2 s. c o m int lastProgress = 0; fireProgressChanged(lastProgress); //get pointers to the annotation sets AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0) ? document.getAnnotations() : document.getAnnotations(outputASName); String docText = document.getContent().toString(); /* If the document's content is empty or contains only whitespace, * we drop out right here, since there's nothing to sentence-split. */ if (docText.trim().length() < 1) { return; } Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText); Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText); Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText); //store all non split locations in a list of pairs List<int[]> nonSplits = new LinkedList<int[]>(); while (nonSplitMatcher.find()) { nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() }); } //this lists holds the next matches at each step List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>(); //initialise matching process MatchResult internalMatchResult = null; if (internalSplitMatcher.find()) { internalMatchResult = internalSplitMatcher.toMatchResult(); nextSplitMatches.add(internalMatchResult); } MatchResult externalMatchResult = null; if (externalSplitMatcher.find()) { externalMatchResult = externalSplitMatcher.toMatchResult(); nextSplitMatches.add(externalMatchResult); } MatchResultComparator comparator = new MatchResultComparator(); int lastSentenceEnd = 0; while (!nextSplitMatches.isEmpty()) { //see which one matches first Collections.sort(nextSplitMatches, comparator); MatchResult nextMatch = nextSplitMatches.remove(0); if (nextMatch == internalMatchResult) { //we have a new internal split; see if it's vetoed or not if (!veto(nextMatch, nonSplits)) { //split is not vetoed try { //add the split annotation FeatureMap features = Factory.newFeatureMap(); 
features.put("kind", "internal"); outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features); //generate the sentence annotation int endOffset = nextMatch.end(); //find the first non whitespace character starting from where the //last sentence ended while (lastSentenceEnd < endOffset && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) { lastSentenceEnd++; } //if there is any useful text between the two offsets, generate //a new sentence if (lastSentenceEnd < nextMatch.start()) { outputAS.add(new Long(lastSentenceEnd), new Long(endOffset), ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap()); } //store the new sentence end lastSentenceEnd = endOffset; } catch (InvalidOffsetException e) { // this should never happen throw new ExecutionException(e); } } //prepare for next step if (internalSplitMatcher.find()) { internalMatchResult = internalSplitMatcher.toMatchResult(); nextSplitMatches.add(internalMatchResult); } else { internalMatchResult = null; } } else if (nextMatch == externalMatchResult) { //we have a new external split; see if it's vetoed or not if (!veto(nextMatch, nonSplits)) { //split is not vetoed try { //generate the split FeatureMap features = Factory.newFeatureMap(); features.put("kind", "external"); outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features); //generate the sentence annotation //find the last non whitespace character, going backward from //where the external skip starts int endOffset = nextMatch.start(); while (endOffset > lastSentenceEnd && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) { endOffset--; } //find the first non whitespace character starting from where the //last sentence ended while (lastSentenceEnd < endOffset && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) { lastSentenceEnd++; } //if there is any useful text between the two offsets, generate //a new sentence if (lastSentenceEnd 
< endOffset) { outputAS.add(new Long(lastSentenceEnd), new Long(endOffset), ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap()); } //store the new sentence end lastSentenceEnd = nextMatch.end(); } catch (InvalidOffsetException e) { // this should never happen throw new ExecutionException(e); } } //prepare for next step if (externalSplitMatcher.find()) { externalMatchResult = externalSplitMatcher.toMatchResult(); nextSplitMatches.add(externalMatchResult); } else { externalMatchResult = null; } } else { //malfunction throw new ExecutionException("Invalid state - cannot identify match!"); } //report progress int newProgress = 100 * lastSentenceEnd / docText.length(); if (newProgress - lastProgress > 20) { lastProgress = newProgress; fireProgressChanged(lastProgress); } } //while(!nextMatches.isEmpty()){ fireProcessFinished(); }
From source file:ninja.javafx.smartcsv.fx.validation.ValidationEditorController.java
private static StyleSpans<Collection<String>> computeHighlighting(String text) { Matcher matcher = PATTERN.matcher(text); int lastKwEnd = 0; StyleSpansBuilder<Collection<String>> spansBuilder = new StyleSpansBuilder<>(); while (matcher.find()) { String styleClass = matcher .group("KEYWORD") != null ? "keyword" : matcher.group("PAREN") != null ? "paren" : matcher.group("BRACE") != null ? "brace" : matcher.group("BRACKET") != null ? "bracket" : matcher.group("SEMICOLON") != null ? "semicolon" : matcher.group("STRING") != null ? "string" : matcher.group("STRING2") != null ? "string" : matcher.group("COMMENT") != null ? "comment" : null; /* never happens */ assert styleClass != null; spansBuilder.add(Collections.emptyList(), matcher.start() - lastKwEnd); spansBuilder.add(Collections.singleton(styleClass), matcher.end() - matcher.start()); lastKwEnd = matcher.end();//w w w .j ava 2 s .c o m } spansBuilder.add(Collections.emptyList(), text.length() - lastKwEnd); return spansBuilder.create(); }
From source file:com.g3net.tool.StringUtils.java
/** * ?????/*from w ww . jav a 2 s . com*/ * @param src ?? * @param regex ??? * @param ignoreCase ?? * @param endPos src??(regex)??1 * @return */ public static boolean startsWith(String src, String regex, boolean ignoreCase, TInteger endPos) { Pattern p = null; if (ignoreCase) { p = Pattern.compile("^" + regex, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); } else { p = Pattern.compile("^" + regex, Pattern.MULTILINE); } Matcher m = p.matcher(src); while (m.find()) { // log.info(m.group()+":"+m.start()+":"+m.end()); endPos.setValue(m.end()); return true; } return false; }
From source file:sapience.injectors.stax.inject.ModelBasedStaxStreamInjector.java
/** * If the reference is a attribute (e.g. sawsdl:modelreference), we add it here (by creating * the according XMLEvent). The ref//from w w w . j a va2 s.co m * @param w * @param ref * @param se * @throws XMLStreamException */ private StartElement handleAttribute(XMLEventWriter w, Reference ref, StartElement se) throws XMLStreamException { /* we are having attributes which are in both, the reference and the current element. We only add * a new Attribute event, if it is not already contained in the Start Element * * Example: * reference <element ns:attr1="value" reference="http://example.com"> * element <element ns:attr1="value"> */ StringBuilder referenceString = new StringBuilder(ref.getTarget().toString()); Matcher matcher = findAttributeInReference.matcher(referenceString); List<Attribute> attributeList = new ArrayList<Attribute>(); // copy namespaces LocalNamespaceContext lnc = new LocalNamespaceContext((BaseNsContext) se.getNamespaceContext()); while (matcher.find()) { int start = matcher.start(); int end = matcher.end(); String key = null; String prefix = null; String value = null; // [ns:attr1, "value"] String[] l = referenceString.substring(start, end).split("="); if (l.length > 0) { // [ns, attr1] String[] n = l[0].split(":"); if (n.length == 2) { key = n[1]; prefix = n[0]; } else { key = n[0]; } if (l.length == 2) { value = l[1].substring(1, l[1].length() - 1); // remove "" } } // check if this is a namespace definition if ((prefix != null) && ("xmlns".contentEquals(prefix))) { lnc.put(key, value); } else { QName name = null; // create QName if (prefix != null) { name = new QName(null, key, prefix); } else { String namespaceURI = se.getNamespaceContext().getNamespaceURI(XMLConstants.DEFAULT_NS_PREFIX); name = new QName(namespaceURI, key); } if (name != null) { Attribute created = getXMLEventFactory().createAttribute(name, value); attributeList.add(created); } } } // remove redundant attribute from reference list Iterator<?> it = se.getAttributes(); while 
(it.hasNext()) { Attribute ae = (Attribute) it.next(); for (Attribute ar : attributeList) { if ((ar.getName().getLocalPart().contentEquals(ae.getName().getLocalPart())) && (ar.getValue().contentEquals(ae.getValue()))) { //System.out.println("Attribute removed! -> " + ar.getName() + "= " + ar.getValue()); attributeList.remove(ar); break; } } } // merge everything again Iterator<? extends Attribute> it2 = se.getAttributes(); while (it2.hasNext()) { attributeList.add(it2.next()); } // create a new element with the attribute set and return it return StartElementEventImpl.construct(se.getLocation(), se.getName(), attributeList.iterator(), lnc.getNamespaces().iterator(), lnc); }
From source file:com.jsmartframework.web.manager.ExpressionHandler.java
/**
 * Evaluates the expressions embedded in {@code expr}.
 *
 * <p>Returns {@code null} for a {@code null} argument and the argument itself when its
 * string form contains no {@code EL_PATTERN} match. When exactly one expression is present
 * and it does not evaluate to a {@code String}, the evaluated object is returned directly.
 * Otherwise every expression is replaced by the string form of its value ({@code "null"}
 * for a null value) and the resulting text is returned.
 *
 * @param expr the value possibly containing expressions
 * @return the evaluated object or the interpolated string, as described above
 */
public Object getExpressionValue(Object expr) {
    if (expr == null) {
        return null;
    }

    String evalExpr = expr.toString();
    Matcher matcher = EL_PATTERN.matcher(evalExpr);
    if (!matcher.find()) {
        return expr;
    }

    StringBuffer exprBuffer = new StringBuffer();
    Object result = null;
    boolean firstGroup = true;
    boolean hasMoreGroup = false;
    do {
        // Strip the two leading delimiter characters and the single trailing one.
        String inner = evalExpr.substring(matcher.start() + 2, matcher.end() - 1);
        Object value = evaluateExpression(inner);
        if (firstGroup) {
            result = value;
            firstGroup = false;
        } else {
            hasMoreGroup = true;
        }

        String replacement = (value == null) ? "null" : Matcher.quoteReplacement(value.toString());
        matcher.appendReplacement(exprBuffer, replacement);
    } while (matcher.find());

    if (!hasMoreGroup && !(result instanceof String)) {
        return result;
    }
    return matcher.appendTail(exprBuffer).toString();
}
From source file:de.mpg.escidoc.services.cone.util.CCCrawler.java
/**
 * Extracts the alternate-language links from {@code page} and fetches each linked page.
 *
 * <p>For every matching anchor a {@code TreeFragment} is created carrying the link URL as
 * identifier, the anchor's {@code title} attribute as title and the {@code hreflang} value
 * as locale. The linked page is then downloaded and, if it contains an
 * {@code <h2 property="dc:title">} heading, that heading is put as the title afterwards.
 *
 * @param page the HTML of the page to scan
 * @param baseURL prefix prepended to each relative link
 * @return one entry per language link, in document order
 * @throws Exception if creating an identifier or fetching a translation fails
 */
private static List<LocalizedTripleObject> extractLanguages(String page, String baseURL) throws Exception {
    HttpClient httpClient = new HttpClient();
    List<LocalizedTripleObject> result = new ArrayList<LocalizedTripleObject>();

    Pattern pattern = Pattern.compile(
            "<a\\s+href=\"./([^\"]+)\"\\s+title=\"([^\"]+)\"\\s+hreflang=\"([^\"]+)\"\\s+rel=\"alternate nofollow\"\\s+(xml:)?lang=\"([^\"]+)\">");
    // Compiled once instead of once per language link.
    Pattern namePattern = Pattern.compile("<h2 property=\"dc:title\">([^<]+)</h2>");

    Matcher matcher = pattern.matcher(page);
    while (matcher.find()) {
        String genid = querier.createUniqueIdentifier(null);
        TreeFragment treeFragment = new TreeFragment(genid);

        String locale = matcher.group(3);
        treeFragment.setLanguage(locale.split("_")[0]);

        String url = baseURL + matcher.group(1);
        List<LocalizedTripleObject> list = new ArrayList<LocalizedTripleObject>();
        list.add(new LocalizedString(url));
        treeFragment.put("http://purl.org/dc/elements/1.1/identifier", list);

        list = new ArrayList<LocalizedTripleObject>();
        list.add(new LocalizedString(matcher.group(2)));
        treeFragment.put("http://purl.org/dc/elements/1.1/title", list);

        list = new ArrayList<LocalizedTripleObject>();
        list.add(new LocalizedString(locale));
        treeFragment.put("urn:cone:locale", list);

        // Fetch the translated page; always hand the connection back to the client,
        // even when the request fails.
        GetMethod method = new GetMethod(url);
        String translation;
        try {
            ProxyHelper.executeMethod(httpClient, method);
            translation = method.getResponseBodyAsString();
        } finally {
            method.releaseConnection();
        }

        // A response without a body simply contributes no translated title.
        if (translation != null) {
            Matcher nameMatcher = namePattern.matcher(translation);
            if (nameMatcher.find()) {
                list = new ArrayList<LocalizedTripleObject>();
                list.add(new LocalizedString(nameMatcher.group(1)));
                treeFragment.put("http://purl.org/dc/elements/1.1/title", list);
            }
        }

        result.add(treeFragment);
    }
    return result;
}