List of usage examples for java.util.regex.Matcher.end
public int end(String name)
From source file:com.nextep.designer.sqlgen.oracle.parser.OraclePackageParser.java
private String renameSqlEnd(String sql, String newName) { // Matching the END tag final Pattern pattern = Pattern.compile("end\\s+((\\w)+)(;|/|\\s)*$"); //$NON-NLS-1$ final Matcher m = pattern.matcher(sql.toLowerCase()); String newSql = sql;//w w w . j a v a 2 s.c om if (m.find()) { newSql = sql.substring(0, m.start(1)) + newName + sql.substring(m.end(1)); } return newSql; }
From source file:net.healeys.lexic.online.OnlineGame.java
public boolean submitWords(WebView display) { Pattern contentPat = Pattern.compile("([^;]+); charset=(.+)"); for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { String url = BASE_URL + urls.get("words"); Iterator<String> li = uniqueListIterator(); StringBuffer sb = new StringBuffer(4096); while (li.hasNext()) { sb.append(li.next());//from w w w. j a v a2s . c o m if (li.hasNext()) sb.append(','); } String data = URLEncoder.encode(sb.toString()); try { HttpClient httpClient = new DefaultHttpClient(); HttpPost post = new HttpPost(url); addHeaders(post); post.setEntity(new StringEntity("words=" + data)); HttpResponse resp = httpClient.execute(post); BufferedReader br = new BufferedReader(new InputStreamReader(resp.getEntity().getContent())); sb = new StringBuffer(4096); String line; while ((line = br.readLine()) != null) { sb.append(line); sb.append('\n'); } String contentHeader = resp.getFirstHeader("Content-type").getValue(); String contentType; String contentEncoding; Matcher mat = contentPat.matcher(contentHeader); if (mat.find()) { contentType = contentHeader.substring(mat.start(1), mat.end(1)); contentEncoding = contentHeader.substring(mat.start(2), mat.end(2)); } else { contentType = contentHeader; contentEncoding = "utf-8"; } // Log.d(TAG,"url:"+url); // Log.d(TAG,"data:"+sb.toString()); // Log.d(TAG,"contentType:"+contentType); // Log.d(TAG,"contentEncoding:"+contentEncoding); display.loadDataWithBaseURL(url, sb.toString(), "text/html", "utf-8", null); return true; } catch (Exception e) { // Log.d(TAG,"error submitting words",e); } } return false; }
From source file:dk.netarkivet.harvester.harvesting.extractor.IcelandicExtractorJS.java
public long considerStrings(Extractor ext, CrawlURI curi, CharSequence cs, boolean handlingJSFile) { long foundLinks = 0; Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs); int startIndex = 0; while (strings.find(startIndex)) { CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2)); Matcher uri = TextUtils.getMatcher(STRING_URI_DETECTOR, subsequence); if (uri.matches()) { String string = uri.group(); boolean falsePositive = false; try { string = StringEscapeUtils.unescapeJavaScript(string); } catch (NestableRuntimeException e) { LOGGER.log(Level.WARNING, "problem unescaping some javascript", e); }// w w w. j a v a 2 s.co m string = UriUtils.speculativeFixup(string, curi.getUURI()); // Filter out some bad false positives (should really fix regexp for URI detection) if (string.contains("/.") || string.contains("@") || string.length() > 150) { // While legal in URIs, these are rare and usually an indication of a false positive // in the speculative extraction. falsePositive = true; } if (!falsePositive) { falsePositive = shouldIgnorePossibleRelativeLink(string); } if (falsePositive) { foundFalsePositives++; } else { foundLinks++; try { int max = ext.getExtractorParameters().getMaxOutlinks(); if (handlingJSFile) { addRelativeToVia(curi, max, string, JS_MISC, SPECULATIVE); } else { addRelativeToBase(curi, max, string, JS_MISC, SPECULATIVE); } } catch (URIException e) { ext.logUriError(e, curi.getUURI(), string); } } } else { foundLinks += considerStrings(ext, curi, subsequence, handlingJSFile); } // reconsider the last closing quote as possible opening quote startIndex = strings.end(2); } TextUtils.recycleMatcher(strings); return foundLinks; }
From source file:com.github.rwitzel.streamflyer.xml.XmlVersionModifier.java
/** * @see com.github.rwitzel.streamflyer.core.Modifier#modify(java.lang.StringBuilder, int, boolean) *///from w w w . j av a 2 s .c o m @Override public AfterModification modify(StringBuilder characterBuffer, int firstModifiableCharacterInBuffer, boolean endOfStreamHit) { switch (state) { case NO_LONGER_MODIFYING: return factory.skipEntireBuffer(characterBuffer, firstModifiableCharacterInBuffer, endOfStreamHit); case INITIAL: state = XmlVersionModifierState.PROLOG_REQUEST; // you never know how many whitespace characters are in the prolog return factory.modifyAgainImmediately(INITIAL_NUMBER_OF_CHARACTERS, firstModifiableCharacterInBuffer); case PROLOG_REQUEST: // (Should we do aware of BOMs here? No. I consider it the // responsibility of the caller to provide characters without BOM.) Matcher matcher = Pattern.compile("<\\?xml[^>]*version\\s*=\\s*['\"]((1.0)|(1.1))['\"].*") .matcher(characterBuffer); if (matcher.matches()) { // replace version in prolog characterBuffer.replace(matcher.start(1), matcher.end(1), xmlVersion); } else { // is there a prolog that is too long? Matcher matcher2 = Pattern.compile("<\\?xml.*").matcher(characterBuffer); if (matcher2.matches()) { // this is not normal at all -> throw exception throw new XmlPrologRidiculouslyLongException(characterBuffer.toString()); } // insert prolog characterBuffer.insert(0, "<?xml version='" + xmlVersion + "'>"); } state = XmlVersionModifierState.NO_LONGER_MODIFYING; return factory.skipEntireBuffer(characterBuffer, firstModifiableCharacterInBuffer, endOfStreamHit); default: throw new IllegalStateException("state " + state + " not supported"); } }
From source file:com.nextep.designer.sqlgen.oracle.parser.OraclePackageParser.java
@Override public String parseName(String sql) { String tag = null;//from w w w .j av a 2 s.com if (isBody(sql)) { tag = "body"; //$NON-NLS-1$ } else { tag = "package"; //$NON-NLS-1$ } // Extracting procedure or function name from the SQL source Pattern pattern = Pattern.compile("\\s*(" + tag + ")\\s+((\\w)+)"); //$NON-NLS-1$ //$NON-NLS-2$ Matcher m = pattern.matcher(sql.toLowerCase()); // Looking for first occurrence String parsedName = null; if (m.find()) { parsedName = sql.substring(m.start(2), m.end(2)); } return parsedName; }
From source file:de.tudarmstadt.ukp.dkpro.core.textnormalizer.transformation.HyphenationRemover.java
@Override public void process(JCas aInput, JCas aOutput) throws AnalysisEngineProcessException { StringBuilder c_new = new StringBuilder(); final Matcher m = HYPHEN_PATTERN.matcher(aInput.getDocumentText()); while (m.find()) { // The capturing groups count should be exactly 2. assert m.groupCount() == 2 : "Expected 2 groups but got " + m.groupCount(); c_new.setLength(0); c_new.append(m.group(1)); c_new.append(m.group(2)); if (dict.contains(c_new.toString())) { replace(m.start(1), m.end(2), c_new.toString()); // getLogger().info( // "Conflated: [" + aInput.getDocumentText().substring(m.start(1), m.end(2)) // + "] to [" + c_new + "]"); }/*from w w w .ja v a2 s . c om*/ } }
From source file:net.healeys.lexic.online.OnlineGame.java
public Iterator<Score> getScores() { Pattern startPat = Pattern.compile("^!START:([^,]+),(\\d+),(-?\\d+)$"); Pattern endPat = Pattern.compile("^!END$"); for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { try {//from w ww .j a va 2 s . co m LinkedList<Score> scores = new LinkedList<Score>(); HttpClient httpClient = new DefaultHttpClient(); HttpGet get = new HttpGet(BASE_URL + urls.get("score")); addHeaders(get); HttpResponse resp = httpClient.execute(get); BufferedReader br = new BufferedReader(new InputStreamReader(resp.getEntity().getContent())); String username = ""; int points = 0; int score = 0; String line; StringBuffer sb = new StringBuffer(0); while ((line = br.readLine()) != null) { // Log.d(TAG,"line:"+line); Matcher mat; if ((mat = startPat.matcher(line)).find()) { // Log.d(TAG,"startPat matched"+line); username = line.substring(mat.start(1), mat.end(1)); points = Integer.parseInt(line.substring(mat.start(2), mat.end(2))); score = Integer.parseInt(line.substring(mat.start(3), mat.end(3))); sb = new StringBuffer(2048); } else if ((mat = endPat.matcher(line)).find()) { // Log.d(TAG,"endPat matched"+line); scores.add(new Score(username, score, points, sb.toString())); } else if (sb != null && line.length() > 1) { sb.append(line); sb.append('\n'); } } return scores.iterator(); } catch (Exception e) { // Log.e(TAG,"getScores error",e); } } return null; }
From source file:com.haulmont.cuba.core.global.QueryTransformerRegex.java
@Override public void replaceEntityName(String newName) { Matcher entityMatcher = FROM_ENTITY_PATTERN.matcher(buffer); if (entityMatcher.find()) { buffer.replace(entityMatcher.start(FEP_ENTITY), entityMatcher.end(FEP_ENTITY), newName); return;// w ww. j a v a 2s.c o m } error("Unable to find entity name"); }
From source file:com.haulmont.cuba.core.global.QueryTransformerRegex.java
@Override public void handleCaseInsensitiveParam(String paramName) { Pattern pattern = Pattern.compile(COND_PATTERN_REGEX + ":" + paramName, Pattern.CASE_INSENSITIVE); Matcher matcher = pattern.matcher(buffer); if (matcher.find()) { String field = matcher.group(1); buffer.replace(matcher.start(1), matcher.end(1), "lower(" + field + ")"); }// www .ja va 2 s .c o m }
From source file:edu.uab.ccts.nlp.uima.annotator.SegmentRegexAnnotator.java
/** * Add Segment annotations to the cas. First create a list of segments. Then * sort the list according to segment start. For each segment that has no * end, set the end to the [beginning of next segment - 1], or the eof. */// w w w .ja v a2s . c o m @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { log.info("Starting SegmentRegexAnnotator with " + regexMap.size() + " segements."); String strDocText = aJCas.getDocumentText(); if (strDocText == null) return; List<Segment> segmentsAdded = new ArrayList<Segment>(); // find all the segments, set begin and id, add to list for (Map.Entry<SegmentRegex, Pattern> entry : regexMap.entrySet()) { if (log.isDebugEnabled()) { log.debug("applying regex:" + entry.getKey().getRegex()); } Matcher matcher = entry.getValue().matcher(strDocText); while (matcher.find()) { Segment seg = new Segment(aJCas); if (entry.getKey().isLimitToRegex() && matcher.groupCount() == 1) { seg.setBegin(matcher.start(1)); seg.setEnd(matcher.end(1)); } else { seg.setBegin(matcher.start()); if (entry.getKey().isLimitToRegex()) { seg.setEnd(matcher.end()); } } seg.setId(entry.getKey().getSegmentID()); //if (log.isDebugEnabled()) { log.debug("found match: id=" + seg.getId() + ", begin=" + seg.getBegin() + " end=" + seg.getEnd()); //} segmentsAdded.add(seg); } } if (log.isDebugEnabled()) { log.debug("segmentsAdded: " + segmentsAdded.size()); } if (segmentsAdded.size() > 0) { // sort the segments by begin Collections.sort(segmentsAdded, new Comparator<Segment>() { // @Override public int compare(Segment o1, Segment o2) { return o1.getBegin() < o2.getBegin() ? -1 : o1.getBegin() > o2.getBegin() ? 1 : 0; } }); // set the end for each segment for (int i = 0; i < segmentsAdded.size(); i++) { Segment seg = segmentsAdded.get(i); Segment segNext = (i + 1) < segmentsAdded.size() ? 
segmentsAdded.get(i + 1) : null; if (seg.getEnd() <= 0) { if (segNext != null) { // set end to beginning of next segment seg.setEnd(segNext.getBegin() - 1); } else { // set end to doc end seg.setEnd(strDocText.length()); } } else { // segments shouldn't overlap if (segNext != null && segNext.getBegin() < seg.getEnd()) { seg.setEnd(segNext.getBegin() - 1); } } //if (log.isDebugEnabled()) { log.debug("Adding Segment: segment id=" + seg.getId() + ", begin=" + seg.getBegin() + ", end=" + seg.getEnd()); //} seg.addToIndexes(); } } // ctakes 1.3.2 - anything not in a segment will not be annotated - add // text outside segments to the 'default' segment int end = 0; for (Segment seg : segmentsAdded) { if ((seg.getBegin() - 1) > end) { addGapSegment(aJCas, end, seg.getBegin() - 1); } end = seg.getEnd(); } if (end < strDocText.length()) { addGapSegment(aJCas, end, strDocText.length()); } }