List of usage examples for java.util.regex Matcher start
public int start(String name)
From source file:com.nttec.everychan.chans.cirno.MikubaReader.java
/**
 * Rewrites post references found in the buffered comment text.
 *
 * <p>The buffer content is taken as the comment and the buffer is emptied. Every match of
 * {@code POST_REFERENCE} is then inspected: when the number captured by group 1 differs from
 * the number of the first entry of {@code postsBuf} and is contained in {@code postsNumBuf},
 * it is replaced by the number of that first entry.
 *
 * @param commentBuffer buffer holding the comment; used as scratch space and left empty on return
 * @return the rewritten comment, or the original text when nothing was replaced
 */
private String fixPostRefs(StringBuilder commentBuffer) {
    final String original = commentBuffer.toString();
    commentBuffer.setLength(0);

    if (postsBuf == null || postsBuf.size() == 0) {
        return original;
    }

    final Matcher refs = POST_REFERENCE.matcher(original);
    boolean found = refs.find();
    if (!found) {
        return original;
    }

    final String threadNum = postsBuf.get(0).number;
    boolean replaced = false;
    int copiedUpTo = 0;

    while (found) {
        final String num = refs.group(1);

        // Copy the text preceding the captured number, then resume after the whole match.
        // NOTE(review): anything the pattern matches after group 1 is not copied - confirm
        // that POST_REFERENCE ends with the capture group.
        commentBuffer.append(original, copiedUpTo, refs.start(1));
        copiedUpTo = refs.end();

        final boolean redirect = !num.equals(threadNum) && postsNumBuf.contains(num);
        if (redirect) {
            replaced = true;
        }
        commentBuffer.append(redirect ? threadNum : num);

        found = refs.find();
    }
    commentBuffer.append(original, copiedUpTo, original.length());

    final String result = replaced ? commentBuffer.toString() : original;
    commentBuffer.setLength(0);
    return result;
}
From source file:Repackage.java
/**
 * Reads one Java source file, rewrites its package declaration according to the configured
 * from/to package lists, and writes the repackaged text below the target base directory.
 *
 * <p>When the file contains no package declaration its text is still passed through the
 * repackager and written under the unchanged name. The write is skipped (and the skipped-file
 * counter incremented) when the source file is older than the existing target file.
 *
 * @param name path of the source file relative to the source base directory
 * @throws IOException declared for the file read/write helpers
 * @throws RuntimeException if two package declarations are found, or if the declared package
 *         does not match the directories of {@code name}
 */
public void repackageJavaFile(String name) throws IOException {
    File sourceFile = new File(_sourceBase, name);
    StringBuffer contents = readFile(sourceFile);

    Matcher packageMatcher = _packagePattern.matcher(contents);
    if (packageMatcher.find()) {
        String pkg = packageMatcher.group(1);
        int pkgStart = packageMatcher.start(1);
        int pkgEnd = packageMatcher.end(1);

        if (packageMatcher.find()) {
            throw new RuntimeException("Two package specifications found: " + name);
        }

        List filePath = Repackager.splitPath(name, File.separatorChar);
        String srcDir = Repackager.dirForPath(name);

        // Repeated adjacent-swap passes over the path elements, ordering them by the
        // position of ':' in each element, until a pass makes no swap.
        boolean swapped;
        do {
            swapped = false;
            for (int i = 1; i < filePath.size(); i++) {
                String previous = (String) filePath.get(i - 1);
                String current = (String) filePath.get(i);
                if (previous.indexOf(':') < current.indexOf(':')) {
                    filePath.set(i - 1, current);
                    filePath.set(i, previous);
                    swapped = true;
                }
            }
        } while (swapped);

        List pkgPath = Repackager.splitPath(pkg, '.');

        // The directories directly above the file must spell out the declared package.
        int f = filePath.size() - 2;
        if (f < 0 || (filePath.size() - 1) < pkgPath.size()) {
            throw new RuntimeException("Package spec differs from file path: " + name);
        }
        for (int i = pkgPath.size() - 1; i >= 0; i--, f--) {
            if (!pkgPath.get(i).equals(filePath.get(f))) {
                throw new RuntimeException("Package spec differs from file path: " + name);
            }
        }

        // Pick the first "from" package that is a prefix of the declared package.
        List changeFrom = null;
        List changeTo = null;
        for (int i = 0; i < _fromPackages.size(); i++) {
            List candidate = (List) _fromPackages.get(i);
            if (candidate.size() > pkgPath.size()) {
                continue;
            }
            boolean isPrefix = true;
            for (int j = 0; j < candidate.size(); j++) {
                if (!candidate.get(j).equals(pkgPath.get(j))) {
                    isPrefix = false;
                    break;
                }
            }
            if (isPrefix) {
                changeFrom = candidate;
                changeTo = (List) _toPackages.get(i);
                break;
            }
        }

        if (changeTo != null) {
            StringBuilder newPkg = new StringBuilder();
            StringBuilder newName = new StringBuilder();

            // The replacement package, as a dotted name and as a directory path.
            for (int i = 0; i < changeTo.size(); i++) {
                if (i > 0) {
                    newPkg.append(".");
                    newName.append(File.separatorChar);
                }
                newPkg.append(changeTo.get(i));
                newName.append(changeTo.get(i));
            }

            // Leading directories that sit above the package directories.
            for (int i = filePath.size() - pkgPath.size() - 2; i >= 0; i--) {
                String leading = (String) filePath.get(i);
                newName.insert(0, File.separatorChar);
                newName.insert(0, leading);
            }

            // Sub-packages below the replaced prefix are carried over unchanged.
            for (int i = changeFrom.size(); i < pkgPath.size(); i++) {
                newName.append(File.separatorChar).append((String) pkgPath.get(i));
                newPkg.append('.').append((String) pkgPath.get(i));
            }

            newName.append(File.separatorChar).append((String) filePath.get(filePath.size() - 1));

            contents.replace(pkgStart, pkgEnd, newPkg.toString());
            name = newName.toString();

            String newDir = Repackager.dirForPath(name);
            if (!srcDir.equals(newDir)) {
                _movedDirs.put(srcDir, newDir);
            }
        }
    }

    File targetFile = new File(_targetBase, name); // possibly renamed above
    if (sourceFile.lastModified() < targetFile.lastModified()) {
        _skippedFiles += 1;
        return;
    }

    writeFile(targetFile, _repackager.repackage(contents));
}
From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java
protected boolean processGeneralTag(CharSequence element, CharSequence cs) { Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs); // Just in case it's an OBJECT or APPLET tag String codebase = null;// ww w. ja v a2s . c o m ArrayList<String> resources = null; long tally = next.size(); while (attr.find()) { int valueGroup = (attr.start(12) > -1) ? 12 : (attr.start(13) > -1) ? 13 : 14; int start = attr.start(valueGroup); int end = attr.end(valueGroup); CharSequence value = cs.subSequence(start, end); if (attr.start(2) > -1) { // HREF CharSequence context = Link.elementContext(element, attr.group(2)); if (element.toString().equalsIgnoreCase(LINK)) { // <LINK> elements treated as embeds (css, ico, etc) processEmbed(value, context); } else { if (element.toString().equalsIgnoreCase(BASE)) { try { base = UURIFactory.getInstance(value.toString()); } catch (URIException e) { extractErrorListener.noteExtractError(e, source, value); } } // other HREFs treated as links processLink(value, context); } } else if (attr.start(3) > -1) { // ACTION CharSequence context = Link.elementContext(element, attr.group(3)); processLink(value, context); } else if (attr.start(4) > -1) { // ON____ processScriptCode(value); // TODO: context? } else if (attr.start(5) > -1) { // SRC etc. CharSequence context = Link.elementContext(element, attr.group(5)); processEmbed(value, context); } else if (attr.start(6) > -1) { // CODEBASE // TODO: more HTML deescaping? 
codebase = TextUtils.replaceAll(ESCAPED_AMP, value, AMP); CharSequence context = Link.elementContext(element, attr.group(6)); processEmbed(codebase, context); } else if (attr.start(7) > -1) { // CLASSID, DATA if (resources == null) { resources = new ArrayList<String>(); } resources.add(value.toString()); } else if (attr.start(8) > -1) { // ARCHIVE if (resources == null) { resources = new ArrayList<String>(); } String[] multi = TextUtils.split(WHITESPACE, value); for (int i = 0; i < multi.length; i++) { resources.add(multi[i]); } } else if (attr.start(9) > -1) { // CODE if (resources == null) { resources = new ArrayList<String>(); } // If element is applet and code value does not end with // '.class' then append '.class' to the code value. if (element.toString().toLowerCase().equals(APPLET) && !value.toString().toLowerCase().endsWith(CLASSEXT)) { resources.add(value.toString() + CLASSEXT); } else { resources.add(value.toString()); } } else if (attr.start(10) > -1) { // VALUE if (TextUtils.matches(LIKELY_URI_PATH, value)) { CharSequence context = Link.elementContext(element, attr.group(10)); processLink(value, context); } } else if (attr.start(11) > -1) { // any other attribute // ignore for now // could probe for path- or script-looking strings, but // those should be vanishingly rare in other attributes, // and/or symptomatic of page bugs } } TextUtils.recycleMatcher(attr); // handle codebase/resources if (resources == null) { return (tally - next.size()) > 0; } Iterator iter = resources.iterator(); UURI codebaseURI = null; String res = null; try { if (codebase != null) { // TODO: Pass in the charset. codebaseURI = UURIFactory.getInstance(base, codebase); } while (iter.hasNext()) { res = iter.next().toString(); // TODO: more HTML deescaping? 
res = TextUtils.replaceAll(ESCAPED_AMP, res, AMP); if (codebaseURI != null) { res = codebaseURI.resolve(res).toString(); } processEmbed(res, element); // TODO: include attribute too } } catch (URIException e) { extractErrorListener.noteExtractError(e, source, codebase); } catch (IllegalArgumentException e) { DevUtils.logger.log(Level.WARNING, "processGeneralTag()\n" + "codebase=" + codebase + " res=" + res + "\n" + DevUtils.extraInfo(), e); } return (tally - next.size()) > 0; }
From source file:com.cyberway.issue.crawler.extractor.ExtractorHTML.java
/** * Run extractor./*from w ww.j a va 2 s . com*/ * This method is package visible to ease testing. * @param curi CrawlURI we're processing. * @param cs Sequence from underlying ReplayCharSequence. This * is TRANSIENT data. Make a copy if you want the data to live outside * of this extractors' lifetime. */ void extract(CrawlURI curi, CharSequence cs) { Matcher tags = TextUtils.getMatcher(RELEVANT_TAG_EXTRACTOR, cs); while (tags.find()) { if (Thread.interrupted()) { break; } if (tags.start(8) > 0) { // comment match // for now do nothing } else if (tags.start(7) > 0) { // <meta> match int start = tags.start(5); int end = tags.end(5); assert start >= 0 : "Start is: " + start + ", " + curi; assert end >= 0 : "End is :" + end + ", " + curi; if (processMeta(curi, cs.subSequence(start, end))) { // meta tag included NOFOLLOW; abort processing break; } } else if (tags.start(5) > 0) { // generic <whatever> match int start5 = tags.start(5); int end5 = tags.end(5); assert start5 >= 0 : "Start is: " + start5 + ", " + curi; assert end5 >= 0 : "End is :" + end5 + ", " + curi; int start6 = tags.start(6); int end6 = tags.end(6); assert start6 >= 0 : "Start is: " + start6 + ", " + curi; assert end6 >= 0 : "End is :" + end6 + ", " + curi; processGeneralTag(curi, cs.subSequence(start6, end6), cs.subSequence(start5, end5)); } else if (tags.start(1) > 0) { // <script> match int start = tags.start(1); int end = tags.end(1); assert start >= 0 : "Start is: " + start + ", " + curi; assert end >= 0 : "End is :" + end + ", " + curi; assert tags.end(2) >= 0 : "Tags.end(2) illegal " + tags.end(2) + ", " + curi; processScript(curi, cs.subSequence(start, end), tags.end(2) - start); } else if (tags.start(3) > 0) { // <style... 
match int start = tags.start(3); int end = tags.end(3); assert start >= 0 : "Start is: " + start + ", " + curi; assert end >= 0 : "End is :" + end + ", " + curi; assert tags.end(4) >= 0 : "Tags.end(4) illegal " + tags.end(4) + ", " + curi; processStyle(curi, cs.subSequence(start, end), tags.end(4) - start); } } TextUtils.recycleMatcher(tags); }
From source file:com.haulmont.cuba.gui.config.WindowConfig.java
/** * Get screen information by screen ID./*from w w w. j a v a 2s . c o m*/ * * @param id screen ID as set up in <code>screens.xml</code> * @param deviceInfo target device info * @return screen's registration information or null if not found */ @Nullable public WindowInfo findWindowInfo(String id, @Nullable DeviceInfo deviceInfo) { lock.readLock().lock(); try { checkInitialized(); List<WindowInfo> infos = screens.get(id); if (infos == null) { Matcher matcher = ENTITY_SCREEN_PATTERN.matcher(id); if (matcher.matches()) { MetaClass metaClass = metadata.getClass(matcher.group(1)); if (metaClass == null) { return null; } MetaClass originalMetaClass = metadata.getExtendedEntities().getOriginalMetaClass(metaClass); if (originalMetaClass != null) { String originalId = new StringBuilder(id) .replace(matcher.start(1), matcher.end(1), originalMetaClass.getName()).toString(); infos = screens.get(originalId); } } } List<WindowInfo> foundWindowInfos = infos; if (foundWindowInfos != null) { // do not perform stream processing in a simple case if (foundWindowInfos.size() == 1 && foundWindowInfos.get(0).getScreenAgent() == null) { return foundWindowInfos.get(0); } if (deviceInfo == null) { // find default screen return foundWindowInfos.stream().filter(windowInfo -> windowInfo.getScreenAgent() == null) .findFirst().orElse(null); } else { return infos.stream().filter( wi -> wi.getScreenAgent() != null && wi.getScreenAgent().isSupported(deviceInfo)) .findFirst() .orElseGet(() -> foundWindowInfos.stream() .filter(windowInfo -> windowInfo.getScreenAgent() == null).findFirst() .orElse(null)); } } return null; } finally { lock.readLock().unlock(); } }
From source file:biz.astute.test.simulator.rest.RequestContext.java
/** * Return path portion of URL. The url may be modified to extract variables. * * @param globalProperties global properties * @return path portion of url/*from w w w .j av a 2s .co m*/ * @throws UnsupportedEncodingException exception */ public final String getResourcePath(final Properties globalProperties) throws UnsupportedEncodingException { uriProperties.clear(); String requestURI = URLDecoder.decode(request.getRequestURI(), "utf-8"); Pattern[] currentPatterns = getPatterns(globalProperties); if (currentPatterns.length < 1) { return requestURI; } StringBuilder resourceName = new StringBuilder(requestURI); resourceName.append('/'); // Remove this later - need for matcher for (Pattern pattern : currentPatterns) { Matcher matcher = pattern.matcher(resourceName); if (matcher.matches() && (matcher.groupCount() > 0)) { for (int index = 1; index <= matcher.groupCount(); index++) { String matched = matcher.group(index); uriProperties.add(matched); } // Do so in reverse order so as to not affect offset for (int index = matcher.groupCount(); index > 0; index--) { resourceName.replace(matcher.start(index), matcher.end(index), StringUtils.EMPTY); } break; } } // remove '/' appended earlier resourceName.setLength(resourceName.length() - 1); // Remove any // that result from pattern replacement return resourceName.toString().replaceAll("//", "/"); }
From source file:com.cyberway.issue.crawler.extractor.ExtractorHTML.java
/** * Process metadata tags./*from ww w . j ava 2 s. c om*/ * @param curi CrawlURI we're processing. * @param cs Sequence from underlying ReplayCharSequence. This * is TRANSIENT data. Make a copy if you want the data to live outside * of this extractors' lifetime. * @return True robots exclusion metatag. */ protected boolean processMeta(CrawlURI curi, CharSequence cs) { Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs); String name = null; String httpEquiv = null; String content = null; while (attr.find()) { int valueGroup = (attr.start(14) > -1) ? 14 : (attr.start(15) > -1) ? 15 : 16; CharSequence value = cs.subSequence(attr.start(valueGroup), attr.end(valueGroup)); if (attr.group(1).equalsIgnoreCase("name")) { name = value.toString(); } else if (attr.group(1).equalsIgnoreCase("http-equiv")) { httpEquiv = value.toString(); } else if (attr.group(1).equalsIgnoreCase("content")) { content = value.toString(); } // TODO: handle other stuff } TextUtils.recycleMatcher(attr); // Look for the 'robots' meta-tag if ("robots".equalsIgnoreCase(name) && content != null) { curi.putString(A_META_ROBOTS, content); RobotsHonoringPolicy policy = getSettingsHandler().getOrder().getRobotsHonoringPolicy(); String contentLower = content.toLowerCase(); if ((policy == null || (!policy.isType(curi, RobotsHonoringPolicy.IGNORE) && !policy.isType(curi, RobotsHonoringPolicy.CUSTOM))) && (contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0)) { // if 'nofollow' or 'none' is specified and the // honoring policy is not IGNORE or CUSTOM, end html extraction logger.fine("HTML extraction skipped due to robots meta-tag for: " + curi.toString()); return true; } } else if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) { int urlIndex = content.indexOf("=") + 1; if (urlIndex > 0) { String refreshUri = content.substring(urlIndex); try { curi.createAndAddLinkRelativeToBase(refreshUri, "meta", Link.REFER_HOP); } catch (URIException e) { if (getController() 
!= null) { getController().logUriError(e, curi.getUURI(), refreshUri); } else { logger.info("Failed createAndAddLinkRelativeToBase " + curi + ", " + cs + ", " + refreshUri + ": " + e); } } } } return false; }
From source file:com.hichinaschool.flashcards.libanki.Media.java
/**
 * Percent-escape UTF-8 characters in local image filenames.
 *
 * <p>For every match of {@code fMediaRegexps[1]}, group 1 is treated as the tag and group 2
 * as the file name inside it. Matches whose file name matches {@code fRemoteFilePattern} are
 * kept verbatim; for all others the file name is passed through {@code Uri.encode} and
 * spliced back into the tag.
 *
 * @param string The string to search for image references and escape the filenames.
 * @return The string with the filenames of any local images percent-escaped as UTF-8.
 */
public String escapeImages(String string) {
    Matcher m = fMediaRegexps[1].matcher(string);
    StringBuffer sb = new StringBuffer();
    while (m.find()) {
        String fname = m.group(2);
        String replacement;
        if (fRemoteFilePattern.matcher(fname).find()) {
            replacement = m.group();
        } else {
            String tag = m.group(1);
            // start(2)/end(2) are offsets into the whole input, so they have to be shifted
            // by the start of group 1 before they can index into the tag substring.
            int tagOffset = m.start(1);
            String tagBegin = tag.substring(0, m.start(2) - tagOffset);
            String tagEnd = tag.substring(m.end(2) - tagOffset);
            replacement = tagBegin + Uri.encode(fname) + tagEnd;
        }
        // Quote the replacement: '$' and '\' in a tag or file name must be inserted
        // literally rather than being read as group references or escapes.
        m.appendReplacement(sb, Matcher.quoteReplacement(replacement));
    }
    m.appendTail(sb);
    return sb.toString();
}
From source file:com.app.util.browser.BrowserSniffer.java
/**
 * Collects the capture groups of every match of a pattern in a string.
 *
 * <p>For each match, groups {@code 0} to {@code countGroups - 1} are read. A group that did
 * not match, or matched the empty string, is recorded as {@code null}. A match is added to
 * the result only if at least one of its groups is non-null. Any exception thrown while
 * matching is logged, and the matches collected so far are returned.
 *
 * @param pat the pattern to search with
 * @param str the text to search
 * @param countGroups number of groups to read per match, counting group 0
 * @return a list of {@code String[]}, one array of length {@code countGroups} per match
 */
private ArrayList getMatches(Pattern pat, String str, int countGroups) {
    final Matcher matcher = pat.matcher(str);
    final ArrayList matches = new ArrayList();

    try {
        while (matcher.find()) {
            final ArrayList row = new ArrayList();
            int missing = 0;

            for (int g = 0; g < countGroups; g++) {
                // group(g) is null exactly when the group did not take part in the match.
                final String text = matcher.group(g);
                if (text != null && StringUtils.isNotEmpty(text)) {
                    row.add(text);
                } else {
                    row.add(null);
                    missing++;
                }
            }

            if (!row.isEmpty() && missing != row.size()) {
                matches.add(row.toArray(new String[row.size()]));
            }
        }
    } catch (Exception e) {
        log.error(e);
    }

    return matches;
}
From source file:com.edgenius.wiki.render.filter.MacroFilter.java
private void resetRegion(final int initPos, final CharSequence input, final List<Region> list) { final List<Region> pairRegions = new ArrayList<Region>(); singleMacroProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() { public void handleMatch(StringBuffer buffer, Matcher result) { String macroName = result.group(1); if (macroName != null && !macroName.startsWith("$")) { Macro macro = macroMgr.getMacro(macroName); if (macro != null && macro.isPaired()) { String body = result.group(0); int start = result.start(0); int end = result.end(0); Region pair = new Region(start, end); //no parameter, then mark as unknown, otherwise, must be a start macro if (StringUtils.isBlank(result.group(2))) { pair.setKey(MACRO_REGION_KEY_UNKNOWN); } else { pair.setKey(MACRO_REGION_KEY_START); }// w ww . ja va 2s . co m //just for temporary to remember the macro name... pair.setContent(macroName); pair.setBody(body); //sum to list pairRegions.add(pair); } } } }); int size = pairRegions.size(); if (size > 0) { StringBuffer inputBuf = new StringBuffer(input); for (int idx = 0; idx < size; idx++) { Region reg = pairRegions.get(idx); int deep = 0; Region pair = null; //looking for pairs... 
for (int chIdx = idx + 1; chIdx < size; chIdx++) { Region next = pairRegions.get(chIdx); if (StringUtils.equalsIgnoreCase(reg.getContent(), next.getContent())) { //start is unknown (no attribute), then end must be unknown if (MACRO_REGION_KEY_UNKNOWN.equals(reg.getKey()) && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) { //matched pair = next; //skip all internal node - which is handle by embedded recursive idx = chIdx; break; } if (MACRO_REGION_KEY_START.equals(reg.getKey()) && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) { if (deep == 0) { //matched; pair = next; //skip all internal node - which is handle by embedded recursive idx = chIdx; break; } else { //just another inner same name macro matched, deep minus deep--; } } if (MACRO_REGION_KEY_START.equals(next.getKey())) { //ok, it gets another start, in 4th scenarios - then add deep deep++; } } } //ok, success find paired if (pair != null) { int start = initPos + reg.getStart(); int end = initPos + pair.getEnd(); int contentStart = initPos + reg.getEnd(); int contentEnd = initPos + pair.getStart(); String macroName = reg.getContent(); Macro macro = macroMgr.getMacro(macroName); boolean immutable = macro instanceof ImmutableContentMacro; list.add(new Region(MacroFilter.this, immutable, start, end, contentStart, contentEnd)); if (macro.isProcessEmbedded() && (end > start)) { resetRegion(contentStart, inputBuf.subSequence(contentStart - initPos, contentEnd - initPos), list); } } } } }