List of usage examples for java.util.regex.Matcher.end
public int end(int group)
From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java
protected void processMeta(CharSequence cs) { Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs); String name = null;/*from ww w. ja va 2s. c o m*/ String httpEquiv = null; String content = null; while (attr.find()) { int valueGroup = (attr.start(12) > -1) ? 12 : (attr.start(13) > -1) ? 13 : 14; CharSequence value = cs.subSequence(attr.start(valueGroup), attr.end(valueGroup)); if (attr.group(1).equalsIgnoreCase("name")) { name = value.toString(); } else if (attr.group(1).equalsIgnoreCase("http-equiv")) { httpEquiv = value.toString(); } else if (attr.group(1).equalsIgnoreCase("content")) { content = value.toString(); } // TODO: handle other stuff } TextUtils.recycleMatcher(attr); // Look for the 'robots' meta-tag if ("robots".equalsIgnoreCase(name) && content != null) { if (getHonorRobots()) { String contentLower = content.toLowerCase(); if ((contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0)) { // if 'nofollow' or 'none' is specified and we // are honoring robots, end html extraction logger.fine("HTML extraction skipped due to robots meta-tag for: " + source); cancelFurtherExtraction(); return; } } } else if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) { String refreshUri = content.substring(content.indexOf("=") + 1); try { Link refreshLink = new Link(source, UURIFactory.getInstance(base, refreshUri), Link.elementContext("meta", httpEquiv), Link.REFER_HOP); next.addLast(refreshLink); } catch (URIException e) { extractErrorListener.noteExtractError(e, source, refreshUri); } } }
From source file:com.smartitengineering.cms.ws.resources.content.ContentResource.java
/**
 * Builds field objects from form body parts according to the given field
 * definitions and appends them to {@code fields}.
 *
 * <p>Nothing is produced when {@code bodyParts} is null or empty. Otherwise
 * each definition is handled by the type of its value definition:
 * <ul>
 * <li>COMPOSITE: body parts whose key starts with {@code <fieldName>.} are
 * collected (with that prefix stripped) and processed recursively against
 * the composite's own field definitions.</li>
 * <li>COLLECTION of COMPOSITE items: keys matching
 * {@code <fieldName>.<digits>.<rest>} are grouped by the digits and each
 * group is processed recursively into one composite value.</li>
 * <li>COLLECTION of other items: every non-blank body part stored under the
 * field name becomes one collection value.</li>
 * <li>OTHER and all remaining types: the first body part stored under the
 * field name becomes the value.</li>
 * </ul>
 *
 * <p>A field is skipped entirely (not added to {@code fields}) when a
 * collection ends up with no values, or when the first body part of a
 * default-typed field is null or blank. In the other cases the field is
 * added even if no value was set on it.
 *
 * @param allDefs field definitions keyed by field name
 * @param bodyParts form body parts keyed by form field name; may be null
 * @param fields output collection that receives the created fields
 */
protected static void formFields(final Map<String, FieldDef> allDefs,
        final Map<String, List<FormDataBodyPart>> bodyParts,
        final Collection<com.smartitengineering.cms.ws.common.domains.Field> fields) {
    for (Entry<String, FieldDef> fieldDef : allDefs.entrySet()) {
        if (bodyParts != null && !bodyParts.isEmpty()) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Creating field for " + fieldDef.getKey() + " with type "
                        + fieldDef.getValue().getValueDef().getType());
            }
            FieldImpl fieldImpl = new FieldImpl();
            fieldImpl.setName(fieldDef.getKey());
            final boolean containsKey = bodyParts.containsKey(fieldDef.getKey());
            switch (fieldDef.getValue().getValueDef().getType()) {
            case COMPOSITE: {
                boolean hasCompositeValue = false;
                Map<String, List<FormDataBodyPart>> composites = new LinkedHashMap<String, List<FormDataBodyPart>>();
                // Sub-field body parts are keyed "<fieldName>.<subKey>".
                final String prefix = new StringBuilder(fieldDef.getKey()).append('.').toString();
                for (String key : bodyParts.keySet()) {
                    if (key.startsWith(prefix)) {
                        hasCompositeValue = true;
                        // Strip the prefix so the recursive call sees keys relative to the composite.
                        composites.put(key.substring(prefix.length()), bodyParts.get(key));
                    }
                }
                if (hasCompositeValue) {
                    Collection<com.smartitengineering.cms.ws.common.domains.Field> composedFields = new ArrayList<com.smartitengineering.cms.ws.common.domains.Field>();
                    CompositeDataType compositeDataType = (CompositeDataType) fieldDef.getValue().getValueDef();
                    formFields(compositeDataType.getComposedFieldDefs(), composites, composedFields);
                    CompositeFieldValueImpl valueImpl = new CompositeFieldValueImpl();
                    valueImpl.setValuesAsCollection(composedFields);
                    valueImpl.setType(FieldValueType.COMPOSITE.name());
                    fieldImpl.setValue(valueImpl);
                }
                break;
            }
            case COLLECTION: {
                CollectionDataType collectionFieldDef = (CollectionDataType) fieldDef.getValue().getValueDef();
                CollectionFieldValueImpl fieldValueImpl = new CollectionFieldValueImpl();
                fieldValueImpl.setType(FieldValueType.COLLECTION.name());
                switch (collectionFieldDef.getItemDataType().getType()) {
                case COMPOSITE: {
                    boolean hasCompositeValue = false;
                    // Outer key: the digits captured from the body-part key; inner map: that item's sub-field parts.
                    Map<String, Map<String, List<FormDataBodyPart>>> compositesCollection = new HashMap<String, Map<String, List<FormDataBodyPart>>>();
                    // Matches "<fieldName>.<digits>.<rest>"; group 1 is the digits.
                    final String prefixPattern = new StringBuilder(fieldDef.getKey()).append("\\.([0-9]+)\\..+")
                            .toString();
                    Pattern pattern = Pattern.compile(prefixPattern);
                    for (String key : bodyParts.keySet()) {
                        Matcher matcher = pattern.matcher(key);
                        if (matcher.matches()) {
                            hasCompositeValue = true;
                            final Map<String, List<FormDataBodyPart>> composites;
                            String groupKey = matcher.group(1);
                            if (compositesCollection.containsKey(groupKey)) {
                                composites = compositesCollection.get(groupKey);
                            } else {
                                composites = new LinkedHashMap<String, List<FormDataBodyPart>>();
                                compositesCollection.put(groupKey, composites);
                            }
                            // end(1) + 1 skips the '.' that follows the digits, leaving "<rest>".
                            composites.put(key.substring(matcher.end(1) + 1), bodyParts.get(key));
                        }
                    }
                    if (hasCompositeValue) {
                        CompositeDataType compositeDataType = (CompositeDataType) collectionFieldDef
                                .getItemDataType();
                        // Iteration follows the HashMap's order, not the numeric order of the captured digits.
                        for (Entry<String, Map<String, List<FormDataBodyPart>>> cols : compositesCollection
                                .entrySet()) {
                            Collection<com.smartitengineering.cms.ws.common.domains.Field> composedFields = new ArrayList<com.smartitengineering.cms.ws.common.domains.Field>();
                            formFields(compositeDataType.getComposedFieldDefs(), cols.getValue(), composedFields);
                            CompositeFieldValueImpl valueImpl = new CompositeFieldValueImpl();
                            valueImpl.setValuesAsCollection(composedFields);
                            valueImpl.setType(FieldValueType.COMPOSITE.name());
                            fieldValueImpl.getValues().add(valueImpl);
                        }
                    }
                    break;
                }
                default:
                    if (containsKey) {
                        for (FormDataBodyPart bodyPart : bodyParts.get(fieldDef.getKey())) {
                            // Null or blank parts contribute no collection item.
                            if (bodyPart == null || org.apache.commons.lang.StringUtils.isBlank(bodyPart.getValue())) {
                                continue;
                            }
                            FieldValueImpl valueImpl = addFieldFromBodyPart(bodyPart,
                                    collectionFieldDef.getItemDataType());
                            if (valueImpl != null) {
                                fieldValueImpl.getValues().add(valueImpl);
                            }
                        }
                    }
                }
                // An empty collection means the field is omitted: this continues the outer for loop,
                // bypassing fields.add(fieldImpl) below.
                if (fieldValueImpl.getValues().isEmpty()) {
                    continue;
                }
                fieldImpl.setValue(fieldValueImpl);
                break;
            }
            case OTHER: {
                if (containsKey) {
                    // Only the first body part is used; unlike the default case there is no blank check here.
                    final FormDataBodyPart singleBodyPart = bodyParts.get(fieldDef.getKey()).get(0);
                    FieldValueImpl valueImpl = addFieldFromBodyPart(singleBodyPart, fieldDef.getValue().getValueDef());
                    fieldImpl.setValue(valueImpl);
                }
                break;
            }
            default: {
                if (containsKey) {
                    final FormDataBodyPart singleBodyPart = bodyParts.get(fieldDef.getKey()).get(0);
                    // A null or blank first part omits the field (continues the outer for loop).
                    if (singleBodyPart == null
                            || org.apache.commons.lang.StringUtils.isBlank(singleBodyPart.getValue())) {
                        continue;
                    }
                    FieldValueImpl valueImpl = addFieldFromBodyPart(singleBodyPart, fieldDef.getValue().getValueDef());
                    fieldImpl.setValue(valueImpl);
                }
                break;
            }
            }
            fields.add(fieldImpl);
        }
    }
}
From source file:Repackage.java
public void repackageJavaFile(String name) throws IOException { File sourceFile = new File(_sourceBase, name); StringBuffer sb = readFile(sourceFile); Matcher packageMatcher = _packagePattern.matcher(sb); if (packageMatcher.find()) { String pkg = packageMatcher.group(1); int pkgStart = packageMatcher.start(1); int pkgEnd = packageMatcher.end(1); if (packageMatcher.find()) throw new RuntimeException("Two package specifications found: " + name); List filePath = Repackager.splitPath(name, File.separatorChar); String srcDir = Repackager.dirForPath(name); // Sort the repackage spec so that longer from's are first to match // longest package first for (;;) { boolean swapped = false; for (int i = 1; i < filePath.size(); i++) { String spec1 = (String) filePath.get(i - 1); String spec2 = (String) filePath.get(i); if (spec1.indexOf(':') < spec2.indexOf(':')) { filePath.set(i - 1, spec2); filePath.set(i, spec1); swapped = true;/*from ww w.j ava 2 s . c o m*/ } } if (!swapped) break; } List pkgPath = Repackager.splitPath(pkg, '.'); int f = filePath.size() - 2; if (f < 0 || (filePath.size() - 1) < pkgPath.size()) throw new RuntimeException("Package spec differs from file path: " + name); for (int i = pkgPath.size() - 1; i >= 0; i--) { if (!pkgPath.get(i).equals(filePath.get(f))) throw new RuntimeException("Package spec differs from file path: " + name); f--; } List changeTo = null; List changeFrom = null; from: for (int i = 0; i < _fromPackages.size(); i++) { List from = (List) _fromPackages.get(i); if (from.size() <= pkgPath.size()) { for (int j = 0; j < from.size(); j++) if (!from.get(j).equals(pkgPath.get(j))) continue from; changeFrom = from; changeTo = (List) _toPackages.get(i); break; } } if (changeTo != null) { String newPkg = ""; String newName = ""; for (int i = 0; i < changeTo.size(); i++) { if (i > 0) { newPkg += "."; newName += File.separatorChar; } newPkg += changeTo.get(i); newName += changeTo.get(i); } for (int i = filePath.size() - pkgPath.size() - 2; i >= 0; 
i--) newName = (String) filePath.get(i) + File.separatorChar + newName; for (int i = changeFrom.size(); i < pkgPath.size(); i++) { newName += File.separatorChar + (String) pkgPath.get(i); newPkg += '.' + (String) pkgPath.get(i); } newName += File.separatorChar + (String) filePath.get(filePath.size() - 1); sb.replace(pkgStart, pkgEnd, newPkg); name = newName; String newDir = Repackager.dirForPath(name); if (!srcDir.equals(newDir)) { _movedDirs.put(srcDir, newDir); } } } File targetFile = new File(_targetBase, name); // new name if (sourceFile.lastModified() < targetFile.lastModified()) { _skippedFiles += 1; return; } writeFile(new File(_targetBase, name), _repackager.repackage(sb)); }
From source file:cn.dreampie.resource.LessSource.java
private String includeImportedContent(LessSource importedLessSource, Matcher importMatcher) { StringBuilder builder = new StringBuilder(); builder.append(normalizedContent.substring(0, importMatcher.start(1))); String mediaQuery = importMatcher.group(8); if (mediaQuery != null && mediaQuery.length() > 0) { builder.append("@media"); builder.append(mediaQuery);//w w w . j a v a 2s . c o m builder.append("{\n"); } builder.append(importedLessSource.getNormalizedContent()); if (mediaQuery != null && mediaQuery.length() > 0) { builder.append("}\n"); } builder.append(normalizedContent.substring(importMatcher.end(1))); return builder.toString(); }
From source file:com.haulmont.cuba.gui.config.WindowConfig.java
/** * Get screen information by screen ID.//from w w w .ja va2s. c o m * * @param id screen ID as set up in <code>screens.xml</code> * @param deviceInfo target device info * @return screen's registration information or null if not found */ @Nullable public WindowInfo findWindowInfo(String id, @Nullable DeviceInfo deviceInfo) { lock.readLock().lock(); try { checkInitialized(); List<WindowInfo> infos = screens.get(id); if (infos == null) { Matcher matcher = ENTITY_SCREEN_PATTERN.matcher(id); if (matcher.matches()) { MetaClass metaClass = metadata.getClass(matcher.group(1)); if (metaClass == null) { return null; } MetaClass originalMetaClass = metadata.getExtendedEntities().getOriginalMetaClass(metaClass); if (originalMetaClass != null) { String originalId = new StringBuilder(id) .replace(matcher.start(1), matcher.end(1), originalMetaClass.getName()).toString(); infos = screens.get(originalId); } } } List<WindowInfo> foundWindowInfos = infos; if (foundWindowInfos != null) { // do not perform stream processing in a simple case if (foundWindowInfos.size() == 1 && foundWindowInfos.get(0).getScreenAgent() == null) { return foundWindowInfos.get(0); } if (deviceInfo == null) { // find default screen return foundWindowInfos.stream().filter(windowInfo -> windowInfo.getScreenAgent() == null) .findFirst().orElse(null); } else { return infos.stream().filter( wi -> wi.getScreenAgent() != null && wi.getScreenAgent().isSupported(deviceInfo)) .findFirst() .orElseGet(() -> foundWindowInfos.stream() .filter(windowInfo -> windowInfo.getScreenAgent() == null).findFirst() .orElse(null)); } } return null; } finally { lock.readLock().unlock(); } }
From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java
protected boolean processGeneralTag(CharSequence element, CharSequence cs) { Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs); // Just in case it's an OBJECT or APPLET tag String codebase = null;/*w w w . j a v a 2s . c o m*/ ArrayList<String> resources = null; long tally = next.size(); while (attr.find()) { int valueGroup = (attr.start(12) > -1) ? 12 : (attr.start(13) > -1) ? 13 : 14; int start = attr.start(valueGroup); int end = attr.end(valueGroup); CharSequence value = cs.subSequence(start, end); if (attr.start(2) > -1) { // HREF CharSequence context = Link.elementContext(element, attr.group(2)); if (element.toString().equalsIgnoreCase(LINK)) { // <LINK> elements treated as embeds (css, ico, etc) processEmbed(value, context); } else { if (element.toString().equalsIgnoreCase(BASE)) { try { base = UURIFactory.getInstance(value.toString()); } catch (URIException e) { extractErrorListener.noteExtractError(e, source, value); } } // other HREFs treated as links processLink(value, context); } } else if (attr.start(3) > -1) { // ACTION CharSequence context = Link.elementContext(element, attr.group(3)); processLink(value, context); } else if (attr.start(4) > -1) { // ON____ processScriptCode(value); // TODO: context? } else if (attr.start(5) > -1) { // SRC etc. CharSequence context = Link.elementContext(element, attr.group(5)); processEmbed(value, context); } else if (attr.start(6) > -1) { // CODEBASE // TODO: more HTML deescaping? 
codebase = TextUtils.replaceAll(ESCAPED_AMP, value, AMP); CharSequence context = Link.elementContext(element, attr.group(6)); processEmbed(codebase, context); } else if (attr.start(7) > -1) { // CLASSID, DATA if (resources == null) { resources = new ArrayList<String>(); } resources.add(value.toString()); } else if (attr.start(8) > -1) { // ARCHIVE if (resources == null) { resources = new ArrayList<String>(); } String[] multi = TextUtils.split(WHITESPACE, value); for (int i = 0; i < multi.length; i++) { resources.add(multi[i]); } } else if (attr.start(9) > -1) { // CODE if (resources == null) { resources = new ArrayList<String>(); } // If element is applet and code value does not end with // '.class' then append '.class' to the code value. if (element.toString().toLowerCase().equals(APPLET) && !value.toString().toLowerCase().endsWith(CLASSEXT)) { resources.add(value.toString() + CLASSEXT); } else { resources.add(value.toString()); } } else if (attr.start(10) > -1) { // VALUE if (TextUtils.matches(LIKELY_URI_PATH, value)) { CharSequence context = Link.elementContext(element, attr.group(10)); processLink(value, context); } } else if (attr.start(11) > -1) { // any other attribute // ignore for now // could probe for path- or script-looking strings, but // those should be vanishingly rare in other attributes, // and/or symptomatic of page bugs } } TextUtils.recycleMatcher(attr); // handle codebase/resources if (resources == null) { return (tally - next.size()) > 0; } Iterator iter = resources.iterator(); UURI codebaseURI = null; String res = null; try { if (codebase != null) { // TODO: Pass in the charset. codebaseURI = UURIFactory.getInstance(base, codebase); } while (iter.hasNext()) { res = iter.next().toString(); // TODO: more HTML deescaping? 
res = TextUtils.replaceAll(ESCAPED_AMP, res, AMP); if (codebaseURI != null) { res = codebaseURI.resolve(res).toString(); } processEmbed(res, element); // TODO: include attribute too } } catch (URIException e) { extractErrorListener.noteExtractError(e, source, codebase); } catch (IllegalArgumentException e) { DevUtils.logger.log(Level.WARNING, "processGeneralTag()\n" + "codebase=" + codebase + " res=" + res + "\n" + DevUtils.extraInfo(), e); } return (tally - next.size()) > 0; }
From source file:biz.astute.test.simulator.rest.RequestContext.java
/** * Return path portion of URL. The url may be modified to extract variables. * * @param globalProperties global properties * @return path portion of url/*from w w w . j a va 2 s .c o m*/ * @throws UnsupportedEncodingException exception */ public final String getResourcePath(final Properties globalProperties) throws UnsupportedEncodingException { uriProperties.clear(); String requestURI = URLDecoder.decode(request.getRequestURI(), "utf-8"); Pattern[] currentPatterns = getPatterns(globalProperties); if (currentPatterns.length < 1) { return requestURI; } StringBuilder resourceName = new StringBuilder(requestURI); resourceName.append('/'); // Remove this later - need for matcher for (Pattern pattern : currentPatterns) { Matcher matcher = pattern.matcher(resourceName); if (matcher.matches() && (matcher.groupCount() > 0)) { for (int index = 1; index <= matcher.groupCount(); index++) { String matched = matcher.group(index); uriProperties.add(matched); } // Do so in reverse order so as to not affect offset for (int index = matcher.groupCount(); index > 0; index--) { resourceName.replace(matcher.start(index), matcher.end(index), StringUtils.EMPTY); } break; } } // remove '/' appended earlier resourceName.setLength(resourceName.length() - 1); // Remove any // that result from pattern replacement return resourceName.toString().replaceAll("//", "/"); }
From source file:com.icesoft.faces.component.style.OutputStyleRenderer.java
public void encodeEnd(FacesContext facesContext, UIComponent uiComponent) throws IOException { validateParameters(facesContext, uiComponent, OutputStyle.class); try {//from w w w . j a va 2 s .co m DOMContext domContext = DOMContext.attachDOMContext(facesContext, uiComponent); if (!domContext.isInitialized()) { OutputStyle outputStyle = (OutputStyle) uiComponent; Element styleEle = buildCssElement(domContext); String href = outputStyle.getHref(); styleEle.setAttribute(HTML.HREF_ATTR, getResourceURL(facesContext, href)); domContext.setRootNode(styleEle); int browserType = browserType(facesContext, uiComponent); if (browserType != DEFAULT_TYPE) { if (href.endsWith(CSS_EXTENTION)) { int i = href.indexOf(CSS_EXTENTION); if (i > 0) { String start = href.substring(0, i); Element ieStyleEle = buildCssElement(domContext); String extention = IE_EXTENTION; if (browserType == SAFARI) { extention = SAFARI_EXTENTION; } if (browserType == DT) { extention = DT_EXTENTION; } if (browserType == IE_7) { extention = IE_7_EXTENTION; } if (browserType == IE_8) { extention = IE_8_EXTENSION; } if (browserType == SAFARI_MOBILE) { extention = SAFARI_MOBILE_EXTENTION; } if (browserType == OPERA) { extention = OPERA_EXTENTION; } if (browserType == OPERA_MOBILE) { extention = OPERA_MOBILE_EXTENTION; } String browserSpecificFilename = useSpecific(facesContext, start, extention); if (browserSpecificFilename != null) { // W3C spec: To make a style sheet preferred, set the rel attribute to "stylesheet" and name the style sheet with the title attribute ieStyleEle.setAttribute(HTML.TITLE_ATTR, extention); String hrefURL = CoreUtils.resolveResourceURL(facesContext, browserSpecificFilename); ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL); styleEle.getParentNode().appendChild(ieStyleEle); } } else { throw new RuntimeException("OutputStyle file attribute is too short. " + "Needs at least one character before .css. 
Current Value is [" + href + "]"); } } else { Matcher matcher = Pattern .compile(".*javax\\.faces\\.resource/((.*)\\.css)(\\..*)?\\?ln=([^&]*)(&.*|$)") .matcher(href); if (matcher.matches()) { Element ieStyleEle = buildCssElement(domContext); String extension = browserType >= 0 && browserType < extensions.length ? extensions[browserType] : IE_EXTENTION; ieStyleEle.setAttribute(HTML.TITLE_ATTR, extension); String hrefURL = new StringBuffer(matcher.group(0)).insert(matcher.end(2), extension) .toString(); ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL); String resourceName = new StringBuffer(matcher.group(1)) .insert(matcher.end(2) - matcher.start(2), extension).toString(); Resource resource = facesContext.getApplication().getResourceHandler() .createResource(resourceName, matcher.group(4)); if (resource != null) { styleEle.getParentNode().appendChild(ieStyleEle); } } } } } domContext.stepOver(); } catch (Exception e) { log.error("Error in OutputStyleRenderer", e); } }
From source file:com.cyberway.issue.crawler.extractor.ExtractorHTML.java
/** * Process metadata tags./*from w w w . ja v a 2 s . c o m*/ * @param curi CrawlURI we're processing. * @param cs Sequence from underlying ReplayCharSequence. This * is TRANSIENT data. Make a copy if you want the data to live outside * of this extractors' lifetime. * @return True robots exclusion metatag. */ protected boolean processMeta(CrawlURI curi, CharSequence cs) { Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs); String name = null; String httpEquiv = null; String content = null; while (attr.find()) { int valueGroup = (attr.start(14) > -1) ? 14 : (attr.start(15) > -1) ? 15 : 16; CharSequence value = cs.subSequence(attr.start(valueGroup), attr.end(valueGroup)); if (attr.group(1).equalsIgnoreCase("name")) { name = value.toString(); } else if (attr.group(1).equalsIgnoreCase("http-equiv")) { httpEquiv = value.toString(); } else if (attr.group(1).equalsIgnoreCase("content")) { content = value.toString(); } // TODO: handle other stuff } TextUtils.recycleMatcher(attr); // Look for the 'robots' meta-tag if ("robots".equalsIgnoreCase(name) && content != null) { curi.putString(A_META_ROBOTS, content); RobotsHonoringPolicy policy = getSettingsHandler().getOrder().getRobotsHonoringPolicy(); String contentLower = content.toLowerCase(); if ((policy == null || (!policy.isType(curi, RobotsHonoringPolicy.IGNORE) && !policy.isType(curi, RobotsHonoringPolicy.CUSTOM))) && (contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0)) { // if 'nofollow' or 'none' is specified and the // honoring policy is not IGNORE or CUSTOM, end html extraction logger.fine("HTML extraction skipped due to robots meta-tag for: " + curi.toString()); return true; } } else if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) { int urlIndex = content.indexOf("=") + 1; if (urlIndex > 0) { String refreshUri = content.substring(urlIndex); try { curi.createAndAddLinkRelativeToBase(refreshUri, "meta", Link.REFER_HOP); } catch (URIException e) { if 
(getController() != null) { getController().logUriError(e, curi.getUURI(), refreshUri); } else { logger.info("Failed createAndAddLinkRelativeToBase " + curi + ", " + cs + ", " + refreshUri + ": " + e); } } } } return false; }
From source file:com.app.util.browser.BrowserSniffer.java
private ArrayList getMatches(Pattern pat, String str, int countGroups) { Matcher matcher = pat.matcher(str); ArrayList matches = new ArrayList(); try {//from w ww.j a v a 2 s. co m ArrayList groups = new ArrayList(); while (matcher.find()) { groups.clear(); int nullCount = 0; for (int i = 0; i < countGroups; i++) { int start = matcher.start(i); int end = matcher.end(i); if (start >= 0 && end >= 0) { String sub = str.substring(start, end); if (StringUtils.isNotEmpty(sub)) groups.add(sub); else { groups.add(null); nullCount++; } } else { groups.add(null); nullCount++; } } if (groups.size() > 0 && nullCount != groups.size()) matches.add(groups.toArray(new String[groups.size()])); } } catch (Exception e) { log.error(e); } return matches; }