List of usage examples for java.util.regex Pattern UNICODE_CASE
int UNICODE_CASE
To view the source code for java.util.regex Pattern UNICODE_CASE, click the Source Link.
From source file:org.etudes.util.HtmlHelper.java
/** * Remove link and meta tags//w w w .ja v a2 s . co m * * @param data * the html data. * @return The cleaned up data. */ public static String stripLinks(String data) { if (data == null) return data; // pattern to find link/meta tags Pattern p = Pattern.compile("<(link|meta)\\s+.*?(/*>)", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL); Matcher m = p.matcher(data); StringBuffer sb = new StringBuffer(); while (m.find()) { m.appendReplacement(sb, ""); } m.appendTail(sb); return sb.toString(); }
From source file:org.apache.nifi.processors.standard.EvaluateRegularExpression.java
/**
 * Translates the boolean regex-option properties of the given context into the bit mask expected by
 * {@link Pattern#compile(String, int)}.
 *
 * @param context the context whose option properties are read
 * @return the OR-ed {@link Pattern} flags for every option property that evaluates to {@code true};
 *         {@code 0} when none does
 */
int getCompileFlags(ProcessContext context) {
    int flags = 0;

    // Properties are read in the same order as before: one property, one flag bit.
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        flags |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        flags |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        flags |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        flags |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        flags |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        flags |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        flags |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        flags |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        flags |= Pattern.UNICODE_CHARACTER_CLASS;
    }

    return flags;
}
From source file:org.eclipse.mylyn.internal.web.tasks.WebRepositoryConnector.java
public static IStatus performQuery(String resource, String regexp, String taskPrefix, IProgressMonitor monitor, TaskDataCollector resultCollector, TaskRepository repository) { NamedPattern p = new NamedPattern(regexp, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL | Pattern.UNICODE_CASE | Pattern.CANON_EQ); Matcher matcher = p.matcher(resource); if (!matcher.find()) { return Status.OK_STATUS; } else {// w w w.ja v a 2s . co m boolean isCorrect = true; do { if (p.getGroups().isEmpty()) { // "classic" mode, no named patterns if (matcher.groupCount() < 2) { isCorrect = false; } if (matcher.groupCount() >= 1) { String id = matcher.group(1); String description = matcher.groupCount() > 1 ? cleanup(matcher.group(2), repository) : null; description = unescapeHtml(description); TaskData data = createTaskData(repository, id); TaskMapper mapper = new TaskMapper(data, true); mapper.setCreationDate(DEFAULT_DATE); mapper.setTaskUrl(taskPrefix + id); mapper.setSummary(description); mapper.setValue(KEY_TASK_PREFIX, taskPrefix); resultCollector.accept(data); } } else { String id = p.group("Id", matcher); //$NON-NLS-1$ String description = p.group("Description", matcher); //$NON-NLS-1$ if (id == null || description == null) { isCorrect = false; } if (id != null) { description = unescapeHtml(description); String owner = unescapeHtml(cleanup(p.group("Owner", matcher), repository)); //$NON-NLS-1$ String type = unescapeHtml(cleanup(p.group("Type", matcher), repository)); //$NON-NLS-1$ TaskData data = createTaskData(repository, id); TaskMapper mapper = new TaskMapper(data, true); mapper.setCreationDate(DEFAULT_DATE); mapper.setTaskUrl(taskPrefix + id); mapper.setSummary(description); mapper.setValue(KEY_TASK_PREFIX, taskPrefix); mapper.setOwner(owner); mapper.setTaskKind(type); String status = p.group("Status", matcher); //$NON-NLS-1$ if (status != null) { if (COMPLETED_STATUSES.contains(status.toLowerCase())) { // TODO set actual completion date here 
mapper.setCompletionDate(DEFAULT_DATE); } } resultCollector.accept(data); } } } while (matcher.find() && !monitor.isCanceled()); if (isCorrect) { return Status.OK_STATUS; } else { return new Status(IStatus.ERROR, TasksWebPlugin.ID_PLUGIN, IStatus.ERROR, Messages.WebRepositoryConnector_Require_two_matching_groups, null); } } }
From source file:org.sakaiproject.lessonbuildertool.service.LessonBuilderEntityProducer.java
public void init() { logger.info("init()"); try {/* w w w . j a v a 2s.c o m*/ EntityManager.registerEntityProducer(this, REFERENCE_ROOT); } catch (Exception e) { logger.warn("Error registering Link Tool Entity Producer", e); } lessonBuilderAccessAPI.setToolApi(this); // LinkMigrationHelper is not present before 2.10. So this code can compile on older systems, // find it via introspection. try { linkMigrationHelper = RequestFilter.class.getClassLoader() .loadClass("org.sakaiproject.util.api.LinkMigrationHelper"); // this is in the kernel, so it should already be loaded linkMigrationHelperInstance = ComponentManager.get(linkMigrationHelper); if (linkMigrationHelper != null) migrateAllLinks = linkMigrationHelper.getMethod("migrateAllLinks", new Class[] { Set.class, String.class }); } catch (Exception e) { System.out.println("Exception in introspection " + e); System.out.println("loader " + RequestFilter.class.getClassLoader()); } // Builds a Regexp selector. StringBuilder regexp = new StringBuilder("("); for (String attribute : attributes) { regexp.append(attribute); regexp.append("|"); } if (regexp.length() > 1) { regexp.deleteCharAt(regexp.length() - 1); } regexp.append(")[\\s]*=[\\s]*([\"'|])([^\"']*)(\\2|#)"); attributePattern = Pattern.compile(regexp.toString(), Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); pathPattern = Pattern.compile("/(?:access/content/group|web|dav|xsl-portal/site|portal/site)/([^/]+)/.*"); dummyPattern = Pattern.compile(ITEMDUMMY + "\\d+"); // Add the server name to the list of servers String serverName = ServerConfigurationService.getString("serverName", null); String serverId = ServerConfigurationService.getString("serverId", null); servers = new HashSet<String>(); // prefer servername to serverid, by doing it first if (serverName != null) servers.add(serverName); if (serverId != null) servers.add(serverId); try { String hostName = InetAddress.getLocalHost().getHostName(); servers.add(hostName); hostName = 
InetAddress.getLocalHost().getCanonicalHostName(); servers.add(hostName); } catch (Exception ignore) { } servers.add("localhost"); // if neither is defined we're in trouble; if (servers.size() == 0) System.out.println( "LessonBuilderEntityProducer ERROR: neither servername nor serverid defined in sakai.properties"); // this slightly odd code is for testing. It lets us test by reloading just lesson builder. // otherwise we have to restart sakai, since the entity stuff can't be restarted if (false) { try { Document doc = Xml.createDocument(); Stack stack = new Stack(); Element root = doc.createElement("archive"); doc.appendChild(root); root.setAttribute("source", "45d48248-ba23-4829-914a-7219c3ced2dd"); root.setAttribute("server", "foo"); root.setAttribute("date", "now"); root.setAttribute("system", "sakai"); stack.push(root); archive("45d48248-ba23-4829-914a-7219c3ced2dd", doc, stack, "/tmp/archive", null); stack.pop(); Xml.writeDocument(doc, "/tmp/xmlout"); // we don't have an actual user at this point, so need to force checks to work securityService.pushAdvisor(new SecurityAdvisor() { public SecurityAdvice isAllowed(String userId, String function, String reference) { return SecurityAdvice.ALLOWED; } }); merge("0134937b-ce16-440c-80a6-fb088d79e5ad", (Element) doc.getFirstChild().getFirstChild(), "/tmp/archive", "45d48248-ba23-4829-914a-7219c3ced2dd", null, null, null); } catch (Exception e) { System.out.println(e); } finally { securityService.popAdvisor(); } } try { ComponentManager.loadComponent("org.sakaiproject.lessonbuildertool.service.LessonBuilderEntityProducer", this); } catch (Exception e) { logger.warn( "Error registering Lesson Builder Entity Producer with Spring. Lessonbuilder will work, but Lesson Builder instances won't be imported from site archives. This normally happens only if you redeploy Lessonbuilder. Suggest restarting Sakai", e); } }
From source file:com.email.ReceiveEmail.java
/** * Strip out the emojis and symbols from the email so we can actually save * it in the database/* ww w. j av a2s . co m*/ * * @param content String * @return String */ private static String removeEmojiAndSymbolFromString(String content) { String utf8tweet = ""; if (content != null) { try { byte[] utf8Bytes = content.getBytes("UTF-8"); utf8tweet = new String(utf8Bytes, "UTF-8"); } catch (UnsupportedEncodingException ex) { ExceptionHandler.Handle(ex); } Pattern unicodeOutliers = Pattern.compile( "[\ud83c\udc00-\ud83c\udfff]|[\ud83d\udc00-\ud83d\udfff]|[\u2600-\u27ff]", Pattern.UNICODE_CASE | Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE); Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet); utf8tweet = unicodeOutlierMatcher.replaceAll(" "); } return utf8tweet; }
From source file:org.opennms.netmgt.collectd.HttpCollector.java
private static List<HttpCollectionAttribute> processResponse(final Locale responseLocale, final String responseBodyAsString, final HttpCollectionSet collectionSet, HttpCollectionResource collectionResource) { LOG.debug("processResponse:"); LOG.debug("responseBody = {}", responseBodyAsString); LOG.debug("getmatches = {}", collectionSet.getUriDef().getUrl().getMatches()); List<HttpCollectionAttribute> butes = new LinkedList<HttpCollectionAttribute>(); int flags = 0; if (collectionSet.getUriDef().getUrl().getCanonicalEquivalence()) { flags |= Pattern.CANON_EQ; }//from w w w.j a va 2 s .c o m if (collectionSet.getUriDef().getUrl().getCaseInsensitive()) { flags |= Pattern.CASE_INSENSITIVE; } if (collectionSet.getUriDef().getUrl().getComments()) { flags |= Pattern.COMMENTS; } if (collectionSet.getUriDef().getUrl().getDotall()) { flags |= Pattern.DOTALL; } if (collectionSet.getUriDef().getUrl().getLiteral()) { flags |= Pattern.LITERAL; } if (collectionSet.getUriDef().getUrl().getMultiline()) { flags |= Pattern.MULTILINE; } if (collectionSet.getUriDef().getUrl().getUnicodeCase()) { flags |= Pattern.UNICODE_CASE; } if (collectionSet.getUriDef().getUrl().getUnixLines()) { flags |= Pattern.UNIX_LINES; } LOG.debug("flags = {}", flags); Pattern p = Pattern.compile(collectionSet.getUriDef().getUrl().getMatches(), flags); Matcher m = p.matcher(responseBodyAsString); final boolean matches = m.matches(); if (matches) { LOG.debug("processResponse: found matching attributes: {}", matches); final List<Attrib> attribDefs = collectionSet.getUriDef().getAttributes().getAttribCollection(); final AttributeGroupType groupType = new AttributeGroupType(collectionSet.getUriDef().getName(), AttributeGroupType.IF_TYPE_ALL); final List<Locale> locales = new ArrayList<Locale>(); if (responseLocale != null) { locales.add(responseLocale); } locales.add(Locale.getDefault()); if (Locale.getDefault() != Locale.ENGLISH) { locales.add(Locale.ENGLISH); } for (final Attrib attribDef : attribDefs) { final 
String type = attribDef.getType(); String value = null; try { value = m.group(attribDef.getMatchGroup()); } catch (final IndexOutOfBoundsException e) { LOG.error( "IndexOutOfBoundsException thrown while trying to find regex group, your regex does not contain the following group index: {}", attribDef.getMatchGroup()); LOG.error("Regex statement: {}", collectionSet.getUriDef().getUrl().getMatches()); continue; } if (!type.matches("^([Oo](ctet|CTET)[Ss](tring|TRING))|([Ss](tring|TRING))$")) { Number num = null; for (final Locale locale : locales) { try { num = NumberFormat.getNumberInstance(locale).parse(value); LOG.debug("processResponse: found a parsable number with locale \"{}\".", locale); break; } catch (final ParseException e) { LOG.warn( "attribute {} failed to match a parsable number with locale \"{}\"! Matched \"{}\" instead.", attribDef.getAlias(), locale, value); } } if (num == null) { LOG.warn("processResponse: gave up attempting to parse numeric value, skipping group {}", attribDef.getMatchGroup()); continue; } final HttpCollectionAttribute bute = new HttpCollectionAttribute(collectionResource, new HttpCollectionAttributeType(attribDef, groupType), num); LOG.debug("processResponse: adding found numeric attribute: {}", bute); butes.add(bute); } else { HttpCollectionAttribute bute = new HttpCollectionAttribute(collectionResource, new HttpCollectionAttributeType(attribDef, groupType), value); LOG.debug("processResponse: adding found string attribute: {}", bute); butes.add(bute); } } } else { LOG.debug("processResponse: found matching attributes: {}", matches); } return butes; }
From source file:de.undercouch.bson4jackson.BsonParser.java
/** * Converts a BSON regex pattern string to a combined value of Java flags that * can be used in {@link Pattern#compile(String, int)} * @param pattern the regex pattern string * @return the Java flags/*from w ww . j a va 2 s .c om*/ * @throws JsonParseException if the pattern string contains a unsupported flag */ protected int regexStrToFlags(String pattern) throws JsonParseException { int flags = 0; for (int i = 0; i < pattern.length(); ++i) { char c = pattern.charAt(i); switch (c) { case 'i': flags |= Pattern.CASE_INSENSITIVE; break; case 'm': flags |= Pattern.MULTILINE; break; case 's': flags |= Pattern.DOTALL; break; case 'u': flags |= Pattern.UNICODE_CASE; break; case 'l': case 'x': //unsupported break; default: throw new JsonParseException("Invalid regex", getTokenLocation()); } } return flags; }
From source file:org.sakaiproject.tool.assessment.qti.util.XmlUtil.java
/**
 * Escapes everything in {@code value} except the tags listed in {@code M_goodTags}, then passes the
 * result through {@code formattedText.processFormattedText}.
 * <p>
 * Each region matched by {@code M_htmlPattern} is kept verbatim when it matches the open- or close-tag
 * pattern of one of the good tags, and is run through {@code convertoLTGT} otherwise; the text between
 * matches is always run through {@code convertoLTGT}. The per-tag patterns are built on first use.
 *
 * @param log receives, at debug level, any alert message produced while processing
 * @param value the text to process
 * @return {@code value} itself when it is empty per {@code StringUtils.isEmpty}; otherwise the
 *         processed text
 */
public static String processFormattedText(Log log, String value) {
    if (StringUtils.isEmpty(value)) {
        return value;
    }

    StringBuilder alertMsg = new StringBuilder();
    Matcher htmlMatcher = M_htmlPattern.matcher(value);

    // Build the open/close patterns for every allowed tag the first time they are needed.
    if (M_goodTagsPatterns == null || M_goodCloseTagsPatterns == null) {
        final int tagFlags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
        M_goodTagsPatterns = new Pattern[M_goodTags.length];
        M_goodCloseTagsPatterns = new Pattern[M_goodTags.length];
        for (int t = 0; t < M_goodTags.length; t++) {
            M_goodTagsPatterns[t] = Pattern.compile(".*<\\s*" + M_goodTags[t] + "(\\s+.*>|>|/>).*", tagFlags);
            M_goodCloseTagsPatterns[t] = Pattern.compile("<\\s*/\\s*" + M_goodTags[t] + "(\\s.*>|>)", tagFlags);
        }
    }

    StringBuilder escaped = new StringBuilder();
    boolean sawHtml = false;
    int cursor = 0;
    while (htmlMatcher.find()) {
        sawHtml = true;

        // Plain text between the previous match and this one is always escaped.
        escaped.append(convertoLTGT(value.substring(cursor, htmlMatcher.start())));

        String tag = htmlMatcher.group();
        boolean allowed = false;
        for (int t = 0; t < M_goodTags.length && !allowed; t++) {
            allowed = M_goodTagsPatterns[t].matcher(tag).matches()
                    || M_goodCloseTagsPatterns[t].matcher(tag).matches();
        }

        if (allowed) {
            escaped.append(tag);
        } else {
            escaped.append(convertoLTGT(tag));
        }
        cursor = htmlMatcher.end();
    }
    escaped.append(convertoLTGT(value.substring(cursor)));

    String processed;
    if (sawHtml) {
        processed = formattedText.processFormattedText(escaped.toString(), alertMsg);
    } else {
        processed = formattedText.processFormattedText(convertoLTGT(value), alertMsg);
    }

    if (alertMsg.length() > 0) {
        log.debug(alertMsg.toString());
    }
    return processed;
}