List of usage examples for java.util.regex Pattern UNICODE_CASE
int UNICODE_CASE
To view the source code for java.util.regex Pattern UNICODE_CASE, click the Source Link.
From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java
/**
 * Compiles a regular expression, translating a string of single-letter flag codes into
 * {@link Pattern} match flags.
 *
 * Recognised codes: {@code m} (MULTILINE), {@code i} (CASE_INSENSITIVE), {@code d} (DOTALL),
 * {@code u} (UNICODE_CASE) and {@code n} (UNIX_LINES). Any other character is ignored.
 *
 * @param regEx the regular expression to compile
 * @param flags the flag codes, or {@code null} for no flags
 * @return the compiled pattern
 */
private static Pattern createRegex(String regEx, String flags) {
    int mask = 0;
    if (flags != null) {
        for (char code : flags.toCharArray()) {
            if (code == 'm') {
                mask |= Pattern.MULTILINE;
            } else if (code == 'i') {
                mask |= Pattern.CASE_INSENSITIVE;
            } else if (code == 'd') {
                mask |= Pattern.DOTALL;
            } else if (code == 'u') {
                mask |= Pattern.UNICODE_CASE;
            } else if (code == 'n') {
                mask |= Pattern.UNIX_LINES;
            }
            // Unrecognised codes contribute nothing to the mask.
        }
    }
    return Pattern.compile(regEx, mask);
}
From source file:org.etudes.mneme.impl.AttachmentServiceImpl.java
/** * Collect all the attachment references in the html data:<br /> * Anything referenced by a src= or href=. in our content docs, or in a site content area <br /> * Ignore anything in a myWorkspace content area or the public content area. <br /> * * @param data//from ww w.ja v a 2 s . co m * The data string. * @param normalize * if true, decode the references by URL decoding rules. * @param parentRef * Reference string to the embedding (parent) resource - used to resolve relative references. * @return The set of attachment references. */ protected Set<String> harvestAttachmentsReferenced(String data, boolean normalize, String parentRef) { Set<String> rv = new HashSet<String>(); if (data == null) return rv; // pattern to find any src= or href= text // groups: 0: the whole matching text 1: src|href 2: the string in the quotes 3: the terminator character Pattern p = Pattern.compile("(src|href)[\\s]*=[\\s]*\"([^#\"]*)([#\"])", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); Matcher m = p.matcher(data); while (m.find()) { if (m.groupCount() == 3) { String ref = m.group(2); if (ref != null) ref = ref.trim(); // expand to a full reference if relative ref = adjustRelativeReference(ref, parentRef); // harvest any content hosting reference int index = ref.indexOf("/access/content/"); if (index != -1) { // except for any in /user/ or /public/ if (ref.indexOf("/access/content/user/") != -1) { index = -1; } else if (ref.indexOf("/access/content/public/") != -1) { index = -1; } } // harvest also the mneme docs references if (index == -1) index = ref.indexOf("/access/mneme/content/"); // TODO: further filter to docs root and context (optional) if (index != -1) { // save just the reference part (i.e. after the /access); String refString = ref.substring(index + 7); // deal with %20 and other encoded URL stuff if (normalize) { refString = decodeUrl(refString); } rv.add(refString); } } } return rv; }
From source file:org.etudes.mneme.impl.AttachmentServiceImpl.java
/** * {@inheritDoc}//from w w w.j a v a 2 s.c om */ protected String translateEmbeddedReferences(String data, Collection<Translation> translations, String parentRef) { if (data == null) return data; if (translations == null) return data; // pattern to find any src= or href= text // groups: 0: the whole matching text 1: src|href 2: the string in the quotes 3: the terminator character Pattern p = Pattern.compile("(src|href)[\\s]*=[\\s]*\"([^#\"]*)([#\"])", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); Matcher m = p.matcher(data); StringBuffer sb = new StringBuffer(); // process each "harvested" string (avoiding like strings that are not in src= or href= patterns) while (m.find()) { if (m.groupCount() == 3) { String ref = m.group(2); String terminator = m.group(3); if (ref != null) ref = ref.trim(); // expand to a full reference if relative ref = adjustRelativeReference(ref, parentRef); // harvest any content hosting reference int index = ref.indexOf("/access/content/"); if (index != -1) { // except for any in /user/ or /public/ if (ref.indexOf("/access/content/user/") != -1) { index = -1; } else if (ref.indexOf("/access/content/public/") != -1) { index = -1; } } // harvest also the mneme docs references if (index == -1) index = ref.indexOf("/access/mneme/content/"); if (index != -1) { // save just the reference part (i.e. after the /access); String normal = ref.substring(index + 7); // deal with %20, &, and other encoded URL stuff normal = decodeUrl(normal); // translate the normal form String translated = normal; for (Translation translation : translations) { translated = translation.translate(translated); } // URL encode translated String escaped = EscapeRefUrl.escapeUrl(translated); // if changed, replace if (!normal.equals(translated)) { m.appendReplacement(sb, Matcher.quoteReplacement( m.group(1) + "=\"" + ref.substring(0, index + 7) + escaped + terminator)); } } } } m.appendTail(sb); return sb.toString(); }
From source file:org.sakaiproject.tool.assessment.services.GradingService.java
/**
 * Checks whether a response matches an answer template in which each {@code *} is a wildcard.
 *
 * The answer is cut at every {@code |} character and around every {@code *}; each {@code *}
 * becomes the regex {@code .+} (one or more characters) and every other piece is matched
 * literally. The whole input must match the resulting expression.
 *
 * @param answer        the answer template
 * @param input         the text to test against the template
 * @param casesensitive if false, matching ignores case (Unicode-aware)
 * @return true if the input matches; false if it does not or if any exception occurs
 */
public boolean fibmatch(String answer, String input, boolean casesensitive) {
    try {
        // Surround each '*' with pipes so that splitting on '|' yields it as a token of its own.
        String[] pieces = answer.replaceAll("\\*", "|*|").split("\\|");

        StringBuilder expression = new StringBuilder();
        for (String piece : pieces) {
            expression.append("*".equals(piece) ? ".+" : Pattern.quote(piece));
        }

        int flags = casesensitive ? 0 : (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        return Pattern.compile(expression.toString(), flags).matcher(input).matches();
    } catch (Exception e) {
        // any failure (for example a null argument) is reported as "no match"
        return false;
    }
}
From source file:lineage2.gameserver.Config.java
/**
 * Method abuseLoad.
 * Reads ANUSEWORDS_CONFIG_FILE as UTF-8, compiles every non-empty line into a
 * case-insensitive "contains" pattern and stores the result in ABUSEWORD_LIST.
 * An IOException is logged as a warning and leaves ABUSEWORD_LIST untouched.
 */
public static void abuseLoad() {
    final int matchFlags = Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    final List<Pattern> compiled = new ArrayList<Pattern>();
    LineNumberReader reader = null;
    try {
        reader = new LineNumberReader(
                new InputStreamReader(new FileInputStream(ANUSEWORDS_CONFIG_FILE), "UTF-8"));
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            StringTokenizer tokens = new StringTokenizer(row, "\n\r");
            if (!tokens.hasMoreTokens()) {
                continue; // blank line
            }
            // wrap in .* so the word matches anywhere inside the tested text
            compiled.add(Pattern.compile(".*" + tokens.nextToken() + ".*", matchFlags));
        }
        ABUSEWORD_LIST = compiled.toArray(new Pattern[compiled.size()]);
        compiled.clear();
        _log.info("Abuse: Loaded " + ABUSEWORD_LIST.length + " abuse words.");
    } catch (IOException e1) {
        _log.warn("Error reading abuse: " + e1);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // a failure while closing is deliberately ignored
            }
        }
    }
}
From source file:org.etudes.jforum.view.admin.ImportExportAction.java
/** * parse export content resource reference urls * @param message//from w ww . j a v a2s. co m * - messge * @param ref * - reference * @param parentPath * - parent path * @return * - modified content */ private String parseExportContentResourceUrls(String message, String ref, String parentPath) { ref = Validator.escapeUrl(ref); // file name with spaces doesn't have %20 for spaces // get file name /*This may not be needed as spaces have %20 String fileName = ref.substring(ref.lastIndexOf("/") + 1); try { fileName = URLDecoder.decode(fileName, "UTF-8"); } catch (UnsupportedEncodingException e) { if (logger.isWarnEnabled()) logger.warn("parseExportContentResourceUrls: " + e); }*/ //ref = ref.substring(0, ref.lastIndexOf("/") + 1) + fileName; parentPath = Validator.escapeUrl(parentPath); StringBuffer sb = new StringBuffer(); Pattern p = Pattern.compile("(src|href)[\\s]*=[\\s]*[\\\"'](/access" + ref + ")[\\\"']", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.UNICODE_CASE); Matcher m = p.matcher(message); while (m.find()) { if (m.groupCount() == 2) { String refMatch = m.group(2); if (parentPath == null || parentPath.trim().length() == 0) { String siteId = ToolManager.getCurrentPlacement().getContext(); refMatch = refMatch.substring(("/access/content/group/" + siteId).length() + 1); } else { if (refMatch.indexOf(parentPath) == -1) { String siteId = ToolManager.getCurrentPlacement().getContext(); refMatch = refMatch.substring(("/access/content/group/" + siteId).length() + 1); String pathRef[] = parentPath.split("/"); StringBuilder refPath = new StringBuilder(); for (int i = 0; i < (pathRef.length - 1); i++) { refPath.append("../"); } refMatch = refPath.toString() + refMatch; } else { int index = refMatch.indexOf(parentPath); refMatch = refMatch.substring(index + parentPath.length() + 1); } } /*String fileName1 = null; boolean escapeFilePath = false; try { if (logger.isDebugEnabled()) logger.debug("parseExportContentResourceUrls: refMatch :"+ refMatch); if 
(refMatch.lastIndexOf("/") != -1) { fileName1 = refMatch.substring(refMatch.lastIndexOf("/")+1); refMatch = refMatch.substring(0, refMatch.lastIndexOf("/")+1); if (logger.isDebugEnabled()) logger.debug("parseExportContentResourceUrls: refMatch sub string :"+ refMatch); fileName1 = URLDecoder.decode(fileName1, "UTF-8"); escapeFilePath = true; } } catch (UnsupportedEncodingException e) { if (logger.isWarnEnabled()) logger.warn("parseExportContentResourceUrls: " + e); } if (escapeFilePath) { m.appendReplacement(sb, Matcher.quoteReplacement(m.group(1)+ "=\""+ refMatch + fileName1 +"\"")); } else m.appendReplacement(sb, Matcher.quoteReplacement(m.group(1)+ "=\""+ refMatch + "\""));*/ m.appendReplacement(sb, Matcher.quoteReplacement(m.group(1) + "=\"" + refMatch + "\"")); } } m.appendTail(sb); return sb.toString(); }
From source file:org.etudes.jforum.view.admin.ImportExportAction.java
/** * Create the embedded reference detection pattern. It creates three groups: 0 - the entire matc, 1- src|href, 2-server url up to access/content/...., 3-siteid/refwithfolders. * /*from ww w. ja va2s . c o m*/ * @return The Pattern. */ private Pattern getExportContentResourcePattern() { return Pattern.compile("(src|href)[\\s]*=[\\s]*[\\\"'](.*?)/access/content/group/([^\"]*)[\\\"']", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); }
From source file:org.etudes.jforum.view.admin.ImportExportAction.java
/**
 * Builds the pattern that detects embedded references whose quoted value starts with
 * "embeded_jf_content/content/group".
 * Groups: 0 - the entire match, 1 - src|href, 2 - the remainder of the reference after that prefix.
 * Matching is case-insensitive.
 *
 * @return The Pattern.
 */
private Pattern getContentResourcePattern() {
    final String regex = "(src|href)[\\s]*=[\\s]*[\\\"']embeded_jf_content/content/group([^\"]*)[\\\"']";
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    return Pattern.compile(regex, flags);
}
From source file:org.etudes.jforum.view.admin.ImportExportAction.java
/** * Create the embedded reference detection pattern. It creates three groups: 0 - the entire match, 1- src|href, 2-the reference. * /*from w ww .j a v a 2 s. c om*/ * @return The Pattern. */ private Pattern getEmbeddedContentResourcePattern() { return Pattern.compile("(src|href)[\\s]*=[\\s]*[\\\"'](?!http|www|file)([^\"]*)[\\\"']", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); }
From source file:org.etudes.jforum.view.admin.ImportExportAction.java
/** * Create the embedded reference detection pattern. It creates three groups: 0 - the entire matc, 1- src|href, 2-the reference. * /*from w w w. ja v a 2s . com*/ * @return The Pattern. */ private Pattern getMeleteResourcePattern() { return Pattern.compile( "(src|href)[\\s]*=[\\s]*[\\\"']embeded_jf_content/meleteDocs/content/private/meleteDocs([^\"]*)[\\\"']", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); }