Example usage for java.util.regex Pattern UNICODE_CASE

Introduction

On this page you can find example usages of java.util.regex Pattern UNICODE_CASE.

Prototype

int UNICODE_CASE

To view the source code for java.util.regex Pattern UNICODE_CASE, click Source Link.

Document

Enables Unicode-aware case folding.

Usage

From source file:net.jazdw.rql.parser.listfilter.ListFilter.java

@SuppressWarnings("unchecked")
@Override/*from  ww  w .  j  a  v  a 2  s  . com*/
public List<T> visit(ASTNode node, List<T> list) {
    switch (node.getName()) {
    case "and":
        for (Object obj : node) {
            if (obj instanceof ASTNode) {
                list = ((ASTNode) obj).accept(this, list);
            } else {
                throw new UnsupportedOperationException("Encountered a non-ASTNode argument in AND statement");
            }
        }
        return list;
    case "or":
        Set<T> set = new LinkedHashSet<T>();
        for (Object obj : node) {
            if (obj instanceof ASTNode) {
                set.addAll(((ASTNode) obj).accept(this, list));
            } else {
                throw new UnsupportedOperationException("Encountered a non-ASTNode argument in OR statement");
            }
        }
        return new ArrayList<>(set);
    case "eq":
    case "gt":
    case "ge":
    case "lt":
    case "le":
    case "ne":
        String propName = (String) node.getArgument(0);
        Object test = node.getArgumentsSize() > 1 ? node.getArgument(1) : null;

        List<T> result = new ArrayList<>();

        for (T item : list) {
            Object property = getProperty(item, propName);

            Comparable<Object> comparableProperty;
            if (property instanceof Comparable) {
                comparableProperty = (Comparable<Object>) property;
            } else {
                throw new UnsupportedOperationException(
                        String.format("Property '%s' is not comparable", propName));
            }

            int comparisonValue;
            try {
                comparisonValue = comparableProperty.compareTo(test);
            } catch (ClassCastException e) {
                throw new UnsupportedOperationException(
                        String.format("Couldn't compare '%s' to '%s'", property.toString(), test.toString()));
            }

            if (checkComparisonValue(node.getName(), comparisonValue)) {
                result.add(item);
            }
        }
        return result;
    case "like":
    case "match":
        propName = (String) node.getArgument(0);
        String matchString = (String) node.getArgument(1);
        Pattern matchPattern = Pattern.compile(matchString.replace("*", ".*"),
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

        result = new ArrayList<>();

        for (T item : list) {
            Object property = getProperty(item, propName);

            String stringProperty;
            if (property instanceof String) {
                stringProperty = (String) property;
            } else {
                throw new UnsupportedOperationException(
                        String.format("Property '%s' is not a string", propName));
            }

            if (matchPattern.matcher(stringProperty).matches()) {
                result.add(item);
            }
        }
        return result;
    case "limit":
        int limit = (int) node.getArgument(0);
        int offset = node.getArgumentsSize() > 1 ? (int) node.getArgument(1) : 0;

        if (offset > list.size() - 1) {
            return Collections.emptyList();
        }

        int toIndex = offset + limit;
        if (toIndex > list.size()) {
            toIndex = list.size();
        }

        return list.subList(offset, toIndex);
    case "sort":
        ComparatorChain cc = new ComparatorChain();
        for (Object obj : node) {
            String sortOption = (String) obj;
            boolean desc = sortOption.startsWith("-");
            cc.addComparator(new BeanComparator<T>(sortOption.substring(1)), desc);
        }
        // copy the list as we are modifying it
        list = new ArrayList<>(list);
        Collections.sort(list, cc);
        return list;
    default:
        throw new UnsupportedOperationException(
                String.format("Encountered unknown operator '%s'", node.getName()));
    }
}

From source file:Normalization.TextNormalization.java

public String removeSymbolsFromString(String content) {

    String utf8tweet = "";
    try {// w  w w .  j a va  2s .  c  om
        byte[] utf8Bytes = content.getBytes("UTF-8");

        utf8tweet = new String(utf8Bytes, "UTF-8");
    } catch (UnsupportedEncodingException e) {
    }

    final String regex = "[\\./\\()\"':,.;<>~!$%^&*\\|+={}?\\-`1234567890_]";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    utf8tweet = unicodeOutlierMatcher.replaceAll(" ");
    return utf8tweet;
}

From source file:pl.otros.logview.gui.message.pattern.PropertyPatternMessageColorizer.java

public void init(InputStream in) throws ConfigurationException {
    propertiesConfiguration = new PropertiesConfiguration();
    propertiesConfiguration.setDelimiterParsingDisabled(true);
    propertiesConfiguration.load(in, "UTF-8");
    configuration = new DataConfiguration(propertiesConfiguration);
    configuration.setDelimiterParsingDisabled(true);
    String pa = configuration.getString(PROP_PATTERN);
    int flags = 0;
    flags = flags | (configuration.getBoolean(PROP_PATTERN_CANON_EQ, false) ? Pattern.CANON_EQ : 0);
    flags = flags//from w w  w.jav a 2s  .  c  o m
            | (configuration.getBoolean(PROP_PATTERN_CASE_INSENSITIVE, false) ? Pattern.CASE_INSENSITIVE : 0);
    flags = flags | (configuration.getBoolean(PROP_PATTERN_COMMENTS, false) ? Pattern.COMMENTS : 0);
    flags = flags | (configuration.getBoolean(PROP_PATTERN_DOTALL, false) ? Pattern.DOTALL : 0);
    flags = flags | (configuration.getBoolean(PROP_PATTERN_LITERAL, false) ? Pattern.LITERAL : 0);
    flags = flags | (configuration.getBoolean(PROP_PATTERN_MULTILINE, false) ? Pattern.MULTILINE : 0);
    flags = flags | (configuration.getBoolean(PROP_PATTERN_UNICODE_CASE, false) ? Pattern.UNICODE_CASE : 0);
    flags = flags | (configuration.getBoolean(PROP_PATTERN_UNIX_LINES, false) ? Pattern.UNIX_LINES : 0);

    pattern = Pattern.compile(pa, flags);
    groupCount = countGroups(pattern);
    name = configuration.getString(PROP_NAME, "NAME NOT SET!");
    description = configuration.getString(PROP_DESCRIPTION, "DESCRIPTION NOT SET!");
    testMessage = configuration.getString(PROP_TEST_MESSAGE, "");
    version = configuration.getInt(PROP_VERSION, 1);
}

From source file:de.dfki.km.perspecting.obie.model.Document.java

/***************************************************************************
 * Gets the pure plain text out of a html text. All html tags are replaced
 * by spaces. To do so, the head is replaced, all remaining javascript tags
 * (including the content) and finally all remaining html tags. Thus,
 * absolute positioning is possible./*from w  w w .j a v  a2 s. c  o  m*/
 * 
 * @param text
 *            content of the html document as text
 * @return text where all html was replaced by spaces
 */
private String extractPlainTextFromHtml(String text) {
    Collection<Pattern> patterns = new ArrayList<Pattern>(3);
    // Delete the head, then all remaining javascript items that might exist
    // in the body, then all remaining html tags.
    patterns.add(
            Pattern.compile("<head.*/head>", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL));
    // .*? makes it non greedy -> take the shortes match
    // DOTALL does also include new lines
    patterns.add(Pattern.compile("<script.*?/script>",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL));
    patterns.add(Pattern.compile("<.+?>", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE));
    StringBuffer s = new StringBuffer(text);

    // Go for all patterns.
    for (Pattern p : patterns) {
        Matcher matcher = p.matcher(s);

        // As long as the matcher finds another occurance of the pattern we
        // replace it by the same number of spaces but keep new lines.
        while (matcher.find())
            s.replace(matcher.start(), matcher.end(), matcher.group().replaceAll(".", " "));
    }
    return s.toString();
}

From source file:org.eclipse.skalli.core.user.LocalUserComponent.java

@Override
public List<User> findUser(String search) {
    List<User> result = new ArrayList<User>();
    if (StringUtils.isNotBlank(search)) {
        String[] parts = StringUtils.split(NormalizeUtil.normalize(search), " ,"); //$NON-NLS-1$
        Pattern[] patterns = new Pattern[parts.length];
        for (int i = 0; i < parts.length; ++i) {
            patterns[i] = Pattern.compile(parts[i] + ".*", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);//$NON-NLS-1$
        }/*  w  w w. j  ava 2  s  .co m*/

        for (User user : getAll()) {
            if (parts.length == 1) {
                match(user, patterns[0], result);
            } else if (parts.length == 2) {
                // givenname surname ('Michael Ochmann')
                if (matches(patterns, user.getFirstname(), user.getLastname())) {
                    result.add(user);
                }
                // surname givenname('Ochmann, Michael')
                else if (matches(patterns, user.getLastname(), user.getFirstname())) {
                    result.add(user);
                }
            } else if (parts.length == 3) {
                // givenname initial surname, e.g. 'Michael R. Ochmann'
                // or title givenname surname or given name surname title
                if (matches(patterns, user.getFirstname(), null, user.getLastname())) {
                    result.add(user);
                } else if (matches(patterns, user.getLastname(), null, user.getFirstname())) {
                    result.add(user);
                } else if (matches(patterns, null, user.getFirstname(), user.getLastname())) {
                    result.add(user);
                } else if (matches(patterns, user.getFirstname(), user.getLastname(), null)) {
                    result.add(user);
                }
            }
        }
        if (result.isEmpty()) {
            for (User user : getAll()) {
                // try to match each part individually
                for (int i = 0; i < parts.length; ++i) {
                    match(user, patterns[i], result);
                }
            }
        }
    }
    return result;
}

From source file:Normalization.TextNormalization.java

public String removeSpacesFromString(String content) {

    String utf8tweet = "";
    try {//from  w w w .  j a v a  2  s. c  o m
        byte[] utf8Bytes = content.getBytes("UTF-8");

        utf8tweet = new String(utf8Bytes, "UTF-8");
    } catch (UnsupportedEncodingException e) {
    }

    final String regex = "\\s{2,}";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    utf8tweet = unicodeOutlierMatcher.replaceAll(" ");
    return utf8tweet;
}

From source file:org.carrot2.webapp.filter.QueryWordHighlighter.java

@Override
public void process() throws ProcessingException {
    if (!enabled) {
        return;/*from  w w w . j  av  a  2  s  .  co m*/
    }

    if (query == null) {
        query = "";
    }

    // Create regexp patterns for each query word
    final String[] queryTerms = querySanitizePatternCompiled.matcher(query).replaceAll("").split("\\s+");

    Pattern queryPattern = null;
    List<String> patterns = Lists.newArrayList();
    for (String queryTerm : queryTerms) {
        if (Strings.isNullOrEmpty(queryTerm)) {
            continue;
        }

        if (dontHighlightPatternCompiled != null && dontHighlightPatternCompiled.matcher(queryTerm).matches()) {
            continue;
        }

        patterns.add("(" + Pattern.quote(escapeLtGt(queryTerm)) + ")");
    }

    if (patterns.size() > 0) {
        queryPattern = Pattern.compile(Joiner.on("|").join(patterns),
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    }

    // As we're going to modify documents, we need to copy them to
    // avoid ConcurrentModificationExceptions.
    final List<Document> inputDocuments = documents;
    final List<Document> outputDocuments = Lists.newArrayListWithCapacity(inputDocuments.size());

    for (Document document : inputDocuments) {
        final Document clonedDocument = document.clone();
        for (String fieldName : fields) {
            highlightQueryTerms(clonedDocument, fieldName, queryPattern);
        }
        outputDocuments.add(clonedDocument);
    }
    documents = outputDocuments;
}

From source file:GIST.IzbirkomExtractor.Russian.OrdinalFactory.java

/**
 * Default constructor//from   ww w. j a  va2 s . com
 */
public OrdinalFactory() {

    /* creating varions maps and hash tables */
    stems0_10_lookup = new HashMap<String, Integer>(stems0_10.length);
    for (int i = 0; i < stems0_10.length; i++)
        stems0_10_lookup.put(stems0_10[i], i);

    stems11_19_lookup = new HashMap<String, Integer>(stems11_19.length);
    for (int i = 0; i < stems11_19.length; i++)
        stems11_19_lookup.put(stems11_19[i], i);

    /* create matching patterns for parsing */
    /* pattern for 1-, 2-, ... */
    digits_numeral_pat = Pattern.compile("\\b(\\d{1,2})(?:(-)?([-?]{1,2}))?\\b",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    /* pattern for , , ... */
    StringBuilder sb0_10_pat = new StringBuilder();
    sb0_10_pat.append("\\b(");
    sb0_10_pat.append(StringUtils.join(stems0_10, '|'));
    sb0_10_pat.append(")([-?]{1,3})\\b");
    stem0_10_numeral_pat = Pattern.compile(sb0_10_pat.toString(),
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    /* pattern for , , ... */
    StringBuilder sb11_19_pat = new StringBuilder();
    sb11_19_pat.append("\\b(");
    sb11_19_pat.append(StringUtils.join(stems11_19, '|'));
    sb11_19_pat.append(")([-?]{1,2})\\b");
    stem11_19_numeral_pat = Pattern.compile(sb11_19_pat.toString(),
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    // TODO: patterns for 20+
}

From source file:org.alfresco.repo.security.authority.GetAuthoritiesCannedQuery.java

private Pattern getPattern(String searchValue) {
    if (searchValue == null) {
        return null;
    }/*from ww w.j  ava 2s  . c om*/

    // Escape characters of regex expressions
    searchValue = "^"
            + searchValue.replaceAll("\\\\", "\\\\\\\\").replaceAll("\\.", "\\\\.").replaceAll("\\?", ".")
                    .replaceAll("\\*", ".*").replaceAll("\\[", "\\\\[").replaceAll("\\]", "\\\\]")
                    .replaceAll("\\(", "\\\\(").replaceAll("\\)", "\\\\)").replaceAll("\\{", "\\\\{")
                    .replaceAll("\\}", "\\\\}").replaceAll("\\^", "\\\\^").replaceAll("\\$", "\\\\\\$")
                    .replaceAll("\\:", "\\\\:").replaceAll("\\\"", "\\\\\"").replaceAll("\\<", "\\\\<")
                    .replaceAll("\\>", "\\\\>").replaceAll("\\/", "\\\\/").replaceAll("\\|", "\\\\|");
    return Pattern.compile(searchValue, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
}

From source file:Normalization.TextNormalization.java

public String removeTwoLetterWordsFromString(String content) {

    String utf8tweet = "";
    try {//from  ww w .j a va 2 s  .  c  o m
        byte[] utf8Bytes = content.getBytes("UTF-8");

        utf8tweet = new String(utf8Bytes, "UTF-8");
    } catch (UnsupportedEncodingException e) {
    }

    final String regex = "((^|\\s)(\\w{1,2})(\\s|$))";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    utf8tweet = unicodeOutlierMatcher.replaceAll(" ");
    return utf8tweet;
}

Example usage for java.util.regex Pattern UNICODE_CASE

Introduction

Prototype

Document

Usage