Example usage for java.util.regex Pattern UNICODE_CASE

List of usage examples for java.util.regex Pattern UNICODE_CASE

Introduction

In this page you can find the example usage for java.util.regex Pattern UNICODE_CASE.

Prototype

int UNICODE_CASE

To view the source code for java.util.regex Pattern UNICODE_CASE, click the Source Link.

Document

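Enables Unicode-aware case folding. When this flag is specified together with CASE_INSENSITIVE, case-insensitive matching is done in a manner consistent with the Unicode Standard; by default, case-insensitive matching assumes that only US-ASCII characters are being matched.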

Usage

From source file:Normalization.TextNormalization.java

/**
 * Replaces every character matched by the regex {@code [\W]} (anything that is not a regex
 * "word" character) with a single space.
 *
 * <p>Despite the method name, no word-level filtering is performed: the text is only
 * scrubbed character by character, so accented letters and punctuation each become a space.
 *
 * @param content the text to clean; must not be {@code null}
 * @return the text with each non-word character replaced by one space
 */
public String removeNonEnglishWordsFromString(String content) {
    // Round-trip through UTF-8 as before, but via the Charset overloads: they declare no
    // checked exception, so there is no longer an empty catch block that could silently
    // turn the result into "".
    final byte[] utf8Bytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final String utf8tweet = new String(utf8Bytes, java.nio.charset.StandardCharsets.UTF_8);

    final Pattern unicodeOutliers = Pattern.compile("[\\W]",
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    return unicodeOutliers.matcher(utf8tweet).replaceAll(" ");
}

From source file:com.ikanow.aleph2.enrichment.utils.services.SimpleRegexFilterService.java

/**
 * Translates a string of single-letter regex flags into the bit mask accepted by
 * {@link Pattern#compile(String, int)}.
 *
 * <p>Recognised letters are {@code i} (CASE_INSENSITIVE), {@code x} (COMMENTS),
 * {@code s} (DOTALL), {@code m} (MULTILINE), {@code u} (UNICODE_CASE) and
 * {@code d} (UNIX_LINES). Any other character is ignored.
 *
 * @param flagsStr the flag letters, in any order; repeated letters are harmless
 * @return the bitwise OR of the corresponding {@code Pattern} constants, 0 if none matched
 */
public static int parseFlags(final String flagsStr) {
    int mask = 0;
    for (final char letter : flagsStr.toCharArray()) {
        if (letter == 'i') {
            mask |= Pattern.CASE_INSENSITIVE;
        } else if (letter == 'x') {
            mask |= Pattern.COMMENTS;
        } else if (letter == 's') {
            mask |= Pattern.DOTALL;
        } else if (letter == 'm') {
            mask |= Pattern.MULTILINE;
        } else if (letter == 'u') {
            mask |= Pattern.UNICODE_CASE;
        } else if (letter == 'd') {
            mask |= Pattern.UNIX_LINES;
        }
    }
    return mask;
}

From source file:nya.miku.wishmaster.ui.settings.AutohideActivity.java

/**
 * Opens the "autohide rule" dialog for the clicked list row.
 *
 * <p>If the clicked item is an {@code AutohideRule}, the dialog is pre-filled with that rule
 * and saving replaces it; otherwise the dialog starts empty and saving appends a new rule.
 * On save the regex is validated (non-empty, compilable) and at least one of the
 * comment/subject/name check boxes must be ticked; failures are reported with a toast and
 * nothing is stored.
 *
 * @param l        the list view that was clicked
 * @param v        the clicked row view
 * @param position the position of the clicked row in the list
 * @param id       the row id of the clicked item
 */
@SuppressLint("InflateParams")
@Override
protected void onListItemClick(ListView l, View v, int position, long id) {
    super.onListItemClick(l, v, position, id);
    Object item = l.getItemAtPosition(position);
    final int changeId;
    if (item instanceof AutohideRule) {
        // index of the rule inside rulesJson; the -1 offset presumably skips a leading
        // non-rule row (e.g. an "add rule" entry) - TODO confirm against the list adapter
        changeId = position - 1;
    } else {
        changeId = -1; // -1 means no existing rule is being edited: a new rule will be added
    }

    // Before Honeycomb the dialog content is inflated with an explicit theme wrapper
    Context dialogContext = Build.VERSION.SDK_INT < Build.VERSION_CODES.HONEYCOMB
            ? new ContextThemeWrapper(this, R.style.Neutron_Medium)
            : this;
    View dialogView = LayoutInflater.from(dialogContext).inflate(R.layout.dialog_autohide_rule, null);
    final EditText regexEditText = (EditText) dialogView.findViewById(R.id.dialog_autohide_regex);
    final Spinner chanSpinner = (Spinner) dialogView.findViewById(R.id.dialog_autohide_chan_spinner);
    final EditText boardEditText = (EditText) dialogView.findViewById(R.id.dialog_autohide_boardname);
    final EditText threadEditText = (EditText) dialogView.findViewById(R.id.dialog_autohide_threadnum);
    final CheckBox inCommentCheckBox = (CheckBox) dialogView.findViewById(R.id.dialog_autohide_in_comment);
    final CheckBox inSubjectCheckBox = (CheckBox) dialogView.findViewById(R.id.dialog_autohide_in_subject);
    final CheckBox inNameCheckBox = (CheckBox) dialogView.findViewById(R.id.dialog_autohide_in_name);

    chanSpinner.setAdapter(new ArrayAdapter<String>(this, android.R.layout.simple_spinner_item, chans));
    if (changeId != -1) {
        // editing: pre-fill every field from the existing rule
        AutohideRule rule = (AutohideRule) item;
        regexEditText.setText(rule.regex);
        int chanPosition = chans.indexOf(rule.chanName);
        // fall back to the first spinner entry when the stored chan name is not in the list
        chanSpinner.setSelection(chanPosition != -1 ? chanPosition : 0);
        boardEditText.setText(rule.boardName);
        threadEditText.setText(rule.threadNumber);
        inCommentCheckBox.setChecked(rule.inComment);
        inSubjectCheckBox.setChecked(rule.inSubject);
        inNameCheckBox.setChecked(rule.inName);
    } else {
        chanSpinner.setSelection(0);
    }

    // Validates the dialog input and stores the rule when the positive button is pressed
    DialogInterface.OnClickListener save = new DialogInterface.OnClickListener() {
        @Override
        public void onClick(DialogInterface dialog, int which) {
            String regex = regexEditText.getText().toString();
            if (regex.length() == 0) {
                Toast.makeText(AutohideActivity.this, R.string.autohide_error_empty_regex, Toast.LENGTH_LONG)
                        .show();
                return;
            }

            try {
                // compiled only to validate the syntax; the Pattern itself is discarded
                Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL);
            } catch (Exception e) {
                CharSequence message = null;
                if (e instanceof PatternSyntaxException) {
                    String eMessage = e.getMessage();
                    if (!TextUtils.isEmpty(eMessage)) {
                        // show the parser's message in monospace below the generic error text
                        SpannableStringBuilder a = new SpannableStringBuilder(
                                getString(R.string.autohide_error_incorrect_regex));
                        a.append('\n');
                        int startlen = a.length();
                        a.append(eMessage);
                        a.setSpan(new TypefaceSpan("monospace"), startlen, a.length(),
                                Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
                        message = a;
                    }
                }
                // any other failure, or a syntax error without a message, gets the generic text
                if (message == null)
                    message = getString(R.string.error_unknown);
                Toast.makeText(AutohideActivity.this, message, Toast.LENGTH_LONG).show();
                return;
            }

            AutohideRule rule = new AutohideRule();
            int spinnerSelectedPosition = chanSpinner.getSelectedItemPosition();
            rule.regex = regex;
            rule.chanName = spinnerSelectedPosition > 0 ? chans.get(spinnerSelectedPosition) : ""; // spinner position 0 is stored as an empty chan name; presumably it means "any chan" - TODO confirm
            rule.boardName = boardEditText.getText().toString();
            rule.threadNumber = threadEditText.getText().toString();
            rule.inComment = inCommentCheckBox.isChecked();
            rule.inSubject = inSubjectCheckBox.isChecked();
            rule.inName = inNameCheckBox.isChecked();

            // a rule that applies to none of comment/subject/name is rejected
            if (!rule.inComment && !rule.inSubject && !rule.inName) {
                Toast.makeText(AutohideActivity.this, R.string.autohide_error_no_condition, Toast.LENGTH_LONG)
                        .show();
                return;
            }

            if (changeId == -1) {
                // new rule: append
                rulesJson.put(rule.toJson());
            } else {
                // existing rule: overwrite in place
                rulesJson.put(changeId, rule.toJson());
            }
            rulesChanged();
        }
    };
    AlertDialog dialog = new AlertDialog.Builder(this).setView(dialogView)
            .setTitle(changeId == -1 ? R.string.autohide_add_rule_title : R.string.autohide_edit_rule_title)
            .setPositiveButton(R.string.autohide_save_button, save)
            .setNegativeButton(android.R.string.cancel, null).create();
    dialog.setCanceledOnTouchOutside(false);
    dialog.show();
}

From source file:org.etudes.util.HtmlHelper.java

/**
 * Matches one {@code <img ...>} tag whose {@code src} attribute starts with a local-only
 * transport. {@code [^>]} is used instead of {@code .} so that a match can never run past
 * the end of the tag it started in.
 */
private static final Pattern BAD_IMAGE_TAG_PATTERN = Pattern.compile(
        "<img\\s+[^>]*?src=\"(?:file:|webkit-fake-url:|x-apple-ql-id:)[^>]*>",
        Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

/**
 * Remove image tags that have for src "file://" "webkit-fake-url://" or "x-apple-ql-id://" prefixes (transports)
 *
 * <p>Only the offending tag itself is removed. Earlier versions matched with {@code .*?} under
 * DOTALL, which let a match start at a good image and end at a later bad one, deleting both
 * tags and all content in between. Tags closed with either {@code />} or {@code >} are handled.
 * Limitation: a literal {@code >} inside an attribute value ends the match early.
 *
 * @param data
 *        the html data; may be {@code null}.
 * @return The cleaned up data, or {@code null} if {@code data} was {@code null}.
 */
public static String stripBadImageTags(String data) {
    if (data == null) {
        return null;
    }

    return BAD_IMAGE_TAG_PATTERN.matcher(data).replaceAll("");
}

From source file:org.etudes.mneme.impl.ImporteCollegeTextServiceImpl.java

/**
 * Imports questions from text pasted out of eCollege.
 *
 * <p>When {@code pool} is {@code null}, a new pool and a new test assessment are created in
 * {@code context}, both titled from the first line of the text (or "eCollege paste" when no
 * title can be derived). Each "Collapse Question ... row Move Question" group in the text is
 * handed to {@code processECollegeTextGroup}, as is a trailing group that starts with
 * "Collapse Question" but lacks the closing marker. The assessment and pool are then saved.
 *
 * <p>NOTE(review): when a non-null {@code pool} is passed in, nothing is imported - all work
 * happens inside the {@code pool == null} branch. This mirrors the previous behavior; confirm
 * whether it is intended.
 *
 * @param context the context in which the pool and assessment are created
 * @param pool    an existing pool, or {@code null} to create one
 * @param text    the pasted text; {@code null} or empty text is ignored
 * @throws AssessmentPermissionException declared by the signature; raised by the services called here
 */
public void importQuestions(String context, Pool pool, String text) throws AssessmentPermissionException {
    if ((text == null) || (text.length() == 0))
        return;

    // replace any \r\n with just a \n
    text = text.replaceAll("\r\n", "\n");

    String title = "eCollege paste";
    Float points = Float.valueOf(1f);

    if (pool == null) {
        pool = this.poolService.newPool(context);

        // read title from the first line ex: Unit 2: Week 2 - Quiz
        // A single-line paste contains no "\n"; use the whole text as the first line rather
        // than failing with StringIndexOutOfBoundsException on substring(0, -1).
        int firstLineEnd = text.indexOf('\n');
        String findTitle = (firstLineEnd == -1) ? text : text.substring(0, firstLineEnd);

        String[] titleParts = findTitle.split("[:-]");
        if (titleParts.length == 2 && titleParts[1].length() != 0)
            title = titleParts[1].trim();
        else if (titleParts.length > 2)
            // keep everything from the second part onward, including later separators
            title = findTitle.substring(findTitle.indexOf(titleParts[1]));

        pool.setTitle(title);
        pool.setPointsEdit(points);

        // create assessment
        Assessment assmt = assessmentService.newAssessment(context);
        assmt.setType(AssessmentType.test);
        assmt.setTitle(title);

        Part part = assmt.getParts().addPart();

        Pattern p_groups = Pattern.compile("Collapse[\\s]*Question(.*?)[\\n]*[\\t]*row[\\t]*Move[\\s]*Question",
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL);
        Matcher m = p_groups.matcher(text);

        // sb collects whatever text is NOT part of a complete question group
        StringBuffer sb = new StringBuffer();
        while (m.find()) {
            String workOn = m.group(0);
            String[] lines = workOn.split("[\\n]");
            processECollegeTextGroup(pool, part, lines);
            m.appendReplacement(sb, "");
        }
        m.appendTail(sb);

        // remaining last text: a final question without the closing "Move Question" marker
        if (sb.length() != 0) {
            int lastGroupStart = sb.indexOf("Collapse Question");
            if (lastGroupStart != -1) {
                String workOn = sb.substring(lastGroupStart);
                String[] lines = workOn.split("[\\n]");
                processECollegeTextGroup(pool, part, lines);
            }
        }

        try {
            assmt.getGrading().setGradebookIntegration(Boolean.TRUE);

            if (assmt.getParts().getTotalPoints().floatValue() <= 0) {
                assmt.setNeedsPoints(Boolean.FALSE);
            }

            assessmentService.saveAssessment(assmt);
        } catch (AssessmentPolicyException ep) {
            // NOTE(review): a policy violation is silently ignored, as before; no logger is
            // visible from this method, so the failure cannot be reported here.
        }
        this.poolService.savePool(pool);

    }
}

From source file:org.eclipse.skalli.services.search.SearchQuery.java

/**
 * Compiles the given regular expression and installs it as this query's pattern.
 *
 * <p>A blank pattern is ignored. {@code DOTALL} is always applied; case-insensitive,
 * Unicode-aware matching is added when {@code ignoreCase} is set.
 *
 * @param pattern    the regular expression source
 * @param ignoreCase whether matching should ignore case
 * @throws QueryParseException if the expression has a syntax error
 */
public void setPattern(String pattern, boolean ignoreCase) throws QueryParseException {
    if (!StringUtils.isNotBlank(pattern)) {
        return;
    }
    try {
        final int compileFlags = ignoreCase
                ? (Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE)
                : Pattern.DOTALL;
        final Pattern compiled = Pattern.compile(pattern, compileFlags);
        setPattern(compiled);
    } catch (PatternSyntaxException syntaxError) {
        throw new QueryParseException("Pattern has a syntax error", syntaxError);
    }
}

From source file:org.etudes.util.HtmlHelper.java

/**
 * Remove any HTML comments from the data.
 *
 * <p>Complete comments ({@code <!-- ... -->}, possibly spanning lines) are deleted. Any
 * comment opener left over afterwards - one with no matching close - is deleted as well,
 * which leaves its would-be comment text in the content.
 *
 * @param data
 *        the html data.
 * @return The cleaned up data; the input itself when it is {@code null} or contains no comment opener.
 */
public static String stripComments(String data) {
    // nothing to do for null input or input without any comment opener
    if (data == null || !data.contains("<!--")) {
        return data;
    }

    // DOTALL lets "." cross line terminators; the reluctant "*?" stops at the first "-->"
    // so text between two separate comments survives
    final Pattern commentPattern = Pattern.compile("<!--.*?-->",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL);
    final String withoutComments = commentPattern.matcher(data).replaceAll("");

    // An unmatched opener would otherwise be closed by HtmlCleaner at the very end of the
    // document, turning the rest into one big comment. Dropping the opener exposes some
    // comment text but preserves the real content.
    return withoutComments.replace("<!--", "");
}

From source file:org.jamwiki.migrate.MediaWikiXmlImporter.java

/**
 * Convert all namespaces names from MediaWiki to JAMWiki local representation.
 *
 * <p>For every MediaWiki namespace whose mapped JAMWiki name is non-null and differs (ignoring
 * case), occurrences of the namespace prefix after "[[", "[[:", "{{" or "{{:" are rewritten to
 * the JAMWiki name. Compiled patterns are cached in {@code convertNamespaceMap}.
 *
 * <p>The namespace name is quoted before being placed in the regex, and the replacement text is
 * escaped, so names containing regex metacharacters, "$" or "\" are treated literally.
 *
 * @param topicContent the topic text to convert
 * @return the topic text with namespace prefixes converted
 */
private String convertToJAMWikiNamespaces(String topicContent) {
    for (Map.Entry<String, String> entry : mediawikiNamespaceMap.entrySet()) {
        final String mediawikiName = entry.getKey();
        final String jamwikiName = entry.getValue();
        if (jamwikiName == null || StringUtils.equalsIgnoreCase(jamwikiName, mediawikiName)) {
            // nothing to convert for this namespace
            continue;
        }
        Pattern pattern = this.convertNamespaceMap.get(mediawikiName);
        if (pattern == null) {
            // convert from Mediawiki to JAMWiki namespaces.  handle "[[", "[[:", "{{", "{{:".
            // note that "?:" is a regex non-capturing group.
            String patternString = "((?:(?:\\[\\[)|(?:\\{\\{))[ ]*(?::)?)" + Pattern.quote(mediawikiName)
                    + Namespace.SEPARATOR;
            pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
            this.convertNamespaceMap.put(mediawikiName, pattern);
        }
        // "$1" re-emits the captured opening brackets; everything after it is literal text
        String replacement = "$1"
                + java.util.regex.Matcher.quoteReplacement(jamwikiName + Namespace.SEPARATOR);
        topicContent = pattern.matcher(topicContent).replaceAll(replacement);
    }
    return topicContent;
}

From source file:com.sangupta.pep.Generator.java

/**
 * Builds the {@code SlideVariables} for one slide from its rendered contents.
 *
 * <p>If the whole slide matches {@code PATTERN}, header, level, title and content are taken
 * from its groups 1-4; otherwise the slide has empty header/title, level 0 and the full text
 * as content. Macros are then processed, and if the content contains a "presenter notes"
 * heading (any {@code <hN>} level, case-insensitive), everything after that heading becomes
 * the presenter notes and everything before it remains the content.
 *
 * @param slideContents the contents of a single slide
 * @return the populated slide variables
 */
private SlideVariables getSlideVariables(String slideContents) {
    SlideVariables vars = new SlideVariables();

    Matcher matcher = PATTERN.matcher(slideContents);

    if (matcher.matches()) {
        vars.setHeader(matcher.group(1));
        vars.setLevel(Integer.valueOf(matcher.group(2)));
        vars.setTitle(matcher.group(3));
        vars.setContent(matcher.group(4));
    } else {
        vars.setHeader("");
        vars.setTitle("");
        vars.setContent(slideContents);
        vars.setLevel(0);
    }

    // process slide classes
    ContentAndClasses cc = processMacros(vars);

    String content = cc.getContent();
    vars.setContent(content);
    vars.setClasses(cc.getClasses().toArray(new String[0]));

    if (StringUtils.isNotEmpty(content)) {
        content = content.trim();
        Pattern p2 = Pattern.compile("<h\\d[^>]*>presenter notes</h\\d>",
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL);
        Matcher m2 = p2.matcher(content);

        // find(), not matches(): the heading sits somewhere inside the content. With
        // matches() the split below could only trigger when the content was nothing but
        // the heading, leaving both the notes and the content empty.
        if (m2.find()) {
            vars.setPresenterNotes(content.substring(m2.end()).trim());
            content = content.substring(0, m2.start());

            vars.setContent(content);
        }
    }

    vars.setRelativeSourcePath(this.inputFile.getPath());
    vars.setAbsoluteSourcePath(this.inputFile.getAbsolutePath());

    return vars;
}

From source file:org.codelibs.fess.helper.ViewHelper.java

/**
 * Returns the display title for a search result document, optionally with query terms
 * highlighted.
 *
 * <p>The title is taken from the title field, falling back to the filename field and then the
 * URL field when blank. It is abbreviated to the configured maximum length (when that is
 * greater than -1) and passed through {@code LaFunctions.h}. If title highlighting is enabled
 * and a query set is available, each case-insensitive occurrence of a query term is wrapped in
 * {@code highlightTagPre}/{@code highlightTagPost}.
 *
 * @param document the search result document
 * @return the title text, with highlight tags inserted when enabled
 */
public String getContentTitle(final Map<String, Object> document) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    String title = DocumentUtil.getValue(document, fessConfig.getIndexFieldTitle(), String.class);
    if (StringUtil.isBlank(title)) {
        title = DocumentUtil.getValue(document, fessConfig.getIndexFieldFilename(), String.class);
        if (StringUtil.isBlank(title)) {
            title = DocumentUtil.getValue(document, fessConfig.getIndexFieldUrl(), String.class);
        }
    }
    final int size = fessConfig.getResponseMaxTitleLengthAsInteger();
    if (size > -1) {
        title = StringUtils.abbreviate(title, size);
    }
    final String value = LaFunctions.h(title);
    if (!fessConfig.isResponseHighlightContentTitleEnabled()) {
        return value;
    }
    return getQuerySet().map(querySet -> {
        if (querySet.isEmpty()) {
            // An empty alternation compiles to a regex that matches the empty string at every
            // position, which would insert highlight tags between all characters.
            return value;
        }
        final Matcher matcher = Pattern
                .compile(querySet.stream().map(LaFunctions::h).map(Pattern::quote)
                        .collect(Collectors.joining("|")), Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE)
                .matcher(value);
        final StringBuffer buf = new StringBuffer(value.length() + 100);
        while (matcher.find()) {
            // The replacement is literal text: escape it so a "$" or "\" in the matched title
            // text is not interpreted as a group reference or escape by appendReplacement.
            matcher.appendReplacement(buf,
                    Matcher.quoteReplacement(highlightTagPre + matcher.group(0) + highlightTagPost));
        }
        matcher.appendTail(buf);
        return buf.toString();
    }).orElse(value);
}