Example usage for java.text Normalizer normalize

List of usage examples for java.text Normalizer normalize

Introduction

On this page you can find example usages of java.text Normalizer normalize.

Prototype

public static String normalize(CharSequence src, Form form) 

Source Link

Document

Normalize a sequence of char values.

Usage

From source file:com.evolveum.midpoint.model.impl.filter.DiacriticsFilter.java

/**
 * Strips combining diacritical marks from the string value of the given property value.
 *
 * <p>The text obtained via {@code getStringValue} is decomposed with NFD and every run of
 * characters in the Combining Diacritical Marks block is removed. The result is written
 * back into the same {@code propertyValue} instance, which is then returned. When the text
 * is empty according to {@code StringUtils.isEmpty}, the value is returned untouched.
 *
 * @param propertyValue the value to filter; checked with {@code Validate.notNull}
 * @return the same {@code propertyValue} instance that was passed in
 */
@Override
public <T extends Object> PrismPropertyValue<T> apply(PrismPropertyValue<T> propertyValue) {
    Validate.notNull(propertyValue, "Node must not be null.");

    final String original = getStringValue(propertyValue);
    if (!StringUtils.isEmpty(original)) {
        // NFD separates base letters from their accents so the accents can be dropped.
        final String decomposed = Normalizer.normalize(original, Form.NFD);
        final String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
        propertyValue.setValue((T) stripped);
    }
    return propertyValue;
}

From source file:de.micromata.genome.util.matcher.norm.StringNormalizeUtils.java

/**
 * Removes umlauts and accents from a string.
 *
 * <p>The input is first decomposed (NFD), then everything matched by
 * {@code DEACCENT_PATTERN} is deleted, and finally the result is passed through
 * {@code replaceSpecifcComposits} (presumably where the sharp s becomes "ss" -
 * confirm against that helper).
 *
 * @param str the input
 * @return the converted string
 */
public static String deAccent(String str) {
    // NFD turns one composite character into a base character followed by its accent/umlaut sign.
    final String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
    // Drop whatever the de-accent pattern matches (the separated accent/umlaut signs).
    final String withoutMarks = DEACCENT_PATTERN.get().matcher(decomposed).replaceAll("");
    return replaceSpecifcComposits(withoutMarks);
}

From source file:org.brushingbits.jnap.common.seo.SeoStringUtil.java

/**
 * Turns arbitrary text into a lower-case, dash-separated string suitable for URLs.
 *
 * <p>Steps, in order: trim, NFD-normalize, optionally remove stop words, collapse
 * whitespace runs, replace whitespace with {@code -}, drop every character that is
 * neither a word character ({@code \w}) nor {@code -}, and lower-case using English rules.
 *
 * @param src the text to convert; must not be {@code null}
 * @param locale locale used to look up a {@code SeoStopWordCleaner}; when {@code null}
 *        stop-word removal is skipped
 * @return the SEO-friendly form of {@code src}
 */
public static String makeSeoFriendly(String src, Locale locale) {
    // Normalize the TRIMMED text. Previously the trimmed value was immediately overwritten
    // by normalizing the raw src, so surrounding whitespace survived and became '-'.
    String seoFriendlyText = Normalizer.normalize(src.trim(), Form.NFD);

    // try to remove stop words if locale is specified
    if (locale != null) {
        SeoStopWordCleaner wordCleaner = null;
        for (SeoStopWordCleaner cleaner : seoStopWordCleaners) {
            if (ArrayUtils.contains(cleaner.getSupportedLocales(), locale)) {
                wordCleaner = cleaner;
                break;
            }
        }
        if (wordCleaner == null) {
            logger.warn(MessageFormat.format(
                    "A locale was specified ({0}) but no " + "SeoStopWordCleaner was found for it",
                    locale.toString()));
        } else {
            seoFriendlyText = wordCleaner.clean(seoFriendlyText);
        }
    }

    // replace duplicated spaces with a single one
    seoFriendlyText = seoFriendlyText.replaceAll("[\\s]{2,}", " ");

    // replace spaces with '-'
    seoFriendlyText = seoFriendlyText.replaceAll("[\\s]", "-");

    // remove everything that is not a word character or '-'; this also drops the
    // combining marks that NFD split off from accented letters
    seoFriendlyText = seoFriendlyText.replaceAll("[^\\w-]", StringUtils.EMPTY);

    // convert to lowercase (using english locale rules) and return
    return seoFriendlyText.toLowerCase(Locale.ENGLISH);
}

From source file:com.evolveum.midpoint.prism.polystring.AbstractPolyStringNormalizer.java

/**
 * Applies Unicode Normalization Form Compatibility Decomposition (NFKD) to a string.
 *
 * @param s the text to decompose
 * @return {@code s} in NFKD form
 */
protected String nfkd(String s) {
    final String decomposed = Normalizer.normalize(s, Normalizer.Form.NFKD);
    return decomposed;
}

From source file:com.mec.DAO.Superior.SuperiorDAO.java

/**
 * Reduces a string to upper-case ASCII.
 *
 * <p>The input is decomposed (NFD) so accented letters split into a base letter plus
 * combining marks, every non-ASCII character is removed, and the remainder is upper-cased.
 *
 * @param s the text to clean; must not be {@code null}
 * @return the ASCII-only, upper-cased form of {@code s}
 */
private String clean(String s) {
    // Locale.ROOT keeps the result ASCII: with a Turkish/Azeri default locale the
    // no-arg toUpperCase() maps 'i' to U+0130, undoing the ASCII filter above it.
    return Normalizer.normalize(s, Normalizer.Form.NFD)
            .replaceAll("[^\\p{ASCII}]", "")
            .toUpperCase(java.util.Locale.ROOT);
}

From source file:org.opensingular.lib.commons.base.SingularUtil.java

/**
 * Returns the ASCII-only form of a string.
 *
 * <p>The text is decomposed (NFD) so accented letters become a base letter followed by
 * combining marks; every character outside ASCII is then removed.
 *
 * @param original the text to normalize
 * @return {@code original} with all non-ASCII characters removed after decomposition
 */
public static String normalize(String original) {
    final String decomposed = Normalizer.normalize(original, Normalizer.Form.NFD);
    final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
    return asciiOnly;
}

From source file:biblivre3.utils.TextUtils.java

/**
 * Removes combining diacritical marks from a string.
 *
 * @param input the text to process; may be {@code null}
 * @return {@code null} when {@code input} is {@code null}; otherwise the NFD-decomposed
 *         text with every run of Combining Diacritical Marks characters removed
 */
public static String removeDiacriticals(final String input) {
    return input == null
            ? null
            : Normalizer.normalize(input, Normalizer.Form.NFD)
                    .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:org.drftpd.util.HttpUtils.java

/**
 * Converts an HTML fragment to plain Basic Latin text.
 *
 * <p>Newlines are removed, HTML entities are unescaped, the text is decomposed (NFD),
 * every character outside the Basic Latin block is dropped, and finally each
 * {@code <...>} span is cut out.
 *
 * @param input the HTML text; must not be {@code null}
 * @return the text with tags removed; if a {@code <} has no closing {@code >}, that
 *         {@code <} and everything after it are left in place
 */
public static String htmlToString(String input) {
    String str = input.replaceAll("\n", "");
    str = StringEscapeUtils.unescapeHtml4(str);
    str = Normalizer.normalize(str, Normalizer.Form.NFD);
    str = str.replaceAll("\\P{InBasic_Latin}", "");

    int startPos = str.indexOf('<');
    while (startPos >= 0) {
        int endPos = str.indexOf('>', startPos);
        if (endPos < 0) {
            // Unmatched '<' (e.g. an unescaped "&lt;"): nothing more can be stripped.
            // Without this exit the loop would spin forever on an unchanged string.
            break;
        }
        str = str.substring(0, startPos) + str.substring(endPos + 1);
        // Text before startPos contained no '<', so resuming the search here is
        // equivalent to rescanning from the beginning.
        startPos = str.indexOf('<', startPos);
    }
    return str;
}

From source file:org.voyanttools.trombone.tool.analysis.DistributedTermFrequencies.java

public DistributedTermFrequencies(String string, int[] freqs) {
    this.string = string;
    this.freqs = new int[freqs.length];
    for (int i = 0, len = freqs.length; i < len; i++) {
        add(i, freqs[i]);//from  w ww  .  j  av  a 2s .  c o  m
    }
    this.normalizedString = Normalizer.normalize(string.toLowerCase(), Normalizer.Form.NFD);
}

From source file:biblivre3.utils.TextUtils.java

/**
 * Composes a string into Unicode Normalization Form C (NFC).
 *
 * @param input the text to compose; may be {@code null}
 * @return {@code null} when {@code input} is {@code null}; otherwise {@code input} in NFC form
 */
public static String combine(final String input) {
    if (input != null) {
        return Normalizer.normalize(input, Normalizer.Form.NFC);
    }
    return null;
}