Example usage for java.text Normalizer normalize

List of usage examples for java.text Normalizer normalize

Introduction

On this page you can find example usages of java.text.Normalizer.normalize.

Prototype

public static String normalize(CharSequence src, Form form) 

Source Link

Document

Normalize a sequence of char values.

Usage

From source file:org.commcare.utils.StringUtils.java

/**
 * @param input A non-null string/*from  ww  w. j  av a2  s  .  c  om*/
 * @return a canonical version of the passed in string that is lower cased and has removed diacritical marks
 * like accents.
 */
@SuppressLint("NewApi")
public synchronized static String normalize(String input) {
    if (normalizationCache == null) {
        normalizationCache = new LruCache<>(cacheSize);

        diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    }
    String cachedString = normalizationCache.get(input);
    if (cachedString != null) {
        return cachedString;
    }

    //Initialized the normalized string (If we can, we'll use the Normalizer API on it)
    String normalized = input;

    //If we're above gingerbread we'll normalize this in NFD form 
    //which helps a lot. Otherwise we won't be able to clear up some of those
    //issues, but we can at least still eliminate diacritics.
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
        normalized = Normalizer.normalize(input, Normalizer.Form.NFD);
    } else {
        //TODO: I doubt it's worth it, but in theory we could run
        //some other normalization for the minority of pre-API9
        //devices.
    }

    String output = diacritics.matcher(normalized).replaceAll("").toLowerCase();

    normalizationCache.put(input, output);

    return output;
}

From source file:com.switchfly.inputvalidation.canonicalizer.StringCanonicalizer.java

/**
 * Canonicalizes the given content by normalizing it to Unicode NFC form.
 *
 * @param content the text to canonicalize; content for which {@code StringUtils.isBlank}
 *                is true is returned as-is
 * @return the NFC-normalized content, or the original value when it is blank
 * @throws IllegalArgumentException if normalization fails for any reason (the original
 *                                  exception is attached as the cause)
 */
@Override
public String execute(String content) {
    if (!StringUtils.isBlank(content)) {
        try {
            return Normalizer.normalize(content, Normalizer.Form.NFC);
        } catch (Exception e) {
            throw new IllegalArgumentException("Canonicalization error", e);
        }
    }
    // Nothing to normalize.
    return content;
}

From source file:com.evolveum.midpoint.prism.polystring.PrismDefaultPolyStringNormalizer.java

/**
 * Normalizes a string: trims it, applies NFKD decomposition, removes every character
 * that is not a word character, whitespace or digit, collapses whitespace runs to a
 * single space, and lower cases the result.
 *
 * @param orig the string to normalize; may be {@code null}
 * @return the normalized string, {@code null} when {@code orig} is {@code null}
 */
@Override
public String normalize(String orig) {
    if (orig == null) {
        return null;
    }
    String decomposed = Normalizer.normalize(StringUtils.trim(orig), Normalizer.Form.NFKD);
    String cleaned = decomposed
            .replaceAll("[^\\w\\s\\d]", "")
            .replaceAll("\\s+", " ");
    // A result consisting only of whitespace is reported as the empty string.
    String result = StringUtils.isBlank(cleaned) ? "" : cleaned;
    return StringUtils.lowerCase(result);
}

From source file:Utils.StringOperations.java

/**
 * Lower cases the input, applies the {@code InputReplace} -> {@code OutputReplace}
 * substitutions, SQL-escapes the result, decomposes it to NFD form and finally removes
 * double quotes and square brackets while collapsing doubled single quotes.
 *
 * @param s the text to clean; must not be {@code null}
 * @return the cleaned, NFD-normalized text
 */
public static String stripAccentsWithoutUnnecessaryCharacters(String s) {
    String substituted = org.apache.commons.lang.StringUtils.replaceEachRepeatedly(s.toLowerCase(), InputReplace,
            OutputReplace);
    String escaped = StringEscapeUtils.escapeSql(substituted);
    return Normalizer.normalize(escaped.toLowerCase(), Normalizer.Form.NFD)
            .replaceAll("''", "'") // collapse doubled single quotes
            .replaceAll("\"", "") // drop double quotes
            .replaceAll("\\]", "") // drop closing brackets
            .replaceAll("\\[", ""); // drop opening brackets
}

From source file:com.github.javarch.support.SlugGenerator.java

/** Runs of combining diacritical marks left over after NFD decomposition. */
private static final Pattern SLUG_DIACRITICS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

/** Runs of ASCII punctuation. */
private static final Pattern SLUG_PUNCTUATION = Pattern.compile("\\p{Punct}+");

/** Runs of whitespace. */
private static final Pattern SLUG_WHITESPACE = Pattern.compile("\\s+");

/**
 * Turns arbitrary text into a lower-case, dash-separated slug.
 *
 * <p>The text is decomposed (NFD), diacritical marks are removed, punctuation runs become a
 * single space, the result is trimmed, whitespace runs become a single dash, and the slug is
 * lower cased. Lower casing uses {@code Locale.ROOT} so the slug does not depend on the
 * JVM's default locale (for example, "I" must not become a dotless "ı" under a Turkish
 * locale).
 *
 * @param str the text to slugify; must not be {@code null}
 * @return the slug for {@code str}
 * @throws NullPointerException if {@code str} is {@code null}
 */
public String encode(String str) {
    // Decompose any funny characters so that accents become separate combining marks.
    String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);

    // Remove all the diacritic marks.
    String withoutMarks = SLUG_DIACRITICS.matcher(decomposed).replaceAll("");

    // Transform the punctuation into spaces first, so that leading or trailing
    // punctuation is removed by the trim.
    String spaced = SLUG_PUNCTUATION.matcher(withoutMarks).replaceAll(" ").trim();

    // Replace all the whitespace with a dash.
    String dashed = SLUG_WHITESPACE.matcher(spaced).replaceAll("-");

    return dashed.toLowerCase(java.util.Locale.ROOT);
}

From source file:org.sonar.fortify.base.FortifyConstants.java

/**
 * Builds a lower-case, underscore-separated key from arbitrary text.
 *
 * <p>The text is NFD-decomposed and every non-ASCII character is dropped. Each remaining
 * character that is neither a word character nor {@code '+'} is then replaced with an
 * underscore. At most one leading and one trailing underscore are removed before the
 * result is lower cased with the English locale.
 *
 * @param s the text to convert; must not be {@code null}
 * @return the key derived from {@code s}
 */
private static String slugifyForKey(String s) {
    String asciiOnly = Normalizer.normalize(s, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
    String underscored = asciiOnly
            .replaceAll("[^\\w+]", "_")
            .replaceAll("\\s+", "_")
            .replaceAll("[-]+", "_");
    // Only a single underscore is stripped from each end.
    String trimmed = underscored.replaceAll("^_", "").replaceAll("_$", "");
    return trimmed.toLowerCase(Locale.ENGLISH);
}

From source file:com.docdoku.core.util.Tools.java

/**
 * Removes accents from the given text and replaces every space character with an
 * underscore.
 *
 * <p>The text is NFD-decomposed so that accents become separate combining marks, which are
 * then deleted. Characters matched by {@code \p{javaSpaceChar}} are replaced one-for-one
 * with {@code '_'}.
 *
 * @param s the text to process; must not be {@code null}
 * @return the unaccented text with underscores in place of space characters
 */
public static String unAccent(String s) {
    return Normalizer.normalize(s, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .replaceAll("\\p{javaSpaceChar}", "_");
}

From source file:com.github.tomakehurst.wiremock.common.SafeNames.java

/**
 * Derives a safe, lower-case name from the given text.
 *
 * <p>Matches of {@code WHITESPACE} are replaced with dashes, the result is NFD-decomposed
 * and passed through {@code sanitise}. Leading and trailing underscores are then stripped,
 * the name is truncated to 200 characters and lower cased with the English locale.
 *
 * @param name the text to convert
 * @return the safe name derived from {@code name}
 */
public static String makeSafeName(String name) {
    String dashed = WHITESPACE.matcher(name).replaceAll("-");
    String sanitised = sanitise(Normalizer.normalize(dashed, Normalizer.Form.NFD));

    // Strip any underscores at either end before truncating.
    String stripped = sanitised.replaceAll("^[_]*", "").replaceAll("[_]*$", "");

    return StringUtils.truncate(stripped, 200).toLowerCase(Locale.ENGLISH);
}

From source file:com.geecko.QuickLyric.lyrics.LyricWiki.java

/**
 * Searches for lyrics matching the given query.
 *
 * <p>" song" is appended to the query and diacritical marks are stripped from it. The
 * search page built from {@code baseSearchUrl} is then fetched, and every "result" element
 * inside the first "Results" element whose {@code h1} text splits into exactly two parts
 * around ':' is turned into a {@code Lyrics} entry (artist, title, URL, source).
 *
 * @param query the free-text search query
 * @return the matching entries; empty when nothing matched or an {@code IOException}
 *         occurred (the exception's stack trace is printed)
 */
@Reflection
public static ArrayList<Lyrics> search(String query) {
    ArrayList<Lyrics> found = new ArrayList<>();
    String searchTerms = Normalizer.normalize(query + " song", Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    try {
        URL queryURL = new URL(String.format(baseSearchUrl, URLEncoder.encode(searchTerms, "UTF-8")));
        Document page = Jsoup.connect(queryURL.toExternalForm()).get();
        Elements containers = page.getElementsByClass("Results");
        if (containers.size() < 1) {
            return found;
        }
        for (Element hit : containers.get(0).getElementsByClass("result")) {
            // The heading is expected to split into exactly two parts: artist and title.
            String[] tags = hit.getElementsByTag("h1").text().split(":");
            if (tags.length == 2) {
                String url = hit.getElementsByTag("a").attr("href");
                Lyrics lyrics = new Lyrics(SEARCH_ITEM);
                lyrics.setArtist(tags[0]);
                lyrics.setTitle(tags[1]);
                lyrics.setURL(url);
                lyrics.setSource(domain);
                found.add(lyrics);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return found;
}

From source file:eu.annocultor.api.Common.java

/**
 * Removes diacritics from the given text by NFD-decomposing it and deleting every match
 * of {@code removeDiacriticPattern}.
 *
 * @param text the text to process; must not be {@code null}
 * @return the text with all pattern matches removed
 */
public static String removeDiacritics(String text) {
    return removeDiacriticPattern
            .matcher(Normalizer.normalize(text, Normalizer.Form.NFD))
            .replaceAll("");
}