List of usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:org.commcare.utils.StringUtils.java
/** * @param input A non-null string/*from ww w. j av a2 s . c om*/ * @return a canonical version of the passed in string that is lower cased and has removed diacritical marks * like accents. */ @SuppressLint("NewApi") public synchronized static String normalize(String input) { if (normalizationCache == null) { normalizationCache = new LruCache<>(cacheSize); diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); } String cachedString = normalizationCache.get(input); if (cachedString != null) { return cachedString; } //Initialized the normalized string (If we can, we'll use the Normalizer API on it) String normalized = input; //If we're above gingerbread we'll normalize this in NFD form //which helps a lot. Otherwise we won't be able to clear up some of those //issues, but we can at least still eliminate diacritics. if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) { normalized = Normalizer.normalize(input, Normalizer.Form.NFD); } else { //TODO: I doubt it's worth it, but in theory we could run //some other normalization for the minority of pre-API9 //devices. } String output = diacritics.matcher(normalized).replaceAll("").toLowerCase(); normalizationCache.put(input, output); return output; }
From source file:com.switchfly.inputvalidation.canonicalizer.StringCanonicalizer.java
@Override public String execute(String content) { if (StringUtils.isBlank(content)) { return content; }// w w w. ja v a 2s . co m try { return Normalizer.normalize(content, Normalizer.Form.NFC); } catch (Exception e) { throw new IllegalArgumentException("Canonicalization error", e); } }
From source file:com.evolveum.midpoint.prism.polystring.PrismDefaultPolyStringNormalizer.java
@Override public String normalize(String orig) { if (orig == null) { return null; }//from w ww. j av a 2 s . co m String s = StringUtils.trim(orig); s = Normalizer.normalize(s, Normalizer.Form.NFKD); s = s.replaceAll("[^\\w\\s\\d]", ""); s = s.replaceAll("\\s+", " "); if (StringUtils.isBlank(s)) { s = ""; } return StringUtils.lowerCase(s); }
From source file:Utils.StringOperations.java
public static String stripAccentsWithoutUnnecessaryCharacters(String s) { s = org.apache.commons.lang.StringUtils.replaceEachRepeatedly(s.toLowerCase(), InputReplace, OutputReplace); s = StringEscapeUtils.escapeSql(s);//www . jav a2s .c o m s = Normalizer.normalize(s.toLowerCase(), Normalizer.Form.NFD); s = s.replaceAll("''", "'"); s = s.replaceAll("\"", ""); s = s.replaceAll("\\]", ""); s = s.replaceAll("\\[", ""); //LOG.debug("after stripAccents: " + s); return s; }
From source file:com.github.javarch.support.SlugGenerator.java
public String encode(String str) { Pattern p = Pattern.compile("\\p{InCombiningDiacriticalMarks}+", Pattern.UNICODE_CASE); Pattern p2 = Pattern.compile("\\p{Punct}+", Pattern.UNICODE_CASE); Pattern p3 = Pattern.compile("\\s+", Pattern.UNICODE_CASE); // Decompose any funny characters. String link = Normalizer.normalize(str, Normalizer.Form.NFD).replaceAll(p.pattern(), "") // remove all the diacritic marks .replaceAll(p2.pattern(), " ").trim() // transform the punctuation into spaces first, so that we can trim some ending or beginning punctuation .replaceAll(p3.pattern(), "-") // and replace all the whitespace with a dash. .toLowerCase();//ww w . j a v a2s . c o m return link; }
From source file:org.sonar.fortify.base.FortifyConstants.java
/**
 * Converts arbitrary text into a lower-case, ASCII-only key.
 *
 * The text is decomposed to NFD and all non-ASCII characters (including the
 * separated combining marks) are dropped. Every remaining character that is
 * neither a word character nor a literal {@code +} is replaced by an
 * underscore, one leading and one trailing underscore are removed, and the
 * result is lower cased with the English locale.
 *
 * @param s the text to convert; must not be {@code null}
 * @return the key form of {@code s}
 */
private static String slugifyForKey(String s) {
    String key = Normalizer.normalize(s, Normalizer.Form.NFD);
    key = key.replaceAll("[^\\p{ASCII}]", "");
    key = key.replaceAll("[^\\w+]", "_");
    key = key.replaceAll("\\s+", "_");
    key = key.replaceAll("[-]+", "_");
    // Only a single underscore is stripped from each end.
    key = key.replaceAll("^_", "");
    key = key.replaceAll("_$", "");
    return key.toLowerCase(Locale.ENGLISH);
}
From source file:com.docdoku.core.util.Tools.java
/**
 * Removes accents from a string and replaces space characters with underscores.
 *
 * The input is decomposed to NFD so that accents become separate combining
 * marks, those marks are deleted, and every character matched by
 * {@code \p{javaSpaceChar}} is replaced by {@code _}.
 *
 * @param s the text to process; must not be {@code null}
 * @return the unaccented text with space characters replaced by underscores
 */
public static String unAccent(String s) {
    String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    String withoutMarks = Pattern.compile("\\p{InCombiningDiacriticalMarks}+")
            .matcher(decomposed)
            .replaceAll("");
    return withoutMarks.replaceAll("\\p{javaSpaceChar}", "_");
}
From source file:com.github.tomakehurst.wiremock.common.SafeNames.java
/**
 * Derives a safe, lower-case name from arbitrary text.
 *
 * Matches of {@code WHITESPACE} are replaced by dashes, the text is decomposed
 * to NFD and passed through {@code sanitise}, leading and trailing underscores
 * are stripped, and the result is truncated to 200 characters before being
 * lower cased with the English locale.
 *
 * @param name the text to convert; must not be {@code null}
 * @return the safe name
 */
public static String makeSafeName(String name) {
    String dashed = WHITESPACE.matcher(name).replaceAll("-");
    String trimmed = sanitise(Normalizer.normalize(dashed, Normalizer.Form.NFD))
            .replaceAll("^[_]*", "")
            .replaceAll("[_]*$", "");
    return StringUtils.truncate(trimmed, 200).toLowerCase(Locale.ENGLISH);
}
From source file:com.geecko.QuickLyric.lyrics.LyricWiki.java
@Reflection public static ArrayList<Lyrics> search(String query) { ArrayList<Lyrics> results = new ArrayList<>(); query = query + " song"; query = Normalizer.normalize(query, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");//from w ww . j a va2 s . c o m try { URL queryURL = new URL(String.format(baseSearchUrl, URLEncoder.encode(query, "UTF-8"))); Document searchpage = Jsoup.connect(queryURL.toExternalForm()).get(); Elements searchResults = searchpage.getElementsByClass("Results"); if (searchResults.size() >= 1) { searchResults = searchResults.get(0).getElementsByClass("result"); for (Element searchResult : searchResults) { String[] tags = searchResult.getElementsByTag("h1").text().split(":"); if (tags.length != 2) continue; String url = searchResult.getElementsByTag("a").attr("href"); Lyrics lyrics = new Lyrics(SEARCH_ITEM); lyrics.setArtist(tags[0]); lyrics.setTitle(tags[1]); lyrics.setURL(url); lyrics.setSource(domain); results.add(lyrics); } } } catch (IOException e) { e.printStackTrace(); } return results; }
From source file:eu.annocultor.api.Common.java
/**
 * Decomposes the text to NFD and deletes every match of
 * {@code removeDiacriticPattern} from it.
 *
 * NOTE(review): the pattern is defined outside this snippet; presumably it
 * matches combining diacritical marks -- confirm against its declaration.
 *
 * @param text the text to process; must not be {@code null}
 * @return the NFD-decomposed text with all pattern matches removed
 */
public static String removeDiacritics(String text) {
    return removeDiacriticPattern
            .matcher(Normalizer.normalize(text, Normalizer.Form.NFD))
            .replaceAll("");
}