List of usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:com.evolveum.midpoint.model.impl.filter.DiacriticsFilter.java
/**
 * Strips combining diacritical marks from the string value of the given property value.
 *
 * <p>The text is read through {@code getStringValue(propertyValue)}. When
 * {@code StringUtils.isEmpty} reports it as empty the property value is returned untouched;
 * otherwise the text is decomposed to NFD, every run of characters from the
 * "Combining Diacritical Marks" Unicode block is removed, and the result is written back
 * with {@code setValue}.
 *
 * @param propertyValue the property value to filter; checked with {@code Validate.notNull}
 * @return the same {@code propertyValue} instance that was passed in
 */
@Override
public <T extends Object> PrismPropertyValue<T> apply(PrismPropertyValue<T> propertyValue) {
    Validate.notNull(propertyValue, "Node must not be null.");

    String original = getStringValue(propertyValue);
    if (!StringUtils.isEmpty(original)) {
        // NFD splits accented characters into base character + combining mark(s).
        String decomposed = Normalizer.normalize(original, Form.NFD);
        String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
        // Unchecked cast: the filter writes a String regardless of the declared T.
        propertyValue.setValue((T) stripped);
    }
    return propertyValue;
}
From source file:de.micromata.genome.util.matcher.norm.StringNormalizeUtils.java
/** * Remove umlauts and accents. Also converts scharfes s to ss. * //w ww . j a v a 2 s.c o m * @param str the input * @return the converted string */ public static String deAccent(String str) { // makes from one composit character to two characters. Second is the accent or umlaut sign String norm = Normalizer.normalize(str, Normalizer.Form.NFD); // throw away accent/umlaut character norm = DEACCENT_PATTERN.get().matcher(norm).replaceAll(""); norm = replaceSpecifcComposits(norm); return norm; }
From source file:org.brushingbits.jnap.common.seo.SeoStringUtil.java
/** * //from w w w . j ava 2s . c o m * @param src * @param locale * @return */ public static String makeSeoFriendly(String src, Locale locale) { String seoFriendlyText = src.trim(); // normalize seoFriendlyText = Normalizer.normalize(src, Form.NFD); // try to remove stop words if locale is specified if (locale != null) { SeoStopWordCleaner wordCleaner = null; for (SeoStopWordCleaner cleaner : seoStopWordCleaners) { if (ArrayUtils.contains(cleaner.getSupportedLocales(), locale)) { wordCleaner = cleaner; break; } } if (wordCleaner == null) { logger.warn(MessageFormat.format( "A locale was specified ({0}) but no " + "SeoStopWordCleaner was found for it", locale.toString())); } else { seoFriendlyText = wordCleaner.clean(seoFriendlyText); } } // replace duplicated spaces with a single one seoFriendlyText = seoFriendlyText.replaceAll("[\\s]{2,}", " "); // replace spaces with '-' seoFriendlyText = seoFriendlyText.replaceAll("[\\s]", "-"); // remove remaining non-latin characters seoFriendlyText = seoFriendlyText.replaceAll("[^\\w-]", StringUtils.EMPTY); // convert to lowercase (using english locale rules) and return return seoFriendlyText.toLowerCase(Locale.ENGLISH); }
From source file:com.evolveum.midpoint.prism.polystring.AbstractPolyStringNormalizer.java
/** * Unicode Normalization Form Compatibility Decomposition (NFKD) *///from w ww. j a va 2 s .c o m protected String nfkd(String s) { return Normalizer.normalize(s, Normalizer.Form.NFKD); }
From source file:com.mec.DAO.Superior.SuperiorDAO.java
/**
 * Reduces a string to upper-case ASCII.
 *
 * <p>The input is decomposed to NFD so accented letters become a base letter plus
 * combining marks, every non-ASCII character is then removed, and the remainder is
 * upper-cased.
 *
 * @param s the text to clean; must not be {@code null}
 * @return the upper-cased, ASCII-only form of {@code s}
 */
private String clean(String s) {
    String asciiOnly = Normalizer.normalize(s, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
    // Locale.ROOT keeps the result independent of the JVM default locale. With a Turkish
    // default locale, the no-argument toUpperCase() maps 'i' to U+0130, which would put a
    // non-ASCII character back into the output.
    return asciiOnly.toUpperCase(java.util.Locale.ROOT);
}
From source file:org.opensingular.lib.commons.base.SingularUtil.java
/**
 * Produces an ASCII-only version of a string.
 *
 * <p>The text is decomposed to NFD, which separates accented letters into a base letter
 * and combining marks, and every character outside the ASCII range is then deleted.
 * Non-ASCII characters without an ASCII base letter are removed entirely.
 *
 * @param original the text to convert; must not be {@code null}
 * @return {@code original} with all non-ASCII characters removed after decomposition
 */
public static String normalize(String original) {
    final String decomposed = Normalizer.normalize(original, Normalizer.Form.NFD);
    return decomposed.replaceAll("[^\\p{ASCII}]", "");
}
From source file:biblivre3.utils.TextUtils.java
public static String removeDiacriticals(final String input) { if (input == null) { return input; }//from www.j ava 2s. co m final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD); String final2 = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); return final2; }
From source file:org.drftpd.util.HttpUtils.java
public static String htmlToString(String input) { String str = input.replaceAll("\n", ""); str = StringEscapeUtils.unescapeHtml4(str); str = Normalizer.normalize(str, Normalizer.Form.NFD); str = str.replaceAll("\\P{InBasic_Latin}", ""); while (str.contains("<")) { int startPos = str.indexOf("<"); int endPos = str.indexOf(">", startPos); if (endPos > startPos) { String beforeTag = str.substring(0, startPos); String afterTag = str.substring(endPos + 1); str = beforeTag + afterTag;// w w w .j a v a2s.c om } } return str; }
From source file:org.voyanttools.trombone.tool.analysis.DistributedTermFrequencies.java
public DistributedTermFrequencies(String string, int[] freqs) { this.string = string; this.freqs = new int[freqs.length]; for (int i = 0, len = freqs.length; i < len; i++) { add(i, freqs[i]);//from w ww . j av a 2s . c o m } this.normalizedString = Normalizer.normalize(string.toLowerCase(), Normalizer.Form.NFD); }
From source file:biblivre3.utils.TextUtils.java
/**
 * Composes a string into Unicode Normalization Form C (NFC).
 *
 * @param input the text to compose, may be {@code null}
 * @return {@code input} in NFC form, or {@code null} when {@code input} is {@code null}
 */
public static String combine(final String input) {
    if (input != null) {
        return Normalizer.normalize(input, Normalizer.Form.NFC);
    }
    return null;
}