List of usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:org.sakaiproject.util.ParameterParser.java
/** * Get a FileItem parameter by name./*from w w w . j a va2 s . com*/ * * @param name * The parameter name. * @return The parameter FileItem value, or null if it's not defined. */ public FileItem getFileItem(String name) { // wrap the Apache FileItem in our own homegrown FileItem Object o = m_req.getAttribute(name); if (o != null && o instanceof org.apache.commons.fileupload.FileItem) { org.apache.commons.fileupload.FileItem item = (org.apache.commons.fileupload.FileItem) o; try { return new FileItem(Normalizer.normalize(item.getName(), Normalizer.Form.NFC), item.getContentType(), item.getInputStream()); } catch (IOException e) { return new FileItem(Normalizer.normalize(item.getName(), Normalizer.Form.NFC), item.getContentType(), item.get()); } } return null; }
From source file:org.mycore.common.xml.MCRXMLFunctions.java
/**
 * Converts the given String to Unicode NFC normal form.
 *
 * @param arg0 String to be normalized
 * @return arg0 in NFC normal form
 * @see Normalizer#normalize(CharSequence, java.text.Normalizer.Form)
 */
public static String normalizeUnicode(String arg0) {
    final Normalizer.Form targetForm = Normalizer.Form.NFC;
    return Normalizer.normalize(arg0, targetForm);
}
From source file:org.jajuk.services.lyrics.providers.LyricsManiaWebLyricsProvider.java
/**
 * Matches a run of characters from the Unicode "Combining Diacritical Marks" block.
 * Compiled once and shared: a Pattern is immutable, so there is no need to rebuild it per call.
 */
private static final Pattern COMBINING_DIACRITICAL_MARKS = Pattern
        .compile("\\p{InCombiningDiacriticalMarks}+");

/**
 * Replace each accent in the string with the non accent character.
 * <p>
 * The string is decomposed to NFD so that accents become separate combining marks, and those
 * marks are then deleted.
 *
 * @param s the string to process
 * @return the string without accents
 */
public String removeAccent(String s) {
    String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    return COMBINING_DIACRITICAL_MARKS.matcher(decomposed).replaceAll("");
}
From source file:com.csc.fi.ioapi.utils.LDHelper.java
/**
 * Strips accents from the given text by decomposing it to NFD and deleting the combining
 * diacritical marks.
 *
 * @param text the text to process, may be null
 * @return the text without accents, or null when text is null
 */
public static String removeAccents(String text) {
    if (text == null) {
        return null;
    }
    String decomposed = Normalizer.normalize(text, Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
From source file:com.aestheticsw.jobkeywords.service.termextractor.impl.fivefilters.FiveFiltersClient.java
private String removeHtmlTagsAndOtherBogusContent(String content, Locale locale) { content = content.toLowerCase(locale); // remote accented characters content = Normalizer.normalize(content, Normalizer.Form.NFD); for (Pattern key : regExMap.keySet()) { content = key.matcher(content).replaceAll(regExMap.get(key)); }// w w w. jav a 2s .c om return content; }
From source file:it.tidalwave.northernwind.core.impl.model.ResourcePropertiesDelegate.java
/******************************************************************************************************************* * * See http://stackoverflow.com/questions/1008802/converting-symbols-accent-letters-to-english-alphabet * ******************************************************************************************************************/ @Nonnull// w w w . ja va2s . c o m public String deAccent(final @Nonnull String string) { final String nfdNormalizedString = Normalizer.normalize(string, Normalizer.Form.NFD); final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); return pattern.matcher(nfdNormalizedString).replaceAll(""); }
From source file:org.tinymediamanager.scraper.thetvdb.TheTvDbMetadataProvider.java
public static String clearSearchString(String searchString) { // This will separate all of the accent marks from the characters String cleanedString = Normalizer.normalize(searchString, Normalizer.Form.NFD); // For unicode, \\P{M} matches the base glyph and \\p{M} (lowercase) matches each accent. cleanedString = cleanedString.replaceAll("\\p{M}", ""); // remove accents // cleanedString = cleanedString.replaceAll("\\p{Punct}", ""); // too much? // cleanedString = cleanedString.replaceAll("[^\\p{Alnum}\\s\\-]", ""); // damn - removes ALL cyrillic chars // next try: punctuation !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ plus , but not !- cleanedString = cleanedString.replaceAll("(?![\\-!])[\\p{Punct}]", ""); return cleanedString; }
From source file:com.moviejukebox.model.scriptablescraper.SectionContentSS.java
/**
 * Resolves a variable reference to its string value and applies the modifiers contained in the
 * reference name.
 * <p>
 * Recognised modifiers (detected by substring search on the name):
 * <ul>
 * <li>{@code :safe} - strip accents and replace spaces with '+'; with {@code :safe(charset)} the
 * result is additionally URL-encoded using the named charset</li>
 * <li>{@code :striptags} - pass the result through HTMLTools.removeHtmlTags</li>
 * <li>{@code :htmldecode} - pass the result through HTMLTools.decodeHtml</li>
 * </ul>
 * If the prepared variable carries array indices, the value is split on ARRAY_GROUP_DIVIDER and
 * ARRAY_ITEM_DIVIDER and the addressed element is returned; an index that is out of range
 * yields an empty string.
 *
 * @param name the variable reference, including optional modifiers
 * @return the resolved value; an empty string when the name is blank, the variable has no value,
 *         or encoding fails
 */
public String getVariable(final String name) {
    try {
        String result = "";
        boolean safe = false;
        boolean htmldecode = false;
        boolean striptags = false;

        if (StringUtils.isNotBlank(name)) {
            preparedVariable variable = prepareVariable(name);
            if (isDebug()) {
                LOG.debug("getVariable: {} value: {} index0: {} index1: {}", name, variable.value, variable.index0,
                        variable.index1);
            }
            if (variable.value == null) {
                return "";
            }

            safe = name.contains(":safe");
            htmldecode = name.contains(":htmldecode");
            // Fixed: this used to be "-1 <= name.indexOf(...)", which holds for every name because
            // indexOf never returns less than -1, so tags were stripped unconditionally.
            striptags = name.contains(":striptags");

            if (variable.index0 > -1) {
                if (variable.value.contains(ARRAY_GROUP_DIVIDER)) {
                    // grouped array: index0 picks the group, index1 (if set) the item inside it
                    List<String> values = Arrays.asList(variable.value.split(ARRAY_GROUP_DIVIDER));
                    if (values.size() > variable.index0) {
                        variable.value = values.get(variable.index0);
                        if (variable.index1 > -1) {
                            values = Arrays.asList(variable.value.split(ARRAY_ITEM_DIVIDER));
                            if (values.size() > variable.index1) {
                                result = values.get(variable.index1);
                            }
                        } else {
                            result = variable.value;
                        }
                    }
                } else if (variable.index1 == -1) {
                    // flat array: index0 addresses the item directly
                    List<String> values = Arrays.asList(variable.value.split(ARRAY_ITEM_DIVIDER));
                    if (values.size() > variable.index0) {
                        result = values.get(variable.index0);
                    }
                }
            } else {
                result = variable.value;
            }
        }

        if (safe) {
            result = Normalizer.normalize(result, Normalizer.Form.NFD)
                    .replaceAll("\\p{InCombiningDiacriticalMarks}+", "").replace(" ", "+");
            if (name.contains(":safe(")) {
                // Take the text between ":safe(" and the next ")" as the charset name.
                // Fixed: the second step used to slice "name" instead of "encodeName", yielding the
                // prefix of the whole reference up to its first ")" rather than the charset.
                String encodeName = name.substring(name.indexOf(":safe(") + 6);
                encodeName = encodeName.substring(0, encodeName.indexOf(")"));
                if (isDebug()) {
                    LOG.debug("encode result to ''{}'", encodeName);
                }
                result = URLEncoder.encode(result, encodeName);
            }
        }

        if (striptags) {
            result = HTMLTools.removeHtmlTags(result);
        }

        if (htmldecode) {
            result = HTMLTools.decodeHtml(result);
        }

        if (isDebug()) {
            LOG.debug("getVariable: result: '{}'", result);
        }
        return result;
    } catch (IOException error) {
        // URLEncoder.encode signals an unknown charset with UnsupportedEncodingException (an IOException)
        LOG.error("Failed get variable : {}", name);
        LOG.error("Error : {}", error.getMessage());
        return "";
    }
}
From source file:com.evolveum.midpoint.model.common.expression.functions.BasicExpressionFunctions.java
/**
 * Removes accents and other combining marks from the string form of the input.
 * <p>
 * The input is converted with stringify, decomposed to Unicode NFKD and every character of
 * category M (mark) is deleted. Characters that have no decomposition are left untouched.
 *
 * @param input the value to convert, may be null
 * @return the converted string, or null when input is null
 */
public String toAscii(Object input) {
    if (input == null) {
        return null;
    }
    return Normalizer.normalize(stringify(input), Normalizer.Form.NFKD).replaceAll("\\p{M}", "");
}
From source file:module.entities.NameFinder.RegexNameFinder.java
public static String getSignatureFromParagraphs(Elements paragraphs) { String signature = ""; String signName = "", roleName = ""; int signIdx = 0, roleIdx = 0; int row = 0;/*from ww w. jav a2s. c o m*/ TreeMap<Integer, String> roles = new TreeMap<Integer, String>(); for (Element n : paragraphs) { row++; String formatedText = Normalizer.normalize(n.text().toUpperCase(locale), Normalizer.Form.NFD) .replaceAll("\\p{M}", ""); if (formatedText.contains(" ") && !formatedText.matches(".*[0-9].*")) { // if (formatedText.contains("<br>")) { // formatedText = formatedText.replaceAll("<br\\s*/>", " "); // } String[] splitedText = formatedText.split(" "); // System.out.println(splitedText.length); if (splitedText.length < 7) { boolean isSign = false; String text = ""; for (int z = 0; z < splitedText.length; z++) { String splText = splitedText[z].replaceAll("[\\s.]", "").replaceAll("\u00a0", "") .replaceAll("", "").replaceAll(",", ""); if (names.contains(splText) || surnames.contains(splText)) { signName += splText + " "; signIdx = row; isSign = true; } text += splText + " "; // if (z == splitedText.length-1){ // System.out.println(signName.trim()); // } } if (!isSign) { roleIdx = row; if (!text.contains(" ") && !text.contains("")) { roles.put(roleIdx, text.trim()); } } } } } for (Integer roleRow : roles.keySet()) { // if (signName.length() == 0) { if (Math.abs(signIdx - roleRow) < 4) { roleName += roles.get(roleRow) + " "; } } if (signName.length() > 0) { signature = signName + "#" + roleName; } return signature; }