List of usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:org.opensextant.util.TextUtils.java
/** * Normalize to "Normalization Form Canonical Decomposition" (NFD) REF: * http:/* ww w . j a v a 2s.co m*/ * //stackoverflow.com/questions/3610013/file-listfiles-mangles-unicode- * names-with-jdk-6-unicode-normalization-issues This supports proper file * name retrieval from file system, among other things. In many situations * we see unicode file names -- Java can list them, but in using the * Java-provided version of the filename the OS/FS may not be able to find * the file by the name given in a particular normalized form. * * @param str * text * @return normalized string, encoded with NFD bytes */ public static String normalizeUnicode(String str) { Normalizer.Form form = Normalizer.Form.NFD; if (!Normalizer.isNormalized(str, form)) { return Normalizer.normalize(str, form); } return str; }
From source file:bfile.util.StringUtils.java
/** * <p>Removes diacritics (~= accents) from a string. The case will not be altered.</p> * <p>For instance, 'à' will be replaced by 'a'.</p> * <p>Note that ligatures will be left as is.</p> * * <pre>//from ww w. ja v a 2s . c o m * StringUtils.stripAccents(null) = null * StringUtils.stripAccents("") = "" * StringUtils.stripAccents("control") = "control" * StringUtils.stripAccents("éclair") = "eclair" * </pre> * * @param input String to be stripped * @return input text with diacritics removed * * @since 3.0 */ // See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented characters by their unaccented equivalent (and uncommitted bug fix: https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907). public static String stripAccents(final String input) { if (input == null) { return null; } final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$ final StringBuilder decomposed = new StringBuilder(Normalizer.normalize(input, Normalizer.Form.NFD)); convertRemainingAccentCharacters(decomposed); // Note that this doesn't correctly remove ligatures... return pattern.matcher(decomposed).replaceAll(StringUtils.EMPTY); }
From source file:com.rdm.common.util.StringUtils.java
/** * <p>Removes diacritics (~= accents) from a string. The case will not be altered.</p> * <p>For instance, 'à' will be replaced by 'a'.</p> * <p>Note that ligatures will be left as is.</p> * * <pre>//ww w . j ava 2s . c o m * StringUtils.stripAccents(null) = null * StringUtils.stripAccents("") = "" * StringUtils.stripAccents("control") = "control" * StringUtils.stripAccents("éclair") = "eclair" * </pre> * * @param input String to be stripped * @return input text with diacritics removed * * @since 3.0 */ // See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented characters by their unaccented equivalent (and uncommitted bug fix: https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907). public static String stripAccents(final String input) { if (input == null) { return null; } final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$ final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD); // Note that this doesn't correctly remove ligatures... return pattern.matcher(decomposed).replaceAll("");//$NON-NLS-1$ }
From source file:example.rest.ApiREST.java
public static String stripAccents(String s) { s = Normalizer.normalize(s, Normalizer.Form.NFD); s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}]", ""); return s;//from ww w. ja v a 2s. c o m }
From source file:org.exoplatform.utils.ExoDocumentUtils.java
/** * On Platform 4.1-M2, the upload service renames the uploaded file. Therefore * the link to this file in the activity becomes incorrect. To fix this, we * rename the file before upload so the same name is used in the activity. * //from w w w. j a v a2 s.c o m * @param originalName the name to clean * @return a String without forbidden characters */ public static String cleanupFilename(String originalName) { final String TILDE_HYPHENS_COLONS_SPACES = "[~_:\\s]"; final String MULTIPLE_HYPHENS = "-{2,}"; final String FORBIDDEN_CHARS = "[`!@#\\$%\\^&\\*\\|;\"'<>/\\\\\\[\\]\\{\\}\\(\\)\\?,=\\+\\.]+"; String name = originalName; String ext = ""; int lastDot = name.lastIndexOf('.'); if (lastDot > 0 && lastDot < name.length()) { ext = name.substring(lastDot); // the ext with the dot name = name.substring(0, lastDot); // the name before the ext } // [~_:\s] Replaces ~ _ : and spaces by - name = Pattern.compile(TILDE_HYPHENS_COLONS_SPACES).matcher(name).replaceAll("-"); // [`!@#\$%\^&\*\|;"'<>/\\\[\]\{\}\(\)\?,=\+\.]+ Deletes forbidden chars name = Pattern.compile(FORBIDDEN_CHARS).matcher(name).replaceAll(""); // Converts accents to regular letters name = Normalizer.normalize(name, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", ""); // Replaces upper case characters by lower case // Locale loc = new // Locale(SettingUtils.getPrefsLanguage(getApplicationContext())); name = name.toLowerCase(Locale.getDefault()); // Remove consecutive - name = Pattern.compile(MULTIPLE_HYPHENS).matcher(name).replaceAll("-"); // Save return (name + ext); }
From source file:uk.sipperfly.ui.BackgroundWorker.java
/**
 * Creates the bag-info.xml file at the transfer destination.
 *
 * <p>The document has a {@code transfer_metadata} root holding the payload, date
 * and size values, followed by one element per {@code BagInfo} entry. Each entry's
 * label is NFD-normalized, stripped of the punctuation listed in
 * {@code forbidden}, and has its spaces replaced by hyphens to form the element
 * name. Failures are logged at SEVERE level and not rethrown.
 *
 * @param payload text of the {@code Payload-Oxum} element
 * @param date    text of the {@code Bagging-Date} element
 * @param size    text of the {@code Bag-Size} element
 */
public void createXML(String payload, String date, String size) {
    try {
        final char[] forbidden = { '<', '>', '&', '"', '\\', '!', '#', '$', '%', '\'', '(', ')', '*', '.', ':', '+',
                ',', '/', ';', '=', '?', '@', '[', ']', '^', '`', '{', '|', '}', '~' };

        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.newDocument();

        Element rootElement = doc.createElement("transfer_metadata");
        doc.appendChild(rootElement);

        Attr attr1 = doc.createAttribute("xmlns:xsi");
        attr1.setValue("http://www.w3.org/2001/XMLSchema-instance");
        rootElement.setAttributeNode(attr1);

        Element payLoad = doc.createElement("Payload-Oxum");
        payLoad.appendChild(doc.createTextNode(payload));
        rootElement.appendChild(payLoad);

        Element baggingDate = doc.createElement("Bagging-Date");
        baggingDate.appendChild(doc.createTextNode(date));
        rootElement.appendChild(baggingDate);

        Element bagsize = doc.createElement("Bag-Size");
        bagsize.appendChild(doc.createTextNode(size));
        rootElement.appendChild(bagsize);

        BagInfoRepo bagInfoRepo = new BagInfoRepo();
        List<BagInfo> bagInfo = bagInfoRepo.getOneOrCreateOne();
        for (BagInfo b : bagInfo) {
            StringBuilder stringBuilder = new StringBuilder();
            char[] txt = Normalizer.normalize(b.getLabel(), Normalizer.Form.NFD).toCharArray();
            // Iterate over the normalized text itself: NFD can be longer than the
            // original label, so bounding the loop by the label length would drop
            // trailing characters.
            for (char c : txt) {
                boolean isForbidden = false;
                for (char f : forbidden) {
                    if (c == f) {
                        isForbidden = true;
                        break;
                    }
                }
                if (!isForbidden) {
                    stringBuilder.append(c);
                }
            }
            Element firstname = doc.createElement(stringBuilder.toString().replace(" ", "-"));
            firstname.appendChild(doc.createTextNode(b.getValue().trim()));
            rootElement.appendChild(firstname);
        }

        // write the content into xml file
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        DOMSource source = new DOMSource(doc);
        StreamResult result = new StreamResult(
                new File(this.target.toString() + File.separator + "bag-info.xml"));
        transformer.transform(source, result);
    } catch (ParserConfigurationException ex) {
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    } catch (TransformerConfigurationException ex) {
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    } catch (TransformerException ex) {
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    } catch (DOMException ex) {
        // e.g. a label that yields an empty or otherwise invalid element name
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:org.totschnig.myexpenses.util.Utils.java
/** * @param str/*from w w w . j ava 2 s. com*/ * @return a representation of str converted to lower case, Unicode * normalization applied and markers removed this allows * case-insentive comparison for non-ascii and non-latin strings works * only above Gingerbread, on Froyo only lower case transformation is * performed */ @SuppressLint({ "NewApi", "DefaultLocale" }) public static String normalize(String str) { str = str.toLowerCase(); if (Build.VERSION.SDK_INT < Build.VERSION_CODES.GINGERBREAD) { return str; } // Credits: http://stackoverflow.com/a/3322174/1199911 return Normalizer.normalize(str, Normalizer.Form.NFD).replaceAll("\\p{M}", ""); }
From source file:org.medici.bia.service.peoplebase.PeopleBaseServiceImpl.java
/** * {@inheritDoc}/* w w w .ja va 2 s . co m*/ */ @Transactional(readOnly = false, propagation = Propagation.REQUIRED) @Override public People editDetailsPerson(People person) throws ApplicationThrowable { try { User user = getCurrentUser(); People personToUpdate = getPeopleDAO().find(person.getPersonId()); Set<AltName> altNames = personToUpdate.getAltName(); AltName searchName = null; // fill fields to update person details section if (personToUpdate.getFirst() != null && !personToUpdate.getFirst().equals(person.getFirst())) { for (AltName current : altNames) { if (current.getAltName().equals(personToUpdate.getFirst()) && current.getNameType().equals(NameType.Given.toString())) { current.setAltName(person.getFirst()); getAltNameDAO().merge(current); break; } } } if (personToUpdate.getLast() != null && !personToUpdate.getLast().equals(person.getLast())) { for (AltName current : altNames) { if (current.getAltName().equals(personToUpdate.getLast()) && current.getNameType().equals(NameType.Family.toString())) { current.setAltName(person.getLast()); getAltNameDAO().merge(current); break; } } } if (!personToUpdate.getMapNameLf().equals(PersonUtils.generateMapNameLf(person))) { boolean found = false; String toCompare = personToUpdate.getMapNameLf(); toCompare = Normalizer.normalize(toCompare, Normalizer.Form.NFD); toCompare = toCompare.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); toCompare = toCompare.replace(",", ""); toCompare = toCompare.replace("(", ""); toCompare = toCompare.replace(")", ""); toCompare = toCompare.toUpperCase(); for (AltName current : altNames) { if (current.getAltName().equals(toCompare) && current.getNameType().equals(NameType.SearchName.toString())) { current.setAltName( Normalizer.normalize(PersonUtils.generateMapNameLf(person), Normalizer.Form.NFD)); current.setAltName( current.getAltName().replaceAll("\\p{InCombiningDiacriticalMarks}+", "")); current.setAltName(current.getAltName().replace(",", "")); 
current.setAltName(current.getAltName().replace("(", "")); current.setAltName(current.getAltName().replace(")", "")); current.setAltName(current.getAltName().toUpperCase()); found = true; getAltNameDAO().merge(current); break; } } if (!found) { searchName = new AltName(); searchName.setAltName( Normalizer.normalize(PersonUtils.generateMapNameLf(person), Normalizer.Form.NFD)); searchName.setAltName( searchName.getAltName().replaceAll("\\p{InCombiningDiacriticalMarks}+", "")); searchName.setAltName(searchName.getAltName().replace(",", "")); searchName.setAltName(searchName.getAltName().toUpperCase()); searchName.setNameType(NameType.SearchName.toString()); } } personToUpdate.setFirst(person.getFirst()); personToUpdate.setSucNum(person.getSucNum()); personToUpdate.setMidPrefix(person.getMidPrefix()); personToUpdate.setMiddle(person.getMiddle()); personToUpdate.setLastPrefix(person.getLastPrefix()); personToUpdate.setLast(person.getLast()); personToUpdate.setPostLastPrefix(person.getPostLastPrefix()); personToUpdate.setPostLast(person.getPostLast()); personToUpdate.setGender((!person.getGender().equals(People.Gender.NULL)) ? 
person.getGender() : null); //Update setMapNameLf personToUpdate.setMapNameLf(PersonUtils.generateMapNameLf(personToUpdate)); personToUpdate.setBornYear(person.getBornYear()); if (person.getBornMonth() != null) { personToUpdate.setBornMonth(getMonthDAO().find(person.getBornMonth().getMonthNum())); } else { personToUpdate.setBornMonth(null); } // Born Information personToUpdate.setBornDay(person.getBornDay()); personToUpdate.setBornDate(DateUtils.getLuceneDate(personToUpdate.getBornYear(), personToUpdate.getBornMonth(), personToUpdate.getBornDay())); personToUpdate.setBornApprox(person.getBornApprox()); personToUpdate.setBornDateBc(person.getBornDateBc()); if (!ObjectUtils.toString(person.getBornPlace()).equals("")) { personToUpdate.setBornPlace(getPlaceDAO().find(person.getBornPlace().getPlaceAllId())); if (personToUpdate.getBornPlace().getPrefFlag().equals("V")) { personToUpdate.setBornPlace( getPlaceDAO().findPrinicipalPlace(personToUpdate.getBornPlace().getGeogKey())); } } else { personToUpdate.setBornPlace(null); } personToUpdate.setBornPlaceUnsure(person.getBornPlaceUnsure()); if (ObjectUtils.toString(person.getActiveStart()).equals("")) { personToUpdate.setActiveStart(null); } else { personToUpdate.setActiveStart(person.getActiveStart()); } // Death Information personToUpdate.setDeathYear(person.getDeathYear()); if (person.getDeathMonth() != null) { personToUpdate.setDeathMonth(getMonthDAO().find(person.getDeathMonth().getMonthNum())); } else { personToUpdate.setDeathMonth(null); } personToUpdate.setDeathDay(person.getDeathDay()); personToUpdate.setDeathDate(DateUtils.getLuceneDate(personToUpdate.getDeathYear(), personToUpdate.getDeathMonth(), personToUpdate.getDeathDay())); personToUpdate.setDeathApprox(person.getDeathApprox()); personToUpdate.setDeathDateBc(person.getDeathDateBc()); if (!ObjectUtils.toString(person.getDeathPlace()).equals("")) { personToUpdate.setDeathPlace(getPlaceDAO().find(person.getDeathPlace().getPlaceAllId())); if 
(personToUpdate.getDeathPlace().getPrefFlag().equals("V")) { personToUpdate.setDeathPlace( getPlaceDAO().findPrinicipalPlace(personToUpdate.getDeathPlace().getGeogKey())); } } else { personToUpdate.setDeathPlace(null); } personToUpdate.setDeathPlaceUnsure(person.getDeathPlaceUnsure()); if (ObjectUtils.toString(person.getActiveEnd()).equals("")) { personToUpdate.setActiveEnd(null); } else { personToUpdate.setActiveEnd(person.getActiveEnd()); } personToUpdate.setLastUpdate(new Date()); personToUpdate.setLastUpdateBy(user); getPeopleDAO().merge(personToUpdate); if (searchName != null) { searchName.setPerson(personToUpdate); getAltNameDAO().persist(searchName); } getUserHistoryDAO() .persist(new UserHistory(user, "Edit details", Action.MODIFY, Category.PEOPLE, personToUpdate)); getVettingHistoryDAO().persist( new VettingHistory(user, "Edit details", org.medici.bia.domain.VettingHistory.Action.MODIFY, org.medici.bia.domain.VettingHistory.Category.PEOPLE, personToUpdate)); return personToUpdate; } catch (Throwable th) { throw new ApplicationThrowable(th); } }
From source file:info.ajaxplorer.synchro.SyncJob.java
protected String normalizeUnicode(String str) { Normalizer.Form form = Normalizer.Form.NFD; if (!Normalizer.isNormalized(str, form)) { return Normalizer.normalize(str, form); }//ww w .j ava 2 s . com return str; }
From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportTsHandler.java
private String encodeLiteralValue(Node node) { String value = null;//from www.j ava 2 s.c o m if (node != null) { value = "\"" + StringEscapeUtils.escapeJava(Normalizer.normalize(node.getText(), Normalizer.Form.NFC)) .replace("'", "\\'").replace("\"", "\\\"") + "\""; Node langAttr = node.selectSingleNode("@xml:lang"); if (langAttr != null) value += "@" + langAttr.getStringValue(); } return value; }