Example usage for java.text Normalizer normalize

List of usage examples for java.text Normalizer normalize

Introduction

This page collects usage examples for java.text.Normalizer.normalize.

Prototype

public static String normalize(CharSequence src, Form form) 

Source Link

Document

Normalizes a sequence of char values according to the given normalization form.

Usage

From source file:org.opensextant.util.TextUtils.java

/**
 * Normalize to "Normalization Form Canonical Decomposition" (NFD) REF:
 * http:/*  ww w .  j a  v a 2s.co m*/
 * //stackoverflow.com/questions/3610013/file-listfiles-mangles-unicode-
 * names-with-jdk-6-unicode-normalization-issues This supports proper file
 * name retrieval from file system, among other things. In many situations
 * we see unicode file names -- Java can list them, but in using the
 * Java-provided version of the filename the OS/FS may not be able to find
 * the file by the name given in a particular normalized form.
 *
 * @param str
 *            text
 * @return normalized string, encoded with NFD bytes
 */
public static String normalizeUnicode(String str) {
    Normalizer.Form form = Normalizer.Form.NFD;
    if (!Normalizer.isNormalized(str, form)) {
        return Normalizer.normalize(str, form);
    }
    return str;
}

From source file:bfile.util.StringUtils.java

/**
 * <p>Strips diacritics (~= accents) from a string without changing its case.</p>
 * <p>For example, '&agrave;' becomes 'a'.</p>
 * <p>Ligatures are left untouched.</p>
 *
 * <pre>
 * StringUtils.stripAccents(null)                = null
 * StringUtils.stripAccents("")                  = ""
 * StringUtils.stripAccents("control")           = "control"
 * StringUtils.stripAccents("&eacute;clair")     = "eclair"
 * </pre>
 *
 * @param input String to be stripped, may be {@code null}
 * @return input text with diacritics removed, or {@code null} for {@code null} input
 *
 * @since 3.0
 */
// See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented characters by their unaccented equivalent (and uncommitted bug fix: https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907).
public static String stripAccents(final String input) {
    if (input != null) {
        // Decompose first so that each accent becomes a separate combining mark.
        final StringBuilder buffer = new StringBuilder(Normalizer.normalize(input, Normalizer.Form.NFD));
        // Helper defined elsewhere in this class; it edits the buffer in place
        // (presumably for accented characters that NFD does not decompose -- confirm).
        convertRemainingAccentCharacters(buffer);
        // Note that this doesn't correctly remove ligatures...
        return Pattern.compile("\\p{InCombiningDiacriticalMarks}+") //$NON-NLS-1$
                .matcher(buffer)
                .replaceAll(StringUtils.EMPTY);
    }
    return null;
}

From source file:com.rdm.common.util.StringUtils.java

/**
 * Matches a run of characters from the Unicode "Combining Diacritical Marks"
 * block. Compiled once because the expression is constant.
 */
private static final Pattern STRIP_ACCENTS_PATTERN = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$

/**
 * <p>Removes diacritics (~= accents) from a string. The case will not be altered.</p>
 * <p>For instance, '&agrave;' will be replaced by 'a'.</p>
 * <p>Note that ligatures will be left as is.</p>
 *
 * <pre>
 * StringUtils.stripAccents(null)                = null
 * StringUtils.stripAccents("")                  = ""
 * StringUtils.stripAccents("control")           = "control"
 * StringUtils.stripAccents("&eacute;clair")     = "eclair"
 * </pre>
 *
 * @param input String to be stripped, may be {@code null}
 * @return input text with diacritics removed, or {@code null} for {@code null} input
 *
 * @since 3.0
 */
// See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented characters by their unaccented equivalent (and uncommitted bug fix: https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907).
public static String stripAccents(final String input) {
    if (input == null) {
        return null;
    }
    // NFD splits each accented character into its base letter plus combining marks,
    // which the pattern then removes.
    final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
    // Note that this doesn't correctly remove ligatures...
    return STRIP_ACCENTS_PATTERN.matcher(decomposed).replaceAll("");//$NON-NLS-1$
}

From source file:example.rest.ApiREST.java

/**
 * Removes combining diacritical marks from the given text.
 *
 * @param s the text to strip; must not be {@code null}
 * @return the NFD-decomposed text with every character of the Combining
 *         Diacritical Marks block removed
 */
public static String stripAccents(String s) {
    // Decompose accented characters into base letter + combining mark(s) ...
    final String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    // ... then drop the marks, leaving only the base letters.
    return decomposed.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
}

From source file:org.exoplatform.utils.ExoDocumentUtils.java

/**
 * Cleans up a file name before upload.
 *
 * <p>On Platform 4.1-M2, the upload service renames the uploaded file. Therefore
 * the link to this file in the activity becomes incorrect. To fix this, we
 * rename the file before upload so the same name is used in the activity.
 *
 * <p>Only the base name is rewritten; the extension (everything from the last
 * dot on, provided that dot is not the first character) is appended unchanged.
 * In the base name, {@code ~ _ :} and whitespace become {@code -}, forbidden
 * punctuation is deleted, accents and other non-ASCII characters are dropped,
 * letters are lower-cased and runs of hyphens are collapsed into one.
 *
 * @param originalName the name to clean; must not be {@code null}
 * @return a String without forbidden characters
 */
public static String cleanupFilename(String originalName) {
    final String tildeUnderscoreColonSpace = "[~_:\\s]";
    final String multipleHyphens = "-{2,}";
    final String forbiddenChars = "[`!@#\\$%\\^&\\*\\|;\"'<>/\\\\\\[\\]\\{\\}\\(\\)\\?,=\\+\\.]+";
    String name = originalName;
    String ext = "";
    // lastIndexOf never returns an index >= length(), so only the lower bound is
    // checked. A dot at index 0 is not treated as an extension separator.
    final int lastDot = name.lastIndexOf('.');
    if (lastDot > 0) {
        ext = name.substring(lastDot); // the ext with the dot
        name = name.substring(0, lastDot); // the name before the ext
    }
    // [~_:\s] Replaces ~ _ : and spaces by -
    name = Pattern.compile(tildeUnderscoreColonSpace).matcher(name).replaceAll("-");
    // [`!@#\$%\^&\*\|;"'<>/\\\[\]\{\}\(\)\?,=\+\.]+ Deletes forbidden chars
    name = Pattern.compile(forbiddenChars).matcher(name).replaceAll("");
    // Converts accents to regular letters: NFD splits off the combining marks,
    // which are then removed together with every other non-ASCII character.
    name = Normalizer.normalize(name, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
    // Lower-case with a locale-independent mapping. With the default locale a
    // Turkish or Azeri device would map 'I' to dotless 'ı', re-introducing a
    // non-ASCII character after the ASCII filter above.
    name = name.toLowerCase(Locale.ROOT);
    // Remove consecutive -
    name = Pattern.compile(multipleHyphens).matcher(name).replaceAll("-");
    return name + ext;
}

From source file:uk.sipperfly.ui.BackgroundWorker.java

/**
 * Creates the bag-info.xml file at the transfer destination.
 *
 * <p>The document has a {@code transfer_metadata} root holding
 * {@code Payload-Oxum}, {@code Bagging-Date} and {@code Bag-Size} elements,
 * followed by one element per {@link BagInfo} entry. Each entry's element name
 * is its label, NFD-normalized, with the punctuation listed below removed and
 * spaces replaced by hyphens; the element text is the trimmed entry value.
 *
 * <p>Parser, transformer and DOM errors are logged at SEVERE level and not
 * rethrown.
 *
 * @param payload text of the {@code Payload-Oxum} element
 * @param date text of the {@code Bagging-Date} element
 * @param size text of the {@code Bag-Size} element
 */
public void createXML(String payload, String date, String size) {
    try {
        // Characters that are stripped from a label before it is used as an element name.
        char[] charArray = { '<', '>', '&', '"', '\\', '!', '#', '$', '%', '\'', '(', ')', '*', '.', ':', '+',
                ',', '/', ';', '=', '?', '@', '[', ']', '^', '`', '{', '|', '}', '~' };
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.newDocument();
        Element rootElement = doc.createElement("transfer_metadata");
        doc.appendChild(rootElement);

        Attr attr1 = doc.createAttribute("xmlns:xsi");
        attr1.setValue("http://www.w3.org/2001/XMLSchema-instance");
        rootElement.setAttributeNode(attr1);

        Element payLoad = doc.createElement("Payload-Oxum");
        payLoad.appendChild(doc.createTextNode(payload));
        rootElement.appendChild(payLoad);

        Element baggingDate = doc.createElement("Bagging-Date");
        baggingDate.appendChild(doc.createTextNode(date));
        rootElement.appendChild(baggingDate);

        Element bagsize = doc.createElement("Bag-Size");
        bagsize.appendChild(doc.createTextNode(size));
        rootElement.appendChild(bagsize);

        BagInfoRepo bagInfoRepo = new BagInfoRepo();
        List<BagInfo> bagInfo = bagInfoRepo.getOneOrCreateOne();

        for (BagInfo b : bagInfo) {
            StringBuilder elementName = new StringBuilder();
            char[] txt = Normalizer.normalize(b.getLabel(), Normalizer.Form.NFD).toCharArray();
            // Walk the normalized text itself. NFD can be longer than the original
            // label (one accented char becomes base + combining mark), so bounding
            // the loop by the label length would drop trailing characters.
            for (int i = 0; i < txt.length; i++) {
                boolean forbidden = false;
                for (int j = 0; j < charArray.length; j++) {
                    if (txt[i] == charArray[j]) {
                        forbidden = true;
                        break;
                    }
                }
                if (!forbidden) {
                    elementName.append(txt[i]);
                }
            }
            Element field = doc.createElement(elementName.toString().replace(" ", "-"));
            field.appendChild(doc.createTextNode(b.getValue().trim()));
            rootElement.appendChild(field);

        }
        // write the content into xml file
        TransformerFactory transformerFactory = TransformerFactory.newInstance();

        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        DOMSource source = new DOMSource(doc);
        StreamResult result = new StreamResult(
                new File(this.target.toString() + File.separator + "bag-info.xml"));
        transformer.transform(source, result);
    } catch (ParserConfigurationException ex) {
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    } catch (TransformerConfigurationException ex) {
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    } catch (TransformerException ex) {
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    } catch (DOMException ex) {
        // Thrown, for instance, by createElement when a sanitized label is not a valid XML name.
        Logger.getLogger(BackgroundWorker.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:org.totschnig.myexpenses.util.Utils.java

/**
 * @param str/*from   w  w w . j ava 2  s.  com*/
 * @return a representation of str converted to lower case, Unicode
 *         normalization applied and markers removed this allows
 *         case-insentive comparison for non-ascii and non-latin strings works
 *         only above Gingerbread, on Froyo only lower case transformation is
 *         performed
 */
@SuppressLint({ "NewApi", "DefaultLocale" })
public static String normalize(String str) {
    str = str.toLowerCase();
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.GINGERBREAD) {
        return str;
    }
    // Credits: http://stackoverflow.com/a/3322174/1199911
    return Normalizer.normalize(str, Normalizer.Form.NFD).replaceAll("\\p{M}", "");
}

From source file:org.medici.bia.service.peoplebase.PeopleBaseServiceImpl.java

/**
 * {@inheritDoc}/* w w w .ja va  2 s . co m*/
 */
@Transactional(readOnly = false, propagation = Propagation.REQUIRED)
@Override
public People editDetailsPerson(People person) throws ApplicationThrowable {
    try {
        User user = getCurrentUser();

        People personToUpdate = getPeopleDAO().find(person.getPersonId());

        Set<AltName> altNames = personToUpdate.getAltName();
        AltName searchName = null;

        // fill fields to update person details section
        if (personToUpdate.getFirst() != null && !personToUpdate.getFirst().equals(person.getFirst())) {
            for (AltName current : altNames) {
                if (current.getAltName().equals(personToUpdate.getFirst())
                        && current.getNameType().equals(NameType.Given.toString())) {
                    current.setAltName(person.getFirst());
                    getAltNameDAO().merge(current);
                    break;
                }
            }
        }

        if (personToUpdate.getLast() != null && !personToUpdate.getLast().equals(person.getLast())) {
            for (AltName current : altNames) {
                if (current.getAltName().equals(personToUpdate.getLast())
                        && current.getNameType().equals(NameType.Family.toString())) {
                    current.setAltName(person.getLast());
                    getAltNameDAO().merge(current);
                    break;
                }
            }
        }

        if (!personToUpdate.getMapNameLf().equals(PersonUtils.generateMapNameLf(person))) {
            boolean found = false;
            String toCompare = personToUpdate.getMapNameLf();
            toCompare = Normalizer.normalize(toCompare, Normalizer.Form.NFD);
            toCompare = toCompare.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
            toCompare = toCompare.replace(",", "");
            toCompare = toCompare.replace("(", "");
            toCompare = toCompare.replace(")", "");
            toCompare = toCompare.toUpperCase();
            for (AltName current : altNames) {
                if (current.getAltName().equals(toCompare)
                        && current.getNameType().equals(NameType.SearchName.toString())) {
                    current.setAltName(
                            Normalizer.normalize(PersonUtils.generateMapNameLf(person), Normalizer.Form.NFD));
                    current.setAltName(
                            current.getAltName().replaceAll("\\p{InCombiningDiacriticalMarks}+", ""));
                    current.setAltName(current.getAltName().replace(",", ""));
                    current.setAltName(current.getAltName().replace("(", ""));
                    current.setAltName(current.getAltName().replace(")", ""));
                    current.setAltName(current.getAltName().toUpperCase());
                    found = true;
                    getAltNameDAO().merge(current);
                    break;
                }

            }
            if (!found) {
                searchName = new AltName();
                searchName.setAltName(
                        Normalizer.normalize(PersonUtils.generateMapNameLf(person), Normalizer.Form.NFD));
                searchName.setAltName(
                        searchName.getAltName().replaceAll("\\p{InCombiningDiacriticalMarks}+", ""));
                searchName.setAltName(searchName.getAltName().replace(",", ""));
                searchName.setAltName(searchName.getAltName().toUpperCase());
                searchName.setNameType(NameType.SearchName.toString());
            }
        }

        personToUpdate.setFirst(person.getFirst());
        personToUpdate.setSucNum(person.getSucNum());
        personToUpdate.setMidPrefix(person.getMidPrefix());
        personToUpdate.setMiddle(person.getMiddle());
        personToUpdate.setLastPrefix(person.getLastPrefix());
        personToUpdate.setLast(person.getLast());
        personToUpdate.setPostLastPrefix(person.getPostLastPrefix());
        personToUpdate.setPostLast(person.getPostLast());
        personToUpdate.setGender((!person.getGender().equals(People.Gender.NULL)) ? person.getGender() : null);
        //Update setMapNameLf
        personToUpdate.setMapNameLf(PersonUtils.generateMapNameLf(personToUpdate));

        personToUpdate.setBornYear(person.getBornYear());
        if (person.getBornMonth() != null) {
            personToUpdate.setBornMonth(getMonthDAO().find(person.getBornMonth().getMonthNum()));
        } else {
            personToUpdate.setBornMonth(null);
        }

        // Born Information
        personToUpdate.setBornDay(person.getBornDay());
        personToUpdate.setBornDate(DateUtils.getLuceneDate(personToUpdate.getBornYear(),
                personToUpdate.getBornMonth(), personToUpdate.getBornDay()));
        personToUpdate.setBornApprox(person.getBornApprox());
        personToUpdate.setBornDateBc(person.getBornDateBc());
        if (!ObjectUtils.toString(person.getBornPlace()).equals("")) {
            personToUpdate.setBornPlace(getPlaceDAO().find(person.getBornPlace().getPlaceAllId()));
            if (personToUpdate.getBornPlace().getPrefFlag().equals("V")) {
                personToUpdate.setBornPlace(
                        getPlaceDAO().findPrinicipalPlace(personToUpdate.getBornPlace().getGeogKey()));
            }
        } else {
            personToUpdate.setBornPlace(null);
        }
        personToUpdate.setBornPlaceUnsure(person.getBornPlaceUnsure());

        if (ObjectUtils.toString(person.getActiveStart()).equals("")) {
            personToUpdate.setActiveStart(null);
        } else {
            personToUpdate.setActiveStart(person.getActiveStart());
        }

        // Death Information
        personToUpdate.setDeathYear(person.getDeathYear());
        if (person.getDeathMonth() != null) {
            personToUpdate.setDeathMonth(getMonthDAO().find(person.getDeathMonth().getMonthNum()));
        } else {
            personToUpdate.setDeathMonth(null);
        }
        personToUpdate.setDeathDay(person.getDeathDay());
        personToUpdate.setDeathDate(DateUtils.getLuceneDate(personToUpdate.getDeathYear(),
                personToUpdate.getDeathMonth(), personToUpdate.getDeathDay()));

        personToUpdate.setDeathApprox(person.getDeathApprox());
        personToUpdate.setDeathDateBc(person.getDeathDateBc());
        if (!ObjectUtils.toString(person.getDeathPlace()).equals("")) {
            personToUpdate.setDeathPlace(getPlaceDAO().find(person.getDeathPlace().getPlaceAllId()));
            if (personToUpdate.getDeathPlace().getPrefFlag().equals("V")) {
                personToUpdate.setDeathPlace(
                        getPlaceDAO().findPrinicipalPlace(personToUpdate.getDeathPlace().getGeogKey()));
            }
        } else {
            personToUpdate.setDeathPlace(null);
        }
        personToUpdate.setDeathPlaceUnsure(person.getDeathPlaceUnsure());

        if (ObjectUtils.toString(person.getActiveEnd()).equals("")) {
            personToUpdate.setActiveEnd(null);
        } else {
            personToUpdate.setActiveEnd(person.getActiveEnd());
        }

        personToUpdate.setLastUpdate(new Date());
        personToUpdate.setLastUpdateBy(user);

        getPeopleDAO().merge(personToUpdate);

        if (searchName != null) {
            searchName.setPerson(personToUpdate);
            getAltNameDAO().persist(searchName);
        }

        getUserHistoryDAO()
                .persist(new UserHistory(user, "Edit details", Action.MODIFY, Category.PEOPLE, personToUpdate));
        getVettingHistoryDAO().persist(
                new VettingHistory(user, "Edit details", org.medici.bia.domain.VettingHistory.Action.MODIFY,
                        org.medici.bia.domain.VettingHistory.Category.PEOPLE, personToUpdate));

        return personToUpdate;
    } catch (Throwable th) {
        throw new ApplicationThrowable(th);
    }
}

From source file:info.ajaxplorer.synchro.SyncJob.java

/**
 * Returns the given text in Unicode NFD (canonical decomposition) form.
 *
 * @param str the text to normalize; must not be {@code null}
 * @return {@code str} itself when it is already in NFD, otherwise its
 *         NFD-normalized copy
 */
protected String normalizeUnicode(String str) {
    final Normalizer.Form nfd = Normalizer.Form.NFD;
    return Normalizer.isNormalized(str, nfd) ? str : Normalizer.normalize(str, nfd);
}

From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportTsHandler.java

/**
 * Encodes the text of a node as a double-quoted literal, followed by
 * {@code @lang} when the node has an {@code xml:lang} attribute.
 *
 * @param node the node whose text is encoded; may be {@code null}
 * @return the quoted, escaped literal, or {@code null} when {@code node} is {@code null}
 */
private String encodeLiteralValue(Node node) {
    if (node == null) {
        return null;
    }
    // Compose to NFC before escaping.
    final String composed = Normalizer.normalize(node.getText(), Normalizer.Form.NFC);
    // NOTE(review): if StringEscapeUtils is Apache Commons Lang, escapeJava already
    // backslash-escapes double quotes, so the second replace would escape them twice -- confirm.
    final String escaped = StringEscapeUtils.escapeJava(composed).replace("'", "\\'").replace("\"", "\\\"");
    final StringBuilder literal = new StringBuilder("\"").append(escaped).append("\"");
    final Node langAttr = node.selectSingleNode("@xml:lang");
    if (langAttr != null) {
        literal.append("@").append(langAttr.getStringValue());
    }
    return literal.toString();
}