Here you can find the source of stripAccents(String input)
Removes diacritics (~= accents) from a string.
Parameter | Description |
---|---|
input | String to be stripped |
public static String stripAccents(String input)
//package com.java2s;
//License from project: Apache License

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.regex.Pattern;

/**
 * Utility class that strips diacritics (accents) from strings.
 *
 * <p>
 * The normalizer is looked up reflectively once, when the class is
 * initialized, so the class can load on runtimes that lack
 * {@code java.text.Normalizer}. All lookup state is written only in the
 * static initializer and is read-only afterwards.
 * </p>
 */
public class Main {

    /** Matches runs of combining diacritical marks left behind by decomposition. */
    private static final Pattern sunPattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /** Same pattern, used by the {@code java.text.Normalizer} code path. */
    private static final Pattern java6Pattern = sunPattern;

    /** {@code true} when {@code java.text.Normalizer#normalize} was resolved. */
    private static final boolean java6Available;

    /** {@code java.text.Normalizer#normalize(CharSequence, Normalizer.Form)}, or {@code null}. */
    private static final Method java6NormalizeMethod;

    /** The {@code java.text.Normalizer.Form.NFD} constant, or {@code null}. */
    private static final Object java6NormalizerFormNFD;

    /** {@code true} when {@code sun.text.Normalizer#decompose} was resolved. */
    private static final boolean sunAvailable;

    /** {@code sun.text.Normalizer#decompose(String, boolean, int)}, or {@code null}. */
    private static final Method sunDecomposeMethod;

    static {
        // Without this probing the availability flags would stay false forever
        // and stripAccents would reject every non-null input.
        Method normalize = null;
        Object nfd = null;
        try {
            // Resolve the nested Form type first: a runtime whose
            // java.text.Normalizer is an older, unrelated class has no Form,
            // so this fails before the wrong Normalizer can be picked up.
            Class<?> formClass = Class.forName("java.text.Normalizer$Form");
            Object form = formClass.getField("NFD").get(null);
            Method method = Class.forName("java.text.Normalizer")
                    .getMethod("normalize", CharSequence.class, formClass);
            // Publish both values together so the pair is never half-initialized.
            nfd = form;
            normalize = method;
        } catch (Exception ignored) {
            // Best-effort probe: any failure (missing class, field or method,
            // access denied) simply means this implementation is unavailable.
        }
        java6NormalizerFormNFD = nfd;
        java6NormalizeMethod = normalize;
        java6Available = normalize != null;

        Method decompose = null;
        if (!java6Available) {
            // Only touch the JVM-internal class when the public API is missing.
            try {
                decompose = Class.forName("sun.text.Normalizer")
                        .getMethod("decompose", String.class, Boolean.TYPE, Integer.TYPE);
            } catch (Exception ignored) {
                // Best-effort probe: failure means the fallback is unavailable.
            }
        }
        sunDecomposeMethod = decompose;
        sunAvailable = decompose != null;
    }

    /**
     * <p>
     * Removes diacritics (~= accents) from a string. The case will not be
     * altered.
     * </p>
     * <p>
     * For instance, '&agrave;' will be replaced by 'a'.
     * </p>
     * <p>
     * Note that ligatures will be left as is.
     * </p>
     *
     * <p>
     * This method will use the first available implementation of: Java 6's
     * {@link java.text.Normalizer}, Java 1.3&ndash;1.5's
     * {@code sun.text.Normalizer}
     * </p>
     *
     * <pre>
     * Main.stripAccents(null)         = null
     * Main.stripAccents("")           = ""
     * Main.stripAccents("control")    = "control"
     * Main.stripAccents("&eacute;clair") = "eclair"
     * </pre>
     *
     * @param input
     *            String to be stripped, may be {@code null}
     * @return input text with diacritics removed, or {@code null} if
     *         {@code input} is {@code null}
     * @throws UnsupportedOperationException
     *             if neither normalizer implementation could be resolved
     * @throws RuntimeException
     *             wrapping any failure of the reflective normalizer call
     */
    // See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented
    // characters by their unaccented equivalent (and uncommitted bug fix:
    // https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907).
    public static String stripAccents(String input) {
        if (input == null) {
            return null;
        }
        try {
            String result = null;
            if (java6Available) {
                result = removeAccentsJava6(input);
            } else if (sunAvailable) {
                result = removeAccentsSUN(input);
            } else {
                throw new UnsupportedOperationException(
                        "The stripAccents(CharSequence) method requires at least Java 1.6 or a Sun JVM");
            }
            // Note that none of the above methods correctly remove ligatures...
            return result;
        } catch (IllegalArgumentException iae) {
            throw new RuntimeException("IllegalArgumentException occurred", iae);
        } catch (IllegalAccessException iae) {
            throw new RuntimeException("IllegalAccessException occurred", iae);
        } catch (InvocationTargetException ite) {
            throw new RuntimeException("InvocationTargetException occurred", ite);
        } catch (SecurityException se) {
            throw new RuntimeException("SecurityException occurred", se);
        }
    }

    /**
     * Use {@code java.text.Normalizer#normalize(CharSequence, Normalizer.Form)}
     * (but be careful, this class exists in Java 1.3, with an entirely
     * different meaning!)
     *
     * <p>
     * Reflective equivalent of
     * {@code java6Pattern.matcher(Normalizer.normalize(text, Normalizer.Form.NFD)).replaceAll("")}.
     * </p>
     *
     * @param text
     *            the text to be processed
     * @return the processed string
     * @throws IllegalAccessException
     *             may be thrown by a reflection call
     * @throws InvocationTargetException
     *             if a reflection call throws an exception
     * @throws IllegalStateException
     *             if the {@code Normalizer} class is not available
     */
    private static String removeAccentsJava6(CharSequence text)
            throws IllegalAccessException, InvocationTargetException {
        if (!java6Available || java6NormalizerFormNFD == null) {
            throw new IllegalStateException("java.text.Normalizer is not available");
        }
        // Static method, hence the null receiver.
        String result = (String) java6NormalizeMethod.invoke(null,
                new Object[] { text, java6NormalizerFormNFD });
        return java6Pattern.matcher(result).replaceAll("");//$NON-NLS-1$
    }

    /**
     * Use {@code sun.text.Normalizer#decompose(String, boolean, int)}
     *
     * <p>
     * Reflective equivalent of
     * {@code sunPattern.matcher(sun.text.Normalizer.decompose(text, false, 0)).replaceAll("")}.
     * </p>
     *
     * @param text
     *            the text to be processed
     * @return the processed string
     * @throws IllegalAccessException
     *             may be thrown by a reflection call
     * @throws InvocationTargetException
     *             if a reflection call throws an exception
     * @throws IllegalStateException
     *             if the {@code Normalizer} class is not available
     */
    private static String removeAccentsSUN(CharSequence text)
            throws IllegalAccessException, InvocationTargetException {
        if (!sunAvailable) {
            throw new IllegalStateException("sun.text.Normalizer is not available");
        }
        // Static method, hence the null receiver.
        String result = (String) sunDecomposeMethod.invoke(null,
                new Object[] { text, Boolean.FALSE, Integer.valueOf(0) });
        return sunPattern.matcher(result).replaceAll("");//$NON-NLS-1$
    }
}