Java String Accent stripAccents(String input)

Here you can find the source of stripAccents(String input)

Description

Removes diacritics (~= accents) from a string.

License

Apache License

Parameter

Parameter Description
input String to be stripped

Return

input text with diacritics removed

Declaration




public static String stripAccents(String input) 

Method Source Code

//package com.java2s;
//License from project: Apache License 

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

import java.util.regex.Pattern;

/**
 * Holder for {@link #stripAccents(String)}, which removes diacritics from a
 * string using whichever Unicode normalizer the running JVM provides.
 * <p>
 * The normalizer is located once, reflectively, when the class is
 * initialized, so this class has no compile-time dependency on
 * {@code java.text.Normalizer} or {@code sun.text.Normalizer}.
 * </p>
 */
public class Main {
    private static boolean sunAvailable = false;
    private static Method sunDecomposeMethod = null;
    private static final Pattern sunPattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    private static boolean java6Available = false;
    private static Method java6NormalizeMethod = null;
    private static Object java6NormalizerFormNFD = null;
    private static final Pattern java6Pattern = sunPattern;

    // Probe for the available normalizer implementations. Without this
    // initializer both "available" flags stay false and stripAccents(...)
    // would reject every non-null input.
    static {
        try {
            // Equivalent to: java.text.Normalizer.normalize(text, Normalizer.Form.NFD)
            // Resolving Normalizer$Form first guards against the unrelated
            // Java 1.3 class that is also named java.text.Normalizer.
            Class<?> formClass = Class.forName("java.text.Normalizer$Form");//$NON-NLS-1$
            Object nfd = formClass.getField("NFD").get(null);//$NON-NLS-1$
            Class<?> normalizerClass = Class.forName("java.text.Normalizer");//$NON-NLS-1$
            Method normalize = normalizerClass.getMethod("normalize",//$NON-NLS-1$
                    new Class<?>[] { CharSequence.class, formClass });
            // Publish only after every lookup has succeeded so the fields are
            // never left half-initialized.
            java6NormalizerFormNFD = nfd;
            java6NormalizeMethod = normalize;
            java6Available = true;
        } catch (Exception ignored) {
            // Missing class/field/method or denied access: this strategy is
            // simply unavailable on the running JVM.
            java6Available = false;
        }

        try {
            // Equivalent to: sun.text.Normalizer.decompose(text, false, 0)
            Class<?> normalizerClass = Class.forName("sun.text.Normalizer");//$NON-NLS-1$
            Method decompose = normalizerClass.getMethod("decompose",//$NON-NLS-1$
                    new Class<?>[] { String.class, Boolean.TYPE, Integer.TYPE });
            sunDecomposeMethod = decompose;
            sunAvailable = true;
        } catch (Exception ignored) {
            // Same reasoning as above: treat any lookup failure as "not available".
            sunAvailable = false;
        }
    }

    /**
     * <p>
     * Removes diacritics (~= accents) from a string. The case will not be
     * altered.
     * </p>
     * <p>
     * For instance, '&agrave;' will be replaced by 'a'.
     * </p>
     * <p>
     * Note that ligatures will be left as is.
     * </p>
     *
     * <p>
     * This method will use the first available implementation of: Java 6's
     * {@link java.text.Normalizer}, Java 1.3&ndash;1.5's
     * {@code sun.text.Normalizer}
     * </p>
     *
     * <pre>
     * Main.stripAccents(null)                = null
     * Main.stripAccents("")                  = ""
     * Main.stripAccents("control")           = "control"
     * Main.stripAccents("&eacute;clair")     = "eclair"
     * </pre>
     *
     * @param input
     *            String to be stripped, may be {@code null}
     * @return input text with diacritics removed, or {@code null} if
     *         {@code input} is {@code null}
     * @throws UnsupportedOperationException
     *             if neither normalizer implementation is available
     * @throws RuntimeException
     *             wrapping any failure of the underlying reflective call
     *
     * @since 3.0
     */
    // See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented
    // characters by their unaccented equivalent (and uncommitted bug fix:
    // https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907).
    public static String stripAccents(String input) {
        if (input == null) {
            return null;
        }
        try {
            String result = null;
            // Prefer the public java.text API; fall back to the legacy Sun class.
            if (java6Available) {
                result = removeAccentsJava6(input);
            } else if (sunAvailable) {
                result = removeAccentsSUN(input);
            } else {
                throw new UnsupportedOperationException(
                        "The stripAccents(CharSequence) method requires at least Java 1.6 or a Sun JVM");
            }
            // Note that none of the above methods correctly remove ligatures...
            return result;
        } catch (IllegalArgumentException iae) {
            throw new RuntimeException("IllegalArgumentException occurred", iae);
        } catch (IllegalAccessException iae) {
            throw new RuntimeException("IllegalAccessException occurred", iae);
        } catch (InvocationTargetException ite) {
            throw new RuntimeException("InvocationTargetException occurred", ite);
        } catch (SecurityException se) {
            throw new RuntimeException("SecurityException occurred", se);
        }
    }

    /**
     * Use {@code java.text.Normalizer#normalize(CharSequence, Normalizer.Form)}
     * (but be careful, this class exists in Java 1.3, with an entirely
     * different meaning!)
     *
     * @param text
     *            the text to be processed
     * @return the processed string
     * @throws IllegalAccessException
     *             may be thrown by a reflection call
     * @throws InvocationTargetException
     *             if a reflection call throws an exception
     * @throws IllegalStateException
     *             if the {@code Normalizer} class is not available
     */
    private static String removeAccentsJava6(CharSequence text)
            throws IllegalAccessException, InvocationTargetException {
        /*
         * String decomposed = java.text.Normalizer.normalize(CharSequence,
         * Normalizer.Form.NFD); return
         * java6Pattern.matcher(decomposed).replaceAll("");//$NON-NLS-1$
         */
        if (!java6Available || java6NormalizerFormNFD == null) {
            throw new IllegalStateException("java.text.Normalizer is not available");
        }
        // NFD splits each accented character into base letter + combining
        // marks; the pattern then deletes the combining marks.
        String result;
        result = (String) java6NormalizeMethod.invoke(null, new Object[] { text, java6NormalizerFormNFD });
        result = java6Pattern.matcher(result).replaceAll("");//$NON-NLS-1$
        return result;
    }

    /**
     * Use {@code sun.text.Normalizer#decompose(String, boolean, int)}
     *
     * @param text
     *            the text to be processed
     * @return the processed string
     * @throws IllegalAccessException
     *             may be thrown by a reflection call
     * @throws InvocationTargetException
     *             if a reflection call throws an exception
     * @throws IllegalStateException
     *             if the {@code Normalizer} class is not available
     */
    private static String removeAccentsSUN(CharSequence text)
            throws IllegalAccessException, InvocationTargetException {
        /*
         * String decomposed = sun.text.Normalizer.decompose(text, false, 0);
         * return sunPattern.matcher(decomposed).replaceAll("");//$NON-NLS-1$
         */
        if (!sunAvailable) {
            throw new IllegalStateException("sun.text.Normalizer is not available");
        }
        String result;
        result = (String) sunDecomposeMethod.invoke(null, new Object[] { text, Boolean.FALSE, Integer.valueOf(0) });
        result = sunPattern.matcher(result).replaceAll("");//$NON-NLS-1$
        return result;
    }
}

Related

  1. replaceAccentedChars(StringBuilder buffer)
  2. replaceAccents(String string)
  3. stripAccents(final String input)
  4. stripAccents(final String input)
  5. stripAccents(final String s)
  6. stripAccents(String input)
  7. stripAccents(String input)
  8. stripAccents(String v)
  9. stripAccentsToLowerCase(String str)