Here you can find the source of stripAccents(final String input)
Removes diacritics (~= accents) from a string.
Parameter | Description |
---|---|
input | String to be stripped |
public static String stripAccents(final String input)
//package com.java2s;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.text.Normalizer;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Small collection of null-safe static String helpers.
 */
public class Main {

    /**
     * Matches one or more characters from the Unicode "Combining Diacritical
     * Marks" block. Compiled once because the expression never changes.
     */
    private static final Pattern COMBINING_DIACRITICS =
            Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); //$NON-NLS-1$

    /**
     * <p>
     * Removes diacritics (~= accents) from a string. The case will not be
     * altered.
     * </p>
     * <p>
     * For instance, '&agrave;' will be replaced by 'a'.
     * </p>
     * <p>
     * Note that ligatures will be left as is.
     * </p>
     *
     * <pre>
     * Main.stripAccents(null)           = null
     * Main.stripAccents("")             = ""
     * Main.stripAccents("control")      = "control"
     * Main.stripAccents("&eacute;clair") = "eclair"
     * </pre>
     *
     * @param input
     *            String to be stripped, may be {@code null}
     * @return input text with diacritics removed, {@code null} if the input
     *         is {@code null}
     *
     * @since 3.0
     */
    // See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented
    // characters by their unaccented equivalent (and uncommitted bug fix:
    // https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907).
    public static String stripAccents(final String input) {
        if (input == null) {
            return null;
        }
        // NFD splits each accented character into its base character followed
        // by separate combining marks, which the pattern can then remove.
        final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
        // Note that this doesn't correctly remove ligatures...
        return COMBINING_DIACRITICS.matcher(decomposed).replaceAll(""); //$NON-NLS-1$
    }

    /**
     * Lower-cases and trims a string.
     *
     * <p>
     * Lower-casing uses {@link Locale#ROOT}, so the result does not depend on
     * the JVM's default locale (for example, {@code "I"} always becomes
     * {@code "i"}, also under a Turkish default locale).
     * </p>
     *
     * <pre>
     * Main.normalize(null)     = null
     * Main.normalize("")       = ""
     * Main.normalize("  AbC ") = "abc"
     * </pre>
     *
     * @param s
     *            the String to normalize, may be {@code null}
     * @return the lower-cased, trimmed string; {@code null} if the input is
     *         {@code null}; the input itself if it is empty
     */
    public static String normalize(String s) {
        if (s == null) {
            return null;
        }
        if (s.length() == 0) {
            return s;
        }
        return s.toLowerCase(Locale.ROOT).trim();
    }

    /**
     * Gets a CharSequence length or {@code 0} if the CharSequence is
     * {@code null}.
     *
     * @param cs
     *            a CharSequence or {@code null}
     * @return CharSequence length or {@code 0} if the CharSequence is
     *         {@code null}.
     * @since 2.4
     * @since 3.0 Changed signature from length(String) to length(CharSequence)
     */
    public static int length(final CharSequence cs) {
        return cs == null ? 0 : cs.length();
    }

    /**
     * <p>
     * Removes control characters ({@code char <= 32}) from both ends of this
     * String, handling {@code null} by returning {@code null}.
     * </p>
     *
     * <p>
     * The String is trimmed using {@link String#trim()}. Trim removes start
     * and end characters {@code <= 32}.
     * </p>
     *
     * <pre>
     * Main.trim(null)    = null
     * Main.trim("")      = ""
     * Main.trim(" ")     = ""
     * Main.trim("abc")   = "abc"
     * Main.trim(" abc ") = "abc"
     * </pre>
     *
     * @param str
     *            the String to be trimmed, may be null
     * @return the trimmed string, {@code null} if null String input
     */
    public static String trim(final String str) {
        return str == null ? null : str.trim();
    }
}