Here you can find the source of removeAccents(String input)
Parameter | Description |
---|---|
input | the string possibly containing accented letters. |
public static String removeAccents(String input)
//package com.java2s;
/*******************************************************************************
 * Copyright (c) 2010, 2012 Institute for Dutch Lexicology
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

import java.text.Normalizer;
import java.util.Objects;
import java.util.regex.Pattern;

/**
 * String helper for stripping diacritics (accents) from text.
 */
public class Main {

    /**
     * Matches runs of characters from the Unicode "Combining Diacritical Marks" block
     * (U+0300 to U+036F). After canonical decomposition (NFD) the accents of most
     * Latin letters are separate characters in this block, so removing every match
     * leaves only the base letters.
     *
     * <p>Compiled once and reused, because compiling a regex on every call is wasteful.
     */
    private static final Pattern PATT_DIACRITICS =
            Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /**
     * Convert accented letters to their unaccented counterparts.
     *
     * <p>The input is first decomposed into base characters followed by their combining
     * marks, and the combining marks are then deleted. Characters that have no canonical
     * decomposition (for example the o-with-stroke, U+00F8) are returned unchanged, and
     * combining marks outside the Combining Diacritical Marks block are not removed.
     *
     * @param input
     *            the string possibly containing accented letters; must not be
     *            {@code null}.
     * @return the unaccented version
     * @throws NullPointerException
     *             if {@code input} is {@code null}
     */
    public static String removeAccents(String input) {
        // Fail fast with a message naming the argument instead of an anonymous
        // NullPointerException raised from inside the normalizer.
        Objects.requireNonNull(input, "input");

        // Separate characters into base character and diacritics characters
        String normalized = Normalizer.normalize(input, Normalizer.Form.NFD);

        // Remove diacritics
        return PATT_DIACRITICS.matcher(normalized).replaceAll("");
    }
}