Here you can find the source of removeDiacritics(String word)
public static String removeDiacritics(String word)
//package com.java2s;
/*
 Copyright 2009-2013 The MITRE Corporation.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.

 * **************************************************************************
 *                          NOTICE
 * This software was produced for the U. S. Government under Contract No.
 * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer
 * Software and Noncommercial Computer Software Documentation Clause
 * 252.227-7014 (JUN 1995)
 *
 * (c) 2012 The MITRE Corporation. All Rights Reserved.
 * **************************************************************************
 **/

import java.text.Normalizer;

public class Main {
    /**
     * Removes diacritics from a word.
     *
     * <p>The word is first fully decomposed (Unicode NFD), then every code
     * point whose general category is one of the "Mark" categories
     * (non-spacing, combining-spacing, or enclosing) is discarded. The
     * remaining code points are returned in their decomposed form; the result
     * is not recomposed.
     *
     * <p>The text is processed code point by code point rather than
     * {@code char} by {@code char}, so combining marks outside the Basic
     * Multilingual Plane (which occupy a surrogate pair) are removed as well.
     *
     * @param word the text to strip; must not be {@code null}
     * @return {@code word} in NFD with all mark code points removed
     * @throws NullPointerException if {@code word} is {@code null}
     *         (raised by {@link Normalizer#normalize})
     */
    public static String removeDiacritics(String word) {
        // Decompose first so that precomposed letters (e.g. U+00E9) are split
        // into a base letter followed by separate combining marks.
        String decomposed = Normalizer.normalize(word, Normalizer.Form.NFD);
        StringBuilder stripped = new StringBuilder(decomposed.length());

        int index = 0;
        while (index < decomposed.length()) {
            int codePoint = decomposed.codePointAt(index);
            // Advance by 1 or 2 chars depending on whether this code point
            // is encoded as a surrogate pair.
            index += Character.charCount(codePoint);
            if (!isMark(codePoint)) {
                stripped.appendCodePoint(codePoint);
            }
        }
        return stripped.toString();
    }

    /** Returns true if the code point belongs to one of the Unicode "Mark" categories. */
    private static boolean isMark(int codePoint) {
        switch (Character.getType(codePoint)) {
            case Character.NON_SPACING_MARK:
            case Character.COMBINING_SPACING_MARK:
            case Character.ENCLOSING_MARK:
                return true;
            default:
                return false;
        }
    }
}