Here you can find the source of removeAccentsAndNonStandardCharacters(String string)
public static String removeAccentsAndNonStandardCharacters(String string)
//package com.java2s;
/*
 * Copyright (c) 2008 - Tomas Janecek.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

import java.lang.reflect.Method;
import java.text.Normalizer;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * String helper that reduces arbitrary text to a lower-case key made only of
 * ASCII letters, digits and underscores.
 */
public class Main {

    /**
     * Matches everything that must be stripped after NFD decomposition:
     * combining diacritical marks and any character outside
     * {@code [a-zA-Z_0-9]} (the pattern is compiled without
     * {@code UNICODE_CHARACTER_CLASS}, so {@code \W} is ASCII-based).
     */
    private static final Pattern CLEANUP_PATTERN = Pattern
            .compile("(\\p{InCombiningDiacriticalMarks}+)|(\\W)");

    /**
     * Strips accents and every non-word character from the given text and
     * lower-cases the result.
     *
     * <p>The text is first decomposed to Unicode NFD so that an accented letter
     * becomes its base letter followed by combining marks; the marks and all
     * remaining characters outside {@code [a-zA-Z_0-9]} are then removed. For
     * example {@code "Café au lait!"} becomes {@code "cafeaulait"}. Letters
     * that have no canonical decomposition (such as {@code ø} or {@code ß})
     * are removed entirely, and underscores are preserved.
     *
     * @param string the text to clean; must not be {@code null}
     * @return the cleaned, lower-cased text (possibly empty)
     * @throws NullPointerException if {@code string} is {@code null}
     */
    public static String removeAccentsAndNonStandardCharacters(String string) {
        if (string == null) {
            throw new NullPointerException("string must not be null");
        }

        // Decompose first: without this step "é" is a single non-ASCII char
        // that \W would delete outright instead of leaving the base letter "e".
        String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
        String cleaned = CLEANUP_PATTERN.matcher(decomposed).replaceAll("");

        // Locale.ROOT keeps the result locale-independent; the default locale
        // would map "I" to dotless "ı" under Turkish/Azeri settings.
        return cleaned.toLowerCase(Locale.ROOT);
    }
}