Here you can find the source of sanitizeUnicode(String str)
public static String sanitizeUnicode(String str)
//package com.java2s;
//License from project: LGPL

public class Main {
    /**
     * Returns a copy of {@code str} in which a fixed set of dash, bar and
     * fullwidth symbol characters is replaced by a preferred equivalent.
     *
     * <p>Every other UTF-16 code unit, including both halves of a surrogate
     * pair, is copied through unchanged, so the result always has the same
     * length as the input.
     *
     * <p>NOTE(review): the substituted pairs look like the code points that
     * commonly differ between Japanese legacy-encoding converters; confirm the
     * intended use with the callers.
     *
     * @param str the text to sanitize; may be {@code null}
     * @return the sanitized text, or {@code null} if {@code str} is {@code null}
     */
    public static String sanitizeUnicode(String str) {
        if (str == null) {
            return null;
        }

        int len = str.length();
        // Replacements are strictly one char for one char, so the final size is known up front.
        StringBuilder buf = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            buf.append(replacementFor(str.charAt(i)));
        }
        return buf.toString();
    }

    /**
     * Maps a single UTF-16 code unit to its replacement, or returns it as-is
     * when no replacement is defined. No surrogate value appears in the table,
     * so surrogate halves always pass through untouched.
     */
    private static char replacementFor(char ch) {
        switch (ch) {
        case '\u2013': // En Dash -> Hyphen-Minus
            return '-';
        case '\u2015': // Horizontal Bar -> Em Dash
            return '\u2014';
        case '\u2225': // Parallel To -> Double Vertical Line
            return '\u2016';
        case '\uff0d': // Fullwidth Hyphen-Minus -> Minus Sign
            return '\u2212';
        case '\uff5e': // Fullwidth Tilde -> Wave Dash
            return '\u301c';
        case '\uffe0': // Fullwidth Cent Sign -> Cent Sign
            return '\u00a2';
        case '\uffe1': // Fullwidth Pound Sign -> Pound Sign
            return '\u00a3';
        case '\uffe2': // Fullwidth Not Sign -> Not Sign
            return '\u00ac';
        default:
            return ch;
        }
    }
}