List of usage examples for the java.text.Normalizer.normalize method
public static String normalize(CharSequence src, Form form)
From source file:module.entities.UsernameChecker.CheckOpengovUsernames.java
/** * @param args the command line arguments *///from w w w . j av a2 s. c om public static void main(String[] args) throws SQLException, IOException { // args = new String[1]; // args[0] = "searchConf.txt"; Date d = new Date(); long milTime = d.getTime(); long execStart = System.nanoTime(); Timestamp startTime = new Timestamp(milTime); long lStartTime; long lEndTime = 0; int status_id = 1; JSONObject obj = new JSONObject(); if (args.length != 1) { System.out.println("None or too many argument parameters where defined! " + "\nPlease provide ONLY the configuration file name as the only argument."); } else { try { configFile = args[0]; initLexicons(); Database.init(); lStartTime = System.currentTimeMillis(); System.out.println("Opengov username identification process started at: " + startTime); usernameCheckerId = Database.LogUsernameChecker(lStartTime); TreeMap<Integer, String> OpenGovUsernames = Database.GetOpenGovUsers(); HashSet<ReportEntry> report_names = new HashSet<>(); if (OpenGovUsernames.size() > 0) { for (int userID : OpenGovUsernames.keySet()) { String DBusername = Normalizer .normalize(OpenGovUsernames.get(userID).toUpperCase(locale), Normalizer.Form.NFD) .replaceAll("\\p{M}", ""); String username = ""; int type; String[] splitUsername = DBusername.split(" "); if (checkNameInLexicons(splitUsername)) { for (String splText : splitUsername) { username += splText + " "; } type = 1; } else if (checkOrgInLexicons(splitUsername)) { for (String splText : splitUsername) { username += splText + " "; } type = 2; } else { username = DBusername; type = -1; } ReportEntry cerEntry = new ReportEntry(userID, username.trim(), type); report_names.add(cerEntry); } status_id = 2; obj.put("message", "Opengov username checker finished with no errors"); obj.put("details", ""); Database.UpdateOpengovUsersReportName(report_names); lEndTime = System.currentTimeMillis(); } else { status_id = 2; obj.put("message", "Opengov username checker finished with no errors"); 
obj.put("details", "No usernames needed to be checked"); lEndTime = System.currentTimeMillis(); } } catch (Exception ex) { System.err.println(ex.getMessage()); status_id = 3; obj.put("message", "Opengov username checker encountered an error"); obj.put("details", ex.getMessage().toString()); lEndTime = System.currentTimeMillis(); } } long execEnd = System.nanoTime(); long executionTime = (execEnd - execStart); System.out.println("Total process time: " + (((executionTime / 1000000) / 1000) / 60) + " minutes."); Database.UpdateLogUsernameChecker(lEndTime, status_id, usernameCheckerId, obj); Database.closeConnection(); }
From source file:Main.java
/**
 * Strips diacritics from the given text.
 *
 * @param string the text to clean; must not be {@code null}
 * @return the text in NFD form with every run of characters from the
 *         Combining Diacritical Marks block removed
 */
public static String normalizer(String string) {
    final String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
    final String withoutMarks = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    return withoutMarks;
}
From source file:Main.java
/**
 * Strips diacritics from the given text and lower-cases the result.
 *
 * @param string the text to clean; must not be {@code null}
 * @return the accent-free text, lower-cased with the default locale
 */
public static String normalizerCase(String string) {
    final String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
    final String withoutMarks = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    return withoutMarks.toLowerCase();
}
From source file:Main.java
/**
 * Reduces the given text to its ASCII characters.
 *
 * <p>The text is decomposed (NFD) first, so an accented letter keeps its base letter while the
 * combining mark is dropped; any other non-ASCII character is removed entirely.
 *
 * @param s the text to convert; must not be {@code null}
 * @return the text containing ASCII characters only
 */
public static String formatString(String s) {
    return Normalizer.normalize(s, Normalizer.Form.NFD)
            .replaceAll("[^\\p{ASCII}]", "");
}
From source file:Main.java
/**
 * Removes accents from a title.
 *
 * @param title the character sequence to clean; must not be {@code null}
 * @return the NFD form of {@code title} without characters from the
 *         Combining Diacritical Marks block
 */
public static String removeAccents(CharSequence title) {
    return Normalizer.normalize(title, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
From source file:Main.java
/**
 * Tells whether {@code originalText} contains {@code search}, ignoring accents and case.
 *
 * <p>Both arguments are decomposed (NFD), stripped of combining diacritical marks and
 * lower-cased before the comparison, so {@code "CAFÉ"} is found in {@code "un cafe noir"} and
 * vice versa.
 *
 * @param search       the text to look for; {@code null} or empty never matches
 * @param originalText the text to search in; {@code null} never matches
 * @return {@code true} if the normalized text contains the normalized search term
 */
public static boolean isContainText(String search, String originalText) {
    if (search == null || search.isEmpty() || originalText == null) {
        return false;
    }
    final String marks = "\\p{InCombiningDiacriticalMarks}+";
    String haystack = Normalizer.normalize(originalText, Normalizer.Form.NFD)
            .replaceAll(marks, "")
            .toLowerCase();
    // The search term goes through the same pipeline as the text; otherwise a term with an
    // upper-case or accented character could never match.
    String needle = Normalizer.normalize(search, Normalizer.Form.NFD)
            .replaceAll(marks, "")
            .toLowerCase();
    // A term made only of combining marks normalizes to nothing; treat it like an empty search.
    if (needle.isEmpty()) {
        return false;
    }
    return haystack.contains(needle);
}
From source file:Main.java
public static CharSequence highlightText(String search, String originalText) { if (search != null && !search.equalsIgnoreCase("")) { String normalizedText = Normalizer.normalize(originalText, Normalizer.Form.NFD) .replaceAll("\\p{InCombiningDiacriticalMarks}+", "").toLowerCase(); int start = normalizedText.indexOf(search); if (start < 0) { return originalText; } else {// w w w .jav a 2 s . com Spannable highlighted = new SpannableString(originalText); while (start >= 0) { int spanStart = Math.min(start, originalText.length()); int spanEnd = Math.min(start + search.length(), originalText.length()); highlighted.setSpan(new ForegroundColorSpan(Color.BLUE), spanStart, spanEnd, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); start = normalizedText.indexOf(search, spanEnd); } return highlighted; } } return originalText; }
From source file:com.github.slugify.Slugify.java
private static String normalize(String input) { String ret = StringUtils.trim(input); if (StringUtils.isBlank(ret)) { return ""; }//from w w w. ja v a 2 s. co m ret = ret.replace("", "ss"); return Normalizer.normalize(ret, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "") .replaceAll("[^a-zA-Z0-9 ]", ""); }
From source file:org.wso2.carbon.ml.algorithms.SoundexMatchUtility.java
/** * Used to deAccent words to english letters *///w w w . ja v a 2 s.com private static String deAccent(String str) { String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD); Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); return pattern.matcher(nfdNormalizedString).replaceAll(""); }
From source file:graph.module.NLPToSyntaxModule.java
/** * Replaces special characters from the latin-1 table with the nearest * characters from the ascii table:/* w w w . ja v a2 s . co m*/ * * For example: ,,, will become a, becomes ss, , * become c, C... */ public static String convertToAscii(String str) { if (str == null || str.isEmpty()) return str; // Pre-normalisation String temp = Normalizer.normalize(str, Normalizer.Form.NFD); Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); str = pattern.matcher(temp).replaceAll(""); StringBuilder buffer = new StringBuilder(); char[] strArray = str.toCharArray(); for (int i = 0; i < strArray.length; i++) { char c = strArray[i]; if (c < 128) buffer.append(c); else if (c == 176) buffer.append(" degrees "); else if (c == 198) buffer.append("AE"); else if (c == 230) buffer.append("ae"); else if (c == 338) buffer.append("OE"); else if (c == 339) buffer.append("oe"); else if (c == 223) buffer.append("ss"); else if (c == 8211 || c == 8212) buffer.append("-"); else if (c == 8217) buffer.append("'"); else { // System.out.println("Unknown character: " + c + " (" + ((int) // c) // + ")" + ((c + "") == "?")); buffer.append("?"); } } return buffer.toString(); }