Example usage for java.text.Normalizer.normalize

Introduction

On this page you can find example usages of java.text.Normalizer.normalize.

Prototype

public static String normalize(CharSequence src, Form form)

Source Link

Document

Normalize a sequence of char values.

Usage

From source file:module.entities.UsernameChecker.CheckOpengovUsernames.java

/**
 * Entry point of the Opengov username checker.
 *
 * <p>Exactly one argument is expected: the configuration file name. Each user name returned by
 * {@code Database.GetOpenGovUsers()} is upper-cased, NFD-normalised and stripped of combining
 * marks, then classified: type 1 when {@code checkNameInLexicons} accepts it, type 2 when
 * {@code checkOrgInLexicons} accepts it, and -1 otherwise. The resulting report entries are
 * handed to {@code Database.UpdateOpengovUsersReportName}.
 *
 * <p>Whatever happens (including a wrong argument count), the final status and a JSON message
 * are passed to {@code Database.UpdateLogUsernameChecker} and the connection is closed.
 *
 * @param args the command line arguments
 * @throws SQLException declared by the signature; not caught by this method
 * @throws IOException declared by the signature; not caught by this method
 */
public static void main(String[] args) throws SQLException, IOException {
    Timestamp startTime = new Timestamp(System.currentTimeMillis());
    long execStart = System.nanoTime();
    long lEndTime = 0;
    int status_id = 1;
    JSONObject obj = new JSONObject();
    if (args.length != 1) {
        System.out.println("None or too many argument parameters where defined! "
                + "\nPlease provide ONLY the configuration file name as the only argument.");
    } else {
        try {
            configFile = args[0];
            initLexicons();
            Database.init();
            long lStartTime = System.currentTimeMillis();
            System.out.println("Opengov username identification process started at: " + startTime);
            usernameCheckerId = Database.LogUsernameChecker(lStartTime);
            TreeMap<Integer, String> OpenGovUsernames = Database.GetOpenGovUsers();
            if (OpenGovUsernames.size() > 0) {
                HashSet<ReportEntry> report_names = new HashSet<>();
                for (int userID : OpenGovUsernames.keySet()) {
                    // Upper-case, decompose and drop every combining mark (\p{M}).
                    String DBusername = Normalizer
                            .normalize(OpenGovUsernames.get(userID).toUpperCase(locale), Normalizer.Form.NFD)
                            .replaceAll("\\p{M}", "");
                    String[] splitUsername = DBusername.split(" ");

                    // The organisation lexicon is only consulted when the name lexicon rejects.
                    int type;
                    if (checkNameInLexicons(splitUsername)) {
                        type = 1;
                    } else if (checkOrgInLexicons(splitUsername)) {
                        type = 2;
                    } else {
                        type = -1;
                    }

                    String username;
                    if (type == -1) {
                        username = DBusername;
                    } else {
                        // Re-join the split tokens; the trailing blank is trimmed below.
                        StringBuilder joined = new StringBuilder();
                        for (String splText : splitUsername) {
                            joined.append(splText).append(' ');
                        }
                        username = joined.toString();
                    }
                    report_names.add(new ReportEntry(userID, username.trim(), type));
                }
                status_id = 2;
                obj.put("message", "Opengov username checker finished with no errors");
                obj.put("details", "");
                Database.UpdateOpengovUsersReportName(report_names);
                lEndTime = System.currentTimeMillis();
            } else {
                status_id = 2;
                obj.put("message", "Opengov username checker finished with no errors");
                obj.put("details", "No usernames needed to be checked");
                lEndTime = System.currentTimeMillis();
            }
        } catch (Exception ex) {
            // getMessage() may be null (e.g. a bare NullPointerException); dereferencing it here
            // would throw from inside the handler and lose the error status, so fall back to
            // the exception's toString().
            String errorText = ex.getMessage() != null ? ex.getMessage() : ex.toString();
            System.err.println(errorText);
            status_id = 3;
            obj.put("message", "Opengov username checker encountered an error");
            obj.put("details", errorText);
            lEndTime = System.currentTimeMillis();
        }
    }
    long execEnd = System.nanoTime();
    long executionTime = (execEnd - execStart);
    // nanoseconds -> milliseconds -> seconds -> whole minutes (integer division)
    System.out.println("Total process time: " + (((executionTime / 1000000) / 1000) / 60) + " minutes.");
    Database.UpdateLogUsernameChecker(lEndTime, status_id, usernameCheckerId, obj);
    Database.closeConnection();
}

From source file:Main.java

/**
 * Removes diacritics from a string.
 *
 * <p>The input is decomposed to NFD, then every run of characters from the Unicode
 * "Combining Diacritical Marks" block is deleted.
 *
 * @param string the text to process
 * @return the text without combining diacritical marks
 */
public static String normalizer(String string) {
    final String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:Main.java

/**
 * Removes diacritics from a string and lower-cases the result.
 *
 * <p>The input is decomposed to NFD and every run of characters from the Unicode
 * "Combining Diacritical Marks" block is deleted before lower-casing.
 *
 * @param string the text to process
 * @return the accent-free, lower-cased text
 */
public static String normalizerCase(String string) {
    String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
    String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    // Locale.ROOT keeps the result independent of the JVM default locale; with a Turkish
    // default, the no-argument toLowerCase() maps 'I' to dotless U+0131 instead of 'i'.
    return stripped.toLowerCase(java.util.Locale.ROOT);
}

From source file:Main.java

/**
 * Reduces a string to its ASCII characters.
 *
 * <p>The input is first decomposed to NFD so that accented letters split into a base letter
 * plus combining marks; every non-ASCII character is then removed.
 *
 * @param s the text to process
 * @return the text with all non-ASCII characters removed
 */
public static String formatString(String s) {
    return Normalizer.normalize(s, Normalizer.Form.NFD)
            .replaceAll("[^\\p{ASCII}]", "");
}

From source file:Main.java

/**
 * Strips accents from a character sequence.
 *
 * @param title the text to process
 * @return the NFD-decomposed text with every run of characters from the Unicode
 *         "Combining Diacritical Marks" block removed
 */
public static String removeAccents(CharSequence title) {
    return Normalizer.normalize(title, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:Main.java

/**
 * Tells whether {@code originalText} contains {@code search}, ignoring case and diacritics.
 *
 * <p>Both strings are decomposed to NFD, stripped of characters from the Unicode
 * "Combining Diacritical Marks" block and lower-cased (locale-independently) before the
 * comparison, so an upper-case or accented search term matches as well.
 *
 * @param search the text to look for; {@code null} or empty never matches
 * @param originalText the text to search in; {@code null} never matches
 * @return {@code true} if the folded search term occurs in the folded text
 */
public static boolean isContainText(String search, String originalText) {
    if (search == null || search.isEmpty() || originalText == null) {
        return false;
    }
    String normalizedText = Normalizer.normalize(originalText, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .toLowerCase(java.util.Locale.ROOT);
    // Fold the search term exactly like the text; otherwise a term containing upper-case or
    // accented letters could never be found in the folded text.
    String normalizedSearch = Normalizer.normalize(search, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .toLowerCase(java.util.Locale.ROOT);
    if (normalizedSearch.isEmpty()) {
        // The term consisted only of combining marks: nothing left to look for.
        return false;
    }
    return normalizedText.contains(normalizedSearch);
}

From source file:Main.java

/**
 * Returns {@code originalText} with every occurrence of {@code search} coloured blue, matching
 * without regard to case or diacritics.
 *
 * <p>Both strings are decomposed to NFD, stripped of characters from the Unicode
 * "Combining Diacritical Marks" block and lower-cased (locale-independently) before matching.
 * Match offsets are computed in the folded text and applied to the original text, clamped to
 * its length.
 * NOTE(review): the offsets only line up when folding does not change the text length
 * (e.g. the original uses precomposed characters) - confirm this is acceptable for callers.
 *
 * @param search the text to highlight; {@code null} or empty leaves the text untouched
 * @param originalText the text to decorate
 * @return {@code originalText} itself when nothing matches, otherwise a {@code Spannable} copy
 *         carrying one {@code ForegroundColorSpan} per match
 */
public static CharSequence highlightText(String search, String originalText) {
    if (search == null || search.isEmpty() || originalText == null) {
        return originalText;
    }
    String normalizedText = Normalizer.normalize(originalText, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .toLowerCase(java.util.Locale.ROOT);
    // Fold the search term like the text so upper-case or accented terms can match.
    String normalizedSearch = Normalizer.normalize(search, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .toLowerCase(java.util.Locale.ROOT);
    if (normalizedSearch.isEmpty()) {
        return originalText;
    }

    int start = normalizedText.indexOf(normalizedSearch);
    if (start < 0) {
        return originalText;
    }

    Spannable highlighted = new SpannableString(originalText);
    final int textLength = originalText.length();
    while (start >= 0) {
        int matchEnd = start + normalizedSearch.length();
        int spanStart = Math.min(start, textLength);
        int spanEnd = Math.min(matchEnd, textLength);
        if (spanStart < spanEnd) {
            highlighted.setSpan(new ForegroundColorSpan(Color.BLUE), spanStart, spanEnd,
                    Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
        }
        // Resume after the match in the folded text, not after the clamped span end: when the
        // folded text is longer than the original, the clamped value can fall at or before
        // 'start', which would find the same match again and never terminate.
        start = normalizedText.indexOf(normalizedSearch, matchEnd);
    }
    return highlighted;
}

From source file:com.github.slugify.Slugify.java

/**
 * Reduces the input to the ASCII letters, digits and spaces used to build a slug.
 *
 * <p>The input is trimmed; a blank or {@code null} input yields the empty string. The German
 * sharp s (U+00DF) is spelled out as "ss", the text is decomposed to NFD, and every character
 * that is not an ASCII letter, digit or space is dropped.
 *
 * @param input the raw text, may be {@code null}
 * @return the reduced text, never {@code null}
 */
private static String normalize(String input) {
    String ret = StringUtils.trim(input);
    if (StringUtils.isBlank(ret)) {
        return "";
    }

    // The replace target must be the sharp s. With an empty target, String.replace inserts
    // "ss" before every character and at the end of the string. (Presumably the original
    // literal was lost to an encoding problem; written as a Unicode escape to keep the source
    // ASCII-only.)
    ret = ret.replace("\u00df", "ss");
    return Normalizer.normalize(ret, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "")
            .replaceAll("[^a-zA-Z0-9 ]", "");
}

From source file:org.wso2.carbon.ml.algorithms.SoundexMatchUtility.java

/**
 * Strips accents from a word so that only the base letters remain.
 *
 * <p>The word is decomposed to NFD and every run of characters from the Unicode
 * "Combining Diacritical Marks" block is removed.
 *
 * @param str the word to process
 * @return the word without combining diacritical marks
 */
private static String deAccent(String str) {
    return Normalizer.normalize(str, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:graph.module.NLPToSyntaxModule.java

/**
 * Replaces special characters from the latin-1 table with the nearest
 * characters from the ascii table:/*  w w  w  . ja v  a2 s  . co m*/
 * 
 * For example: ,,, will become a,  becomes ss, ,
 * become c, C...
 */
public static String convertToAscii(String str) {
    if (str == null || str.isEmpty())
        return str;
    // Pre-normalisation
    String temp = Normalizer.normalize(str, Normalizer.Form.NFD);
    Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    str = pattern.matcher(temp).replaceAll("");

    StringBuilder buffer = new StringBuilder();
    char[] strArray = str.toCharArray();
    for (int i = 0; i < strArray.length; i++) {
        char c = strArray[i];

        if (c < 128)
            buffer.append(c);
        else if (c == 176)
            buffer.append(" degrees ");
        else if (c == 198)
            buffer.append("AE");
        else if (c == 230)
            buffer.append("ae");
        else if (c == 338)
            buffer.append("OE");
        else if (c == 339)
            buffer.append("oe");
        else if (c == 223)
            buffer.append("ss");
        else if (c == 8211 || c == 8212)
            buffer.append("-");
        else if (c == 8217)
            buffer.append("'");
        else {
            // System.out.println("Unknown character: " + c + " (" + ((int)
            // c)
            // + ")" + ((c + "") == "?"));
            buffer.append("?");
        }
    }
    return buffer.toString();
}