Example usage for java.text Normalizer normalize

List of usage examples for java.text Normalizer normalize

Introduction

On this page you can find example usage for java.text Normalizer normalize.

Prototype

public static String normalize(CharSequence src, Form form) 

Source Link

Document

Normalize a sequence of char values.

Usage

From source file:org.sakaiproject.util.ParameterParser.java

/**
 * Get a FileItem parameter by name./*from   w w w  . j a  va2  s . com*/
 * 
 * @param name
 *        The parameter name.
 * @return The parameter FileItem value, or null if it's not defined.
 */
public FileItem getFileItem(String name) {
    // wrap the Apache FileItem in our own homegrown FileItem
    Object o = m_req.getAttribute(name);
    if (o != null && o instanceof org.apache.commons.fileupload.FileItem) {
        org.apache.commons.fileupload.FileItem item = (org.apache.commons.fileupload.FileItem) o;
        try {
            return new FileItem(Normalizer.normalize(item.getName(), Normalizer.Form.NFC),
                    item.getContentType(), item.getInputStream());
        } catch (IOException e) {
            return new FileItem(Normalizer.normalize(item.getName(), Normalizer.Form.NFC),
                    item.getContentType(), item.get());
        }
    }

    return null;
}

From source file:org.mycore.common.xml.MCRXMLFunctions.java

/**
 * Converts the given text to Unicode Normalization Form C (NFC).
 *
 * @param arg0 the text to normalize
 * @return the NFC-normalized text
 * @see Normalizer#normalize(CharSequence, java.text.Normalizer.Form)
 */
public static String normalizeUnicode(String arg0) {
    final Normalizer.Form targetForm = Normalizer.Form.NFC;
    return Normalizer.normalize(arg0, targetForm);
}

From source file:org.jajuk.services.lyrics.providers.LyricsManiaWebLyricsProvider.java

/**
 * Strips accents from a string: the text is decomposed to NFD and every run of
 * combining diacritical marks is removed, leaving the base characters.
 *
 * @param s the string to process
 * @return the string without accents
 */
public String removeAccent(String s) {
    final String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    return Pattern.compile("\\p{InCombiningDiacriticalMarks}+").matcher(decomposed).replaceAll("");
}

From source file:com.csc.fi.ioapi.utils.LDHelper.java

/**
 * Removes accents from the given text by decomposing it to NFD and deleting all
 * combining diacritical marks.
 *
 * @param text the text to clean; may be null
 * @return the text without accents, or null when {@code text} is null
 */
public static String removeAccents(String text) {
    if (text == null) {
        return null;
    }
    String decomposed = Normalizer.normalize(text, Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:com.aestheticsw.jobkeywords.service.termextractor.impl.fivefilters.FiveFiltersClient.java

/**
 * Lower-cases the content, decomposes it to Unicode NFD and then applies every
 * pattern/replacement pair held in {@code regExMap}.
 *
 * @param content the raw text to clean
 * @param locale  the locale used for lower-casing
 * @return the cleaned text
 */
private String removeHtmlTagsAndOtherBogusContent(String content, Locale locale) {
    // NFD only splits accented characters into base character + combining marks;
    // presumably one of the regExMap patterns strips the marks afterwards - confirm.
    String cleaned = Normalizer.normalize(content.toLowerCase(locale), Normalizer.Form.NFD);

    for (Pattern pattern : regExMap.keySet()) {
        final String replacement = regExMap.get(pattern);
        cleaned = pattern.matcher(cleaned).replaceAll(replacement);
    }
    return cleaned;
}

From source file:it.tidalwave.northernwind.core.impl.model.ResourcePropertiesDelegate.java

/*******************************************************************************************************************
 *
 * Removes accents from a string: the text is decomposed to NFD and all combining diacritical marks are dropped.
 *
 * See http://stackoverflow.com/questions/1008802/converting-symbols-accent-letters-to-english-alphabet
 *
 * @param  string  the text to process
 * @return         the text without combining diacritical marks
 *
 ******************************************************************************************************************/
@Nonnull
public String deAccent(final @Nonnull String string) {
    final String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:org.tinymediamanager.scraper.thetvdb.TheTvDbMetadataProvider.java

/**
 * Cleans a search string: accents are removed and ASCII punctuation is dropped,
 * except for the hyphen and the exclamation mark which are kept.
 *
 * @param searchString the raw search string
 * @return the cleaned search string
 */
public static String clearSearchString(String searchString) {
    // NFD separates every accent mark from its base character
    final String decomposed = Normalizer.normalize(searchString, Normalizer.Form.NFD);

    // Broader filters such as [^\p{Alnum}\s\-] were rejected earlier because they
    // also removed all cyrillic characters, so only explicit classes are stripped.
    return decomposed
            // \p{M} matches each (now separate) accent mark
            .replaceAll("\\p{M}", "")
            // any \p{Punct} character, unless the lookahead sees '-' or '!'
            .replaceAll("(?![\\-!])[\\p{Punct}]", "");
}

From source file:com.moviejukebox.model.scriptablescraper.SectionContentSS.java

/**
 * Resolves a variable reference to its string value and applies the modifiers
 * contained in the reference.
 *
 * <ul>
 * <li>When the prepared variable carries an index ({@code index0 > -1}) the value is split
 * with ARRAY_GROUP_DIVIDER / ARRAY_ITEM_DIVIDER and the addressed element is returned.</li>
 * <li>{@code :safe} - strips combining diacritical marks and replaces spaces with '+';
 * {@code :safe(charset)} additionally URL-encodes the result with the given charset.</li>
 * <li>{@code :striptags} - passes the result through HTMLTools.removeHtmlTags.</li>
 * <li>{@code :htmldecode} - passes the result through HTMLTools.decodeHtml.</li>
 * </ul>
 *
 * @param name the variable reference, optionally followed by modifiers
 * @return the resolved value; an empty string when the name is blank, the variable has
 *         no value, the addressed element does not exist or an IOException occurs
 */
public String getVariable(final String name) {
    try {
        String result = "";
        boolean safe = false;
        boolean htmldecode = false;
        boolean striptags = false;

        if (StringUtils.isNotBlank(name)) {
            preparedVariable variable = prepareVariable(name);
            if (isDebug()) {
                LOG.debug("getVariable: {} value: {} index0: {} index1: {}", name, variable.value,
                        variable.index0, variable.index1);
            }
            if (variable.value == null) {
                return "";
            }

            safe = name.contains(":safe");
            htmldecode = name.contains(":htmldecode");
            // was "-1 <= name.indexOf(...)", which is true for every name
            striptags = name.contains(":striptags");

            if (variable.index0 > -1) {
                if (variable.value.contains(ARRAY_GROUP_DIVIDER)) {
                    // grouped value: index0 selects the group, index1 (optional) the item in it
                    List<String> values = Arrays.asList(variable.value.split(ARRAY_GROUP_DIVIDER));
                    if (values.size() > variable.index0) {
                        variable.value = values.get(variable.index0);
                        if (variable.index1 > -1) {
                            values = Arrays.asList(variable.value.split(ARRAY_ITEM_DIVIDER));
                            if (values.size() > variable.index1) {
                                result = values.get(variable.index1);
                            }
                        } else {
                            result = variable.value;
                        }
                    }
                } else if (variable.index1 == -1) {
                    // flat value: index0 selects the item directly
                    List<String> values = Arrays.asList(variable.value.split(ARRAY_ITEM_DIVIDER));
                    if (values.size() > variable.index0) {
                        result = values.get(variable.index0);
                    }
                }
            } else {
                result = variable.value;
            }
        }

        if (safe) {
            result = Normalizer.normalize(result, Normalizer.Form.NFD)
                    .replaceAll("\\p{InCombiningDiacriticalMarks}+", "").replace(" ", "+");
            int encodingStart = name.indexOf(":safe(");
            if (encodingStart > -1) {
                encodingStart += 6; // skip past ":safe("
                // the charset name ends at the first ')' AFTER ":safe(", not at the first ')' of name
                int encodingEnd = name.indexOf(')', encodingStart);
                if (encodingEnd > -1) {
                    String encodeName = name.substring(encodingStart, encodingEnd);
                    if (isDebug()) {
                        LOG.debug("encode result to '{}'", encodeName);
                    }
                    result = URLEncoder.encode(result, encodeName);
                } else {
                    // malformed modifier: keep the un-encoded value instead of failing on substring
                    LOG.error("Missing ')' after ':safe(' in variable : {}", name);
                }
            }
        }
        if (striptags) {
            result = HTMLTools.removeHtmlTags(result);
        }
        if (htmldecode) {
            result = HTMLTools.decodeHtml(result);
        }

        if (isDebug()) {
            LOG.debug("getVariable: result: '{}'", result);
        }
        return result;
    } catch (IOException error) {
        // URLEncoder.encode reports an unknown charset as UnsupportedEncodingException (an IOException)
        LOG.error("Failed get variable : {}", name);
        LOG.error("Error : {}", error.getMessage());
        return "";
    }
}

From source file:com.evolveum.midpoint.model.common.expression.functions.BasicExpressionFunctions.java

/**
 * Converts the input to a string and removes all combining marks after
 * compatibility decomposition (NFKD).
 *
 * @param input the value to convert; may be null
 * @return the string form of the input without combining marks, or null when
 *         {@code input} is null
 */
public String toAscii(Object input) {
    if (input != null) {
        final String decomposed = Normalizer.normalize(stringify(input), Normalizer.Form.NFKD);
        // \p{M} matches the combining marks that NFKD separated from their base characters
        return decomposed.replaceAll("\\p{M}", "");
    }
    return null;
}

From source file:module.entities.NameFinder.RegexNameFinder.java

/**
 * Builds a signature string of the form {@code signName + "#" + roleName} from a
 * sequence of paragraphs.
 *
 * Each paragraph's text is upper-cased, NFD-decomposed and stripped of combining marks.
 * Only paragraphs that contain a space, contain no digit and split into fewer than 7
 * space-separated tokens are examined. Tokens found in {@code names} or {@code surnames}
 * are appended to the signer name; paragraphs without such a token are candidates for
 * the role text.
 *
 * @param paragraphs the paragraph elements to scan, in document order
 * @return the signature, or an empty string when no name token was found
 */
public static String getSignatureFromParagraphs(Elements paragraphs) {
    String signature = "";
    String signName = "", roleName = "";
    // signIdx: row of the most recent name match; roleIdx: row of the most recent non-name paragraph
    int signIdx = 0, roleIdx = 0;
    int row = 0; // 1-based counter of the paragraph being examined
    // candidate role texts keyed by the row they were found on
    TreeMap<Integer, String> roles = new TreeMap<Integer, String>();
    for (Element n : paragraphs) {
        row++;
        // upper-case, decompose and drop combining marks; presumably so tokens can be
        // matched against the entries of names/surnames - confirm their stored form
        String formatedText = Normalizer.normalize(n.text().toUpperCase(locale), Normalizer.Form.NFD)
                .replaceAll("\\p{M}", "");
        // only short, digit-free, multi-word paragraphs are considered
        if (formatedText.contains(" ") && !formatedText.matches(".*[0-9].*")) {
            //                  if (formatedText.contains("<br>")) {
            //                      formatedText = formatedText.replaceAll("<br\\s*/>", " ");
            //                   }
            String[] splitedText = formatedText.split(" ");
            //                    System.out.println(splitedText.length);
            if (splitedText.length < 7) {
                boolean isSign = false;
                String text = "";
                for (int z = 0; z < splitedText.length; z++) {
                    // strip whitespace, dots, non-breaking spaces and commas from the token.
                    // NOTE(review): replaceAll("", "") is a no-op as written; the pattern
                    // literal may have lost a character - confirm against the original source.
                    String splText = splitedText[z].replaceAll("[\\s.]", "").replaceAll("\u00a0", "")
                            .replaceAll("", "").replaceAll(",", "");
                    if (names.contains(splText) || surnames.contains(splText)) {
                        signName += splText + " ";
                        signIdx = row;
                        isSign = true;
                    }
                    text += splText + " ";
                    //                            if (z == splitedText.length-1){
                    //                                System.out.println(signName.trim());
                    //                            }
                }
                if (!isSign) {
                    roleIdx = row;
                    // NOTE(review): text.contains("") is always true, so this condition is
                    // always false as written and roles is never filled; the literals here
                    // may have lost characters - confirm the intended check.
                    if (!text.contains(" ") && !text.contains("")) {
                        roles.put(roleIdx, text.trim());
                    }
                }
            }
        }
    }
    // keep only the role texts found fewer than 4 rows away from the last name match
    for (Integer roleRow : roles.keySet()) {
        //                    if (signName.length() == 0) {
        if (Math.abs(signIdx - roleRow) < 4) {
            roleName += roles.get(roleRow) + " ";
        }

    }

    // a signature is only produced when at least one name token was found
    if (signName.length() > 0) {
        signature = signName + "#" + roleName;
    }
    return signature;
}