Example usage for java.text.Normalizer.normalize

Introduction

On this page you can find example usages of java.text.Normalizer.normalize.

Prototype

public static String normalize(CharSequence src, Form form)

Source Link

Document

Normalize a sequence of char values.

Usage

From source file:nodomain.freeyourgadget.gadgetbridge.util.LanguageUtils.java

/**
 * Converts the diacritics/*from w w  w .j av a  2 s  .  c o  m*/
 * @param string input text
 * @return converted text
 */
private static String flattenToAscii(String string) {
    string = Normalizer.normalize(string, Normalizer.Form.NFD);
    return string.replaceAll("\\p{M}", "");
}

From source file:org.uiautomation.ios.server.servlet.UIAScriptServlet.java

/**
 * Consumes the payload posted with the request and answers with the next script.
 * <p>
 * The request body is read as UTF-8, normalized with {@code LanguageDictionary.norme} and
 * wrapped in a {@code UIAScriptResponse}. A first response is cached in
 * {@code GetCapabilitiesNHandler} and registers the UIA script on the matching session;
 * any other response is handed to the communication channel as the next response. The
 * script of the next command is then written back to the client.
 * Nothing happens when the request exposes no input stream.
 *
 * @param request  the incoming servlet request carrying the JSON payload
 * @param response the servlet response that receives the next script
 * @throws Exception if reading, parsing or writing fails
 */
private void getResponse(HttpServletRequest request, HttpServletResponse response)
        throws Exception, JSONException {

    if (request.getInputStream() == null) {
        return;
    }

    // Slurp the whole request body as UTF-8 text.
    StringWriter body = new StringWriter();
    IOUtils.copy(request.getInputStream(), body, "UTF-8");
    String payload = Normalizer.normalize(body.toString(), LanguageDictionary.norme);
    UIAScriptResponse scriptResponse = new UIAScriptResponse(payload);

    if (!scriptResponse.isFirstResponse()) {
        communication(request).setNextResponse(scriptResponse);
    } else {
        log.fine("got first response");
        Response first = scriptResponse.getResponse();
        GetCapabilitiesNHandler.setCachedResponse(first);
        getDriver().getSession(first.getSessionId()).communication().registerUIAScript();
    }

    // NOTE(review): getNextCommand() presumably blocks until a command is queued - confirm.
    log.fine("wait for next command");
    UIAScriptRequest pending = communication(request).getNextCommand();
    String script = pending.getScript();
    log.fine("got " + script);

    response.setContentType("text/html");
    response.setCharacterEncoding("UTF-8");
    response.setStatus(200);
    response.getWriter().print(script);
    response.getWriter().close();
}

From source file:de.xwic.sandbox.base.model.StringUtil.java

/**
 * Reduces a string to its plain ASCII characters.
 * <p>
 * The input is decomposed to Unicode NFD, which splits an accented letter into its base
 * letter followed by combining marks; every character outside the ASCII range is then
 * dropped.
 *
 * @param s the text to convert
 * @return the text with all non-ASCII characters removed
 */
public static String normalize(String s) {
    final String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
    return asciiOnly;
}

From source file:cz.muni.fi.mir.tools.Tools.java

/**
 * Method normalizes input string. Simply said it removes diacritics and other nonascii characters.
 * @param input to be normalized/* w  w  w  .  j a v  a2s.  c  om*/
 * @return normalized input, an input without unicode symbols
 * @throws IllegalArgumentException if input contains unsupported encoding
 */
public String normalizeString(String input) throws IllegalArgumentException {
    StringBuilder sb = new StringBuilder();

    String s1 = Normalizer.normalize(input, Normalizer.Form.NFKD);
    String regex = Pattern.quote("[\\p{InCombiningDiacriticalMarks}+");

    String s2 = null;
    try {
        s2 = new String(s1.replaceAll(regex, "").getBytes("ascii"), "ascii");
    } catch (UnsupportedEncodingException uee) {
        throw new IllegalArgumentException(uee);
    }

    char[] data = s2.toCharArray();

    for (char c : data) {
        if (c != '?') {
            sb.append(c);
        }
    }

    return sb.toString();
}

From source file:ca.nines.ise.dom.DOMStream.java

/**
 * Construct a DOMStream from an input stream and record the source of the
 * input data.
 * <p>
 * The stream is probed for a UTF-8, UTF-16 or UTF-32 byte order mark. When one is found a
 * {@code builder.bom} message is logged and its charset is used for decoding; otherwise
 * UTF-8 is assumed. Any encoding other than UTF-8 is reported with a
 * {@code builder.notutf8} message. Each line is normalized to Unicode NFKC and the curly
 * double quotes U+201C / U+201D are replaced with a plain {@code "}; the first time that
 * happens a {@code builder.smartquotes} message is logged. The processed lines are kept
 * individually and also joined with line feeds into the trimmed content.
 *
 * @param in the stream to read the document from
 * @param source label attached to the log messages produced while reading
 * @throws java.io.IOException if the stream cannot be read or decoded
 */
public DOMStream(InputStream in, String source) throws IOException {
    lines = new ArrayList<>();
    boolean warnedSmartQuotes = false;

    BOMInputStream bomStream = new BOMInputStream(in, ByteOrderMark.UTF_8, ByteOrderMark.UTF_32LE,
            ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
    bom = bomStream.getBOM();
    if (bom != null) {
        Message m = Message.builder("builder.bom").setSource(source)
                .addNote("The byte order mark was " + bom.getCharsetName()).build();
        Log.addMessage(m);
        encoding = bom.getCharsetName();
    } else {
        // No byte order mark: fall back to the expected encoding.
        encoding = "UTF-8";
    }

    if (!encoding.equals("UTF-8")) {
        Message m = Message.builder("builder.notutf8").setSource(source)
                .addNote("The incorrect encoding is " + encoding).build();
        Log.addMessage(m);
    }

    // NOTE(review): the reader is never closed here; presumably the caller owns and
    // closes the underlying stream - confirm.
    BufferedReader buffer = new BufferedReader(new InputStreamReader(bomStream, encoding));
    String line;
    StringBuilder sb = new StringBuilder();

    // Left and right curly double quotes.
    Pattern p = Pattern.compile("\u201C|\u201D");

    while ((line = buffer.readLine()) != null) {
        line = Normalizer.normalize(line, Form.NFKC);
        Matcher m = p.matcher(line);
        if (m.find()) {
            line = m.replaceAll("\"");
            if (!warnedSmartQuotes) {
                // Warn only once per stream.
                warnedSmartQuotes = true;
                // NOTE(review): lines.size() is read before the current line is added, so
                // the reported line number is zero-based - confirm that this is intended.
                Message msg = Message.builder("builder.smartquotes").setSource(source)
                        .addNote("The first occurence of smart quotes was at line " + lines.size()).build();
                Log.addMessage(msg);
            }
        }
        lines.add(line);
        sb.append(line).append("\n");
    }

    content = sb.toString().trim();
}

From source file:org.silverpeas.core.util.StringUtil.java

/**
 * Like {@link #normalize(String)}, but additionally removes the accents from the
 * characters.
 * <p>
 * The string is decomposed to Unicode NFD so that accents become separate combining marks,
 * and the characters of the "Combining Diacritical Marks" block are then stripped.
 *
 * @param string the string to normalize; may be {@code null}. There is no guarantee when
 * the string is not encoded into UTF8.
 * @return the normalized string, or {@code null} when the given string is {@code null}.
 */
public static String normalizeByRemovingAccent(final String string) {
    if (string == null) {
        return null;
    }
    // Split every accented character into its base character plus combining marks ...
    final String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
    // ... and drop the marks.
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:cn.org.once.cstack.model.Snapshot.java

/**
 * Sets the tag after reducing it to lower-case ASCII letters and digits.
 * <p>
 * A non-null tag is lower-cased, decomposed to Unicode NFD, stripped of its combining
 * diacritical marks and finally of every character that is not in {@code a-z} or
 * {@code 0-9}. A {@code null} tag is stored as is.
 *
 * @param tag the raw tag, may be {@code null}
 */
public void setTag(String tag) {
    if (tag != null) {
        // Lower-case independently of the default locale: under a Turkish locale 'I' would
        // become the dotless 'ı', which the final filter below silently deletes.
        tag = tag.toLowerCase(java.util.Locale.ROOT);
        tag = Normalizer.normalize(tag, Normalizer.Form.NFD);
        tag = tag.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        tag = tag.replaceAll("[^a-z0-9]", "");
    }
    this.tag = tag;
}

From source file:bit.changepurse.wdk.bip.MnemonicService.java

/**
 * Returns the given mnemonic in Unicode normalization form NFKD.
 *
 * @param mnemonic the text to normalize
 * @return the NFKD form of {@code mnemonic}
 */
private String normalizeNFKD(String mnemonic) {
    final Normalizer.Form targetForm = Normalizer.Form.NFKD;
    return Normalizer.normalize(mnemonic, targetForm);
}

From source file:org.alfresco.repo.security.authentication.NameBasedUserNameGenerator.java

/**
 * Turns a name into a token made only of lower-case ASCII letters, digits and underscores.
 * <p>
 * The name is trimmed, lower-cased and each run of whitespace is replaced with a single
 * underscore. It is then decomposed to Unicode NFD so accents become separate combining
 * marks, and every character outside {@code [a-zA-Z0-9_]} is removed.
 *
 * @param name the raw name, must not be {@code null}
 * @return the cleansed name, possibly empty
 */
private String cleanseName(String name) {
    // Replace whitespace with _
    // Locale.ROOT keeps the lower-casing independent of the default locale (a Turkish
    // locale would map 'I' to the dotless 'ı', which is then stripped below).
    String result = name.trim().toLowerCase(java.util.Locale.ROOT).replaceAll("\\s+", "_");

    // Remove accents from characters and strips out non-alphanumeric chars.
    // The upper-case range must be A-Z: the range A-z also spans '[', '\', ']', '^' and '`',
    // which would let those characters through.
    return Normalizer.normalize(result, Normalizer.Form.NFD).replaceAll("[^a-zA-Z0-9_]+", "");
}

From source file:org.nuxeo.ecm.platform.filemanager.utils.FileManagerUtils.java

/**
 * Looks if an existing Document has the same value for a given property.
 * <p>
 * The value is normalized to Unicode NFC and then searched for among the children of the
 * document at {@code path} that are not in the deleted life cycle state. At most one
 * result is requested from the query.
 *
 * @param documentManager the session used to resolve the parent and run the query
 * @param path path of the parent document
 * @param value the property value to look for, must not be {@code null}
 * @param propertyName name of the property to compare; it is inserted into the query as
 *        is, so it must come from trusted code
 * @return the first matching document, or {@code null} when there is none
 */
public static DocumentModel getExistingDocByPropertyName(CoreSession documentManager, String path, String value,
        String propertyName) {
    value = Normalizer.normalize(value, Normalizer.Form.NFC);
    DocumentModel existing = null;
    String parentId = documentManager.getDocument(new PathRef(path)).getId();
    // Escape backslashes before quotes: otherwise a backslash in the value would combine
    // with the escape added for a quote (or with the closing quote) and break out of the
    // string literal.
    String escapedValue = value.replace("\\", "\\\\").replace("'", "\\'");
    String query = "SELECT * FROM Document WHERE ecm:parentId = '" + parentId + "' AND " + propertyName + " = '"
            + escapedValue + "' AND ecm:currentLifeCycleState != '"
            + LifeCycleConstants.DELETED_STATE + "'";
    DocumentModelList docs = documentManager.query(query, 1);
    if (docs.size() > 0) {
        existing = docs.get(0);
    }
    return existing;
}