List of usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:nodomain.freeyourgadget.gadgetbridge.util.LanguageUtils.java
/** * Converts the diacritics/*from w w w .j av a 2 s . c o m*/ * @param string input text * @return converted text */ private static String flattenToAscii(String string) { string = Normalizer.normalize(string, Normalizer.Form.NFD); return string.replaceAll("\\p{M}", ""); }
From source file:org.uiautomation.ios.server.servlet.UIAScriptServlet.java
private void getResponse(HttpServletRequest request, HttpServletResponse response) throws Exception, JSONException { if (request.getInputStream() != null) { StringWriter writer = new StringWriter(); IOUtils.copy(request.getInputStream(), writer, "UTF-8"); String json = writer.toString(); json = Normalizer.normalize(json, LanguageDictionary.norme); UIAScriptResponse r = new UIAScriptResponse(json); if (r.isFirstResponse()) { log.fine("got first response"); Response resp = r.getResponse(); GetCapabilitiesNHandler.setCachedResponse(resp); getDriver().getSession(resp.getSessionId()).communication().registerUIAScript(); } else {//from www. j av a2s.c o m communication(request).setNextResponse(r); } log.fine("wait for next command"); UIAScriptRequest nextCommand = communication(request).getNextCommand(); String script = nextCommand.getScript(); log.fine("got " + script); response.setContentType("text/html"); response.setCharacterEncoding("UTF-8"); response.setStatus(200); response.getWriter().print(script); response.getWriter().close(); } }
From source file:de.xwic.sandbox.base.model.StringUtil.java
/**
 * Reduces a string to plain ASCII.
 *
 * <p>The input is first decomposed to NFD, which splits accented letters into a base
 * letter plus combining marks; every character outside the ASCII range is then
 * dropped. Accented letters therefore keep their base letter, while characters with
 * no ASCII base disappear entirely.
 *
 * @param s the text to convert; must not be {@code null}
 * @return the text with every non-ASCII character removed
 */
public static String normalize(String s) {
    final String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
    return asciiOnly;
}
From source file:cz.muni.fi.mir.tools.Tools.java
/** * Method normalizes input string. Simply said it removes diacritics and other nonascii characters. * @param input to be normalized/* w w w . j a v a2s. c om*/ * @return normalized input, an input without unicode symbols * @throws IllegalArgumentException if input contains unsupported encoding */ public String normalizeString(String input) throws IllegalArgumentException { StringBuilder sb = new StringBuilder(); String s1 = Normalizer.normalize(input, Normalizer.Form.NFKD); String regex = Pattern.quote("[\\p{InCombiningDiacriticalMarks}+"); String s2 = null; try { s2 = new String(s1.replaceAll(regex, "").getBytes("ascii"), "ascii"); } catch (UnsupportedEncodingException uee) { throw new IllegalArgumentException(uee); } char[] data = s2.toCharArray(); for (char c : data) { if (c != '?') { sb.append(c); } } return sb.toString(); }
From source file:ca.nines.ise.dom.DOMStream.java
/** * Construct a DOMStream from an input stream and record the source of the * input data./*from ww w . ja v a 2 s. c o m*/ * * @param in * @param source * @throws java.io.IOException */ public DOMStream(InputStream in, String source) throws IOException { lines = new ArrayList<>(); boolean warnedSmartQuotes = false; BOMInputStream bomStream = new BOMInputStream(in, ByteOrderMark.UTF_8, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE); bom = bomStream.getBOM(); if (bom != null) { Message m = Message.builder("builder.bom").setSource(source) .addNote("The byte order mark was " + bom.getCharsetName()).build(); Log.addMessage(m); encoding = bom.getCharsetName(); } else { encoding = "UTF-8"; } if (!encoding.equals("UTF-8")) { Message m = Message.builder("builder.notutf8").setSource(source) .addNote("The incorrect encoding is " + encoding).build(); Log.addMessage(m); } BufferedReader buffer = new BufferedReader(new InputStreamReader(bomStream, encoding)); String line; StringBuilder sb = new StringBuilder(); Pattern p = Pattern.compile("\u201C|\u201D"); while ((line = buffer.readLine()) != null) { line = Normalizer.normalize(line, Form.NFKC); Matcher m = p.matcher(line); if (m.find()) { line = m.replaceAll("\""); if (!warnedSmartQuotes) { warnedSmartQuotes = true; Message msg = Message.builder("builder.smartquotes").setSource(source) .addNote("The first occurence of smart quotes was at line " + lines.size()).build(); Log.addMessage(msg); } } lines.add(line); sb.append(line).append("\n"); } content = sb.toString().trim(); }
From source file:org.silverpeas.core.util.StringUtil.java
/** * Same treatment as the one of {@link #normalize(String)} but removes also the accented * characters.//from w ww. jav a 2 s . c om * @param string the string to normalize. There is no guarantee when the string is not encoded * into UTF8. * @return the normalized string. */ public static String normalizeByRemovingAccent(final String string) { String normalized = string; if (normalized != null) { // separating all of the accent marks from the characters normalized = Normalizer.normalize(normalized, Normalizer.Form.NFD); // removing accent normalized = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); } return normalized; }
From source file:cn.org.once.cstack.model.Snapshot.java
/**
 * Sets the tag after reducing it to lower-case ASCII letters and digits.
 *
 * <p>A non-null tag is lower-cased, decomposed to NFD, stripped of combining
 * diacritical marks and finally stripped of every character outside
 * {@code [a-z0-9]}. A {@code null} tag is stored as-is.
 *
 * @param tag the raw tag, or {@code null}
 */
public void setTag(String tag) {
    if (tag != null) {
        // Locale.ROOT keeps the mapping stable: under a Turkish default locale
        // 'I' would become a dotless i and be dropped by the final filter.
        tag = tag.toLowerCase(java.util.Locale.ROOT);
        tag = Normalizer.normalize(tag, Normalizer.Form.NFD);
        tag = tag.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        tag = tag.replaceAll("[^a-z0-9]", "");
    }
    this.tag = tag;
}
From source file:bit.changepurse.wdk.bip.MnemonicService.java
/**
 * Returns the NFKD (compatibility decomposition) form of the given mnemonic.
 *
 * @param mnemonic the text to normalize; must not be {@code null}
 * @return the NFKD-normalized text
 */
private String normalizeNFKD(String mnemonic) {
    final Normalizer.Form compatibilityDecomposition = Normalizer.Form.NFKD;
    return Normalizer.normalize(mnemonic, compatibilityDecomposition);
}
From source file:org.alfresco.repo.security.authentication.NameBasedUserNameGenerator.java
private String cleanseName(String name) { // Replace whitespace with _ String result = name.trim().toLowerCase().replaceAll("\\s+", "_"); // Remove accents from characters and strips out non-alphanumeric chars. return Normalizer.normalize(result, Normalizer.Form.NFD).replaceAll("[^a-zA-z0-9_]+", ""); }
From source file:org.nuxeo.ecm.platform.filemanager.utils.FileManagerUtils.java
/** * Looks if an existing Document has the same value for a given property. *///from w w w . j a v a 2 s . co m public static DocumentModel getExistingDocByPropertyName(CoreSession documentManager, String path, String value, String propertyName) { value = Normalizer.normalize(value, Normalizer.Form.NFC); DocumentModel existing = null; String parentId = documentManager.getDocument(new PathRef(path)).getId(); String query = "SELECT * FROM Document WHERE ecm:parentId = '" + parentId + "' AND " + propertyName + " = '" + value.replace("'", "\\\'") + "' AND ecm:currentLifeCycleState != '" + LifeCycleConstants.DELETED_STATE + "'"; DocumentModelList docs = documentManager.query(query, 1); if (docs.size() > 0) { existing = docs.get(0); } return existing; }